/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.77 by Ian Clatworthy
add inv-fulltext option and improve speed
60
# How many commits before each inventory fulltext
61
_DEFAULT_INV_FULLTEXT = 200
62
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
63
# How many inventories to cache
64
_DEFAULT_INV_CACHE_SIZE = 10
65
0.64.41 by Ian Clatworthy
update multiple working trees if requested
66
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
67
class GenericProcessor(processor.ImportProcessor):
68
    """An import processor that handles basic imports.
69
70
    Current features supported:
71
0.64.16 by Ian Clatworthy
safe processing tweaks
72
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
73
    * files and symlinks commits are supported
74
    * checkpoints automatically happen at a configurable frequency
75
      over and above the stream requested checkpoints
76
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
77
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
78
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
79
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
80
    At checkpoints and on completion, the commit-id -> revision-id map is
81
    saved to a file called 'fastimport-id-map'. If the import crashes
82
    or is interrupted, it can be started again and this file will be
83
    used to skip over already loaded revisions. The format of each line
84
    is "commit-id revision-id" so commit-ids cannot include spaces.
85
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
86
    Here are the supported parameters:
87
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
88
    * info - name of a hints file holding the analysis generated
89
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
90
      importing large repositories, this parameter is needed so
91
      that the importer knows what blobs to intelligently cache.
92
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
94
      By default, the importer updates the repository
95
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
96
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
97
98
    * checkpoint - automatically checkpoint every n commits over and
99
      above any checkpoints contained in the import stream.
100
      The default is 10000.
101
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
102
    * count - only import this many commits then exit. If not set
103
      or negative, all commits are imported.
104
    
0.64.77 by Ian Clatworthy
add inv-fulltext option and improve speed
105
    * inv-fulltext - create an inventory fulltext every n commits.
106
      The default is 200.
107
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
108
    * inv-cache - number of inventories to cache.
109
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
110
111
    * experimental - enable experimental mode, i.e. use features
112
      not yet fully tested.
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
113
    """
114
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
115
    known_params = [
116
        'info',
117
        'trees',
118
        'checkpoint',
119
        'count',
120
        'inv-cache',
0.64.77 by Ian Clatworthy
add inv-fulltext option and improve speed
121
        'inv-fulltext',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
122
        'experimental',
123
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
124
125
    def note(self, msg, *args):
        """Report a note, prefixed with the current time of day.

        :param msg: the message text; may hold %-style placeholders
        :param args: passed through untouched to the module-level ``note``
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
129
130
    def warning(self, msg, *args):
        """Report a warning, prefixed with the time of day and 'WARNING:'.

        :param msg: the message text; may hold %-style placeholders
        :param args: passed through untouched to the module-level ``warning``
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
134
0.64.67 by James Westby
Add support for -Dfast-import.
135
    def debug(self, mgs, *args):
        """Output a debug message if the appropriate -D option was given.

        Nothing is emitted unless "fast-import" is among the active debug
        flags. When it is, the message is timestamped and sent to
        ``mutter``.

        :param mgs: the message text; may hold %-style placeholders.
            (The parameter spelling is kept as-is so that existing callers
            are unaffected.)
        :param args: passed through untouched to ``mutter``
        """
        if "fast-import" in debug.debug_flags:
            # Read from the parameter actually declared above; referring
            # to an undefined 'msg' here used to raise a NameError.
            msg = "%s DEBUG: %s" % (self._time_of_day(), mgs)
            mutter(msg, *args)
140
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
141
    def _time_of_day(self):
142
        """Time of day as a string."""
143
        # Note: this is a separate method so tests can patch in a fixed value
144
        return time.strftime("%H:%M:%S")
0.64.67 by James Westby
Add support for -Dfast-import.
145
    
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
146
    def pre_process(self):
        """Set up all state needed before the first command is handled.

        Records the start time, loads the parameters and hints, creates
        the cache manager, loads the id-map, chooses a revision loader,
        disables autopacking where the repository is a KnitPackRepository
        and finally opens a write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        # Find out how many commits an earlier run already loaded
        self.skip_total = self._init_id_map()
        if self.skip_total:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", self.skip_total)
        self._revision_count = 0

        # tag name -> revision_id
        self.tags = {}

        # Pick the revision loader used for committing
        if not self._experimental:
            self.loader = revisionloader.ImportRevisionLoader(
                self.repo, self.inventory_cache_size)
        else:
            def fulltext_when(count):
                # A full-text is wanted on the last commit (when the total
                # is known) and otherwise on every inv_fulltext_every-th.
                total = self.total_commits
                is_last = total is not None and count == total
                fulltext = is_last or count % self.inv_fulltext_every == 0
                if fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return fulltext

            self.loader = revisionloader.ExperimentalRevisionLoader(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if not isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = None
        else:
            collection = self.repo._pack_collection
            # Remember the original hook before replacing it
            self._original_max_pack_count = collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1

            collection._max_pack_count = _max_pack_count_for_import

        # Open a write group. It is committed at the end of the import;
        # checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
195
196
    def _load_info_and_params(self):
        """Read the processor parameters and the optional hints file.

        Sets ``_experimental``, ``id_map_path``, ``info``,
        ``progress_every``, ``checkpoint_every``, ``inv_fulltext_every``,
        ``inventory_cache_size``, ``max_commits`` and ``total_commits``
        from ``self.params`` and, when given, the 'info' file.

        :raises ValueError: if a numeric parameter cannot be converted
            by ``int()``
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps the interval an integer regardless of
            # whether true division is in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often to fulltext the inventory
        self.inv_fulltext_every = int(self.params.get('inv-fulltext',
            _DEFAULT_INV_FULLTEXT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        else:
            # A negative count means "no limit"
            if self.max_commits < 0:
                self.max_commits = None

        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            # Never report a total larger than the requested limit
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
247
0.64.27 by Ian Clatworthy
1st cut at performance tuning
248
    def _process(self, command_iter):
        """Run the base-class processing, aborting the write group on error.

        :param command_iter: handed unchanged to
            ``processor.ImportProcessor._process``
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Whatever went wrong, do not leave a write group open;
            # the original exception is always re-raised.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
256
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
257
    def post_process(self):
        """Finish the import once all commands have been handled.

        Commits the open write group, saves the id-map, updates the
        branches, optionally updates working trees (the 'trees'
        parameter), dumps statistics and, if autopacking was disabled
        earlier and more revisions than one checkpoint interval were
        processed, packs the repository and clears obsolete packs.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # Bind the same name in both branches: the verbose branch
                # used to assign to a different name, so the update loop
                # below failed with a NameError in verbose mode.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
324
325
    def _get_working_trees(self, branches):
326
        """Get the working trees for branches in the repository."""
327
        result = []
328
        wt_expected = self.repo.make_working_trees()
329
        for br in branches:
330
            if br == self.branch and br is not None:
331
                wt = self.working_tree
332
            elif wt_expected:
333
                try:
334
                    wt = br.bzrdir.open_workingtree()
335
                except errors.NoWorkingTree:
336
                    self.warning("No working tree for branch %s", br)
337
                    continue
338
            else:
339
                continue
340
            result.append(wt)
341
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
342
343
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the commits that were skipped
        because they had been loaded already.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions,
            helpers.single_plural(revisions, "revision", "revisions"),
            branches,
            helpers.single_plural(branches, "branch", "branches"),
            trees,
            helpers.single_plural(trees, "tree", "trees"),
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
353
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
354
    def _init_id_map(self):
        """Load the id-map and verify it against the repository size.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the number of revisions in the
            repository differs from the number of map entries
        """
        # Only the sizes are compared for now. A stricter check that the
        # revision-ids themselves match could be added later.
        id_map, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        existing_count = len(self.repo.all_revision_ids())
        if existing_count == known:
            return known
        raise plugin_errors.BadRepositorySize(known, existing_count)
368
369
    def _save_id_map(self):
        """Write the commit-id to revision-id map to ``id_map_path``."""
        # The complete map is rewritten each time. Switching to appending
        # only the new entries is an option should this become a problem.
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(self.id_map_path, id_map)
374
0.64.5 by Ian Clatworthy
first cut at generic processing method
375
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        The blob is keyed by ``cmd.id`` when the command carries a mark
        and by the SHA of its data otherwise.
        """
        if cmd.mark is None:
            dataref = osutils.sha_strings(cmd.data)
        else:
            dataref = cmd.id
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
382
383
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the command; not inspected here
        """
        repo = self.repo
        # Close off the current write group, persist the id-map, then
        # open a fresh write group for what follows.
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
389
390
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been seen, the commit
        is only tracked and checked against the id-map rather than loaded.
        Otherwise it is committed through a GenericCommitHandler, progress
        is reported, and the import either finishes (requested count
        reached) or checkpoints automatically.

        :raises BadRestart: if a commit being skipped is not in the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for _fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
427
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
428
    def _gen_file_ids_cache(self):
429
        """Generate the file-id cache by searching repository inventories.
430
        """
431
        # Get the interesting revisions - the heads
432
        head_ids = self.cache_mgr.heads.keys()
433
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
434
435
        # Update the fileid cache
436
        file_ids = {}
437
        for revision_id in revision_ids:
438
            inv = self.repo.revision_tree(revision_id).inventory
439
            # Cache the inventoires while we're at it
440
            self.cache_mgr.inventories[revision_id] = inv
441
            for path, ie in inv.iter_entries():
442
                file_ids[path] = ie.file_id
443
        self.cache_mgr.file_ids = file_ids
444
0.64.25 by Ian Clatworthy
slightly better progress reporting
445
    def report_progress(self, details=''):
        """Emit a progress note every ``progress_every`` commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        done = self._revision_count
        if done % self.progress_every != 0:
            return
        total = self.total_commits
        if total is None:
            counts = "%d" % (done,)
            eta_str = ''
        else:
            counts = "%d/%d" % (done, total)
            eta = progress.get_eta(self._start_time, done, total)
            eta_str = progress.str_tdelta(eta)
            # An ETA string ending in '--' is not shown at all
            if eta_str.endswith('--'):
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_str
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
461
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
462
    def progress_handler(self, cmd):
        """Process a ProgressCommand by echoing its message as a note."""
        # A progress bar could be used here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
466
467
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under 'refs/tags/' defines a tag; any other
        reset is ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if not cmd.ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)
            return
        self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
474
475
    def tag_handler(self, cmd):
        """Process a TagCommand by defining the tag it names."""
        tag_name, source = cmd.id, cmd.from_
        self._set_tag(tag_name, source)
478
479
    def _set_tag(self, name, from_):
480
        """Define a tag given a name an import 'from' reference."""
481
        bzr_tag_name = name.decode('utf-8', 'replace')
482
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
483
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
484
485
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
486
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the instance as ``verbose``
        :param inventory_cache_size: size given to the LRU cache that
            holds inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is kept as sticky when every blob is sticky (no hints
        available), when it is empty, or when its id is listed in the
        hints; otherwise it is stored for a single fetch.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay available; any other blob is removed from the
        cache by the fetch.

        :raises KeyError: if no blob is stored under id
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        :raises KeyError: if old_path has no file-id recorded
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # Pop before assigning so that renaming a path onto itself keeps
        # the mapping instead of deleting it.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
555
556
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
557
def _track_heads(cmd, cache_mgr):
558
    """Track the repository heads given a CommitCommand.
559
    
560
    :return: the list of parents in terms of commit-ids
561
    """
562
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
563
    if cmd.from_ is not None:
564
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
565
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
566
        last_id = cache_mgr.last_ids.get(cmd.ref)
567
        if last_id is not None:
568
            parents = [last_id]
569
        else:
570
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
571
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
572
    # Track the heads
573
    for parent in parents:
574
        try:
575
            del cache_mgr.heads[parent]
576
        except KeyError:
577
            # it's ok if the parent isn't there - another
578
            # commit may have already removed it
579
            pass
580
    cache_mgr.heads[cmd.id] = cmd.ref
581
    cache_mgr.last_ids[cmd.ref] = cmd.id
582
    cache_mgr.last_ref = cmd.ref
583
    return parents
584
585
0.64.5 by Ian Clatworthy
first cut at generic processing method
586
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that builds an inventory for one import commit.

    The file commands of the commit are applied to a copy of the first
    parent's inventory (or to a fresh inventory for a parentless commit)
    and the resulting revision is handed to the loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        note("%s (%s)" % (msg, self.command.id), *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        warning("WARNING: %s (%s)" % (msg, self.command.id), *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" not in debug.debug_flags:
            return
        mutter("%s (%s)" % (msg, self.command.id), *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list
        commit_parents = _track_heads(self.command, self.cache_mgr)

        # Translate the import commit-ids into revision-ids; a parentless
        # commit simply ends up with an empty list
        self.parents = [self.cache_mgr.revision_ids[commit_id]
            for commit_id in commit_parents]
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            basis_inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = basis_inv.copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Work out who did what; the author is only recorded as a
        # revision property when it differs from the committer
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id

        # Load the revision into the repository
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=self._escape_commit_message(self.command.message),
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        def _escape_run(match):
            return match.group(0).encode('unicode_escape')

        return re.sub(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            _escape_run,
            message)

    def modify_handler(self, filecmd):
        """Apply a file modification, taking the data inline or from a blob."""
        if filecmd.dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory being built."""
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about a failed delete unless a merge parent has the file."""
        # NOTE(review): when there are no merge parents this returns
        # without warning at all - confirm that is intended.
        merge_parents = self.parents[1:]
        if not merge_parents:
            return
        for parent in merge_parents:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Move a file to a new path, replacing anything already there."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        # Whatever currently lives at the destination is removed first
        occupant_id = self.inventory.path2id(new_path)
        if occupant_id is not None:
            self.inventory.remove_recursive_id(occupant_id)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        if path in known_ids:
            return known_ids[path], False
        file_id = generate_ids.gen_file_id(path)
        known_ids[path] = file_id
        self.debug("Generated new file id %s for '%s'", file_id, path)
        return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        return generate_ids.gen_revision_id(who, committer[2])

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        cache = self.cache_mgr.inventories
        try:
            return cache[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        cache[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids found in the cache or the repository
          inventories = one inventory per requested revision-id
        """
        present = []
        inventories = []
        cache = self.cache_mgr.inventories
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = cache[revision_id]
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                cache[revision_id] = inv
            else:
                present.append(revision_id)
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory."""
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum([len(line) for line in text_lines])
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLink):
            entry.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
            parent_ie.children[basename] = entry
        else:
            self.inventory.add(entry)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        if dirname in self.directory_entries:
            return basename, self.directory_entries[dirname]

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        dir_ie = inventory.entry_factory['directory'](dir_file_id,
                                                      dir_basename,
                                                      parent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
876
877
0.64.34 by Ian Clatworthy
report lost branches
878
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
879
0.64.64 by Ian Clatworthy
save tags known about in each branch
880
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
881
        """Create an object responsible for updating branches.
882
883
        :param heads_by_ref: a dictionary where
884
          names are git-style references like refs/heads/master;
885
          values are one item lists of commits marks.
886
        """
0.64.37 by Ian Clatworthy
create branches as required
887
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
888
        self.branch = branch
889
        self.cache_mgr = cache_mgr
890
        self.heads_by_ref = heads_by_ref
891
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
892
        self.tags = tags
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
893
894
    def update(self):
895
        """Update the Bazaar branches and tips matching the heads.
896
897
        If the repository is shared, this routine creates branches
898
        as required. If it isn't, warnings are produced about the
899
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
900
0.64.34 by Ian Clatworthy
report lost branches
901
        :return: updated, lost_heads where
902
          updated = the list of branches updated
903
          lost_heads = a list of (bazaar-name,revision) for branches that
904
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
905
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
906
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
907
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
908
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
909
            if self._update_branch(br, tip):
910
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
911
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
912
913
    def _get_matching_branches(self):
914
        """Get the Bazaar branches.
915
0.64.34 by Ian Clatworthy
report lost branches
916
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
917
          default_tip = the last commit mark for the default branch
918
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
919
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
920
            would have been created had the repository been shared and
921
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
922
        """
0.64.37 by Ian Clatworthy
create branches as required
923
        branch_tips = []
924
        lost_heads = []
925
        ref_names = self.heads_by_ref.keys()
926
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
927
            trunk = self.select_trunk(ref_names)
928
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
929
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
930
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
931
932
        # Convert the reference names into Bazaar speak
933
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
934
0.64.37 by Ian Clatworthy
create branches as required
935
        # Policy for locating branches
936
        def dir_under_current(name, ref_name):
937
            # Using the Bazaar name, get a directory under the current one
938
            return name
939
        def dir_sister_branch(name, ref_name):
940
            # Using the Bazaar name, get a sister directory to the branch
941
            return osutils.pathjoin(self.branch.base, "..", name)
942
        if self.branch is not None:
943
            dir_policy = dir_sister_branch
944
        else:
945
            dir_policy = dir_under_current
946
0.64.34 by Ian Clatworthy
report lost branches
947
        # Create/track missing branches
948
        shared_repo = self.repo.is_shared()
949
        for name in sorted(bzr_names.keys()):
950
            ref_name = bzr_names[name]
951
            tip = self.heads_by_ref[ref_name][0]
952
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
953
                location = dir_policy(name, ref_name)
954
                try:
955
                    br = self.make_branch(location)
956
                    branch_tips.append((br,tip))
957
                    continue
958
                except errors.BzrError, ex:
959
                    error("ERROR: failed to create branch %s: %s",
960
                        location, ex)
961
            lost_head = self.cache_mgr.revision_ids[tip]
962
            lost_info = (name, lost_head)
963
            lost_heads.append(lost_info)
964
        return branch_tips, lost_heads
965
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
966
    def select_trunk(self, ref_names):
967
        """Given a set of ref names, choose one as the trunk."""
968
        for candidate in ['refs/heads/master']:
969
            if candidate in ref_names:
970
                return candidate
971
        # Use the last reference in the import stream
972
        return self.last_ref
973
0.64.37 by Ian Clatworthy
create branches as required
974
    def make_branch(self, location):
        """Open the branch at 'location', creating it if there isn't one."""
        try:
            control_dir = bzrdir.BzrDir.open(location)
            return control_dir.open_branch()
        except errors.NotBranchError:
            # Nothing to open, so make a new branch there
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
980
981
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
982
        """Generate Bazaar branch names from import ref names.
983
        
984
        :return: a dictionary with Bazaar names as keys and
985
          the original reference names as values.
986
        """
0.64.34 by Ian Clatworthy
report lost branches
987
        bazaar_names = {}
988
        for ref_name in sorted(ref_names):
989
            parts = ref_name.split('/')
990
            if parts[0] == 'refs':
991
                parts.pop(0)
992
            full_name = "--".join(parts)
993
            bazaar_name = parts[-1]
994
            if bazaar_name in bazaar_names:
995
                bazaar_name = full_name
996
            bazaar_names[bazaar_name] = ref_name
997
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
998
999
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1000
        """Update a branch with last revision and tag information.
1001
        
1002
        :return: whether the branch was changed or not
1003
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1004
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
1005
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
1006
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1007
        existing_revno, existing_last_rev_id = br.last_revision_info()
1008
        changed = False
1009
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
1010
            br.set_last_revision_info(revno, last_rev_id)
1011
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
1012
        # apply tags known in this branch
1013
        my_tags = {}
1014
        if self.tags:
1015
            for tag,rev in self.tags.items():
1016
                if rev in revs:
1017
                    my_tags[tag] = rev
1018
            if my_tags:
1019
                br.tags._set_tag_dict(my_tags)
1020
                changed = True
1021
        if changed:
1022
            tagno = len(my_tags)
1023
            note("\t branch %s now has %d %s and %d %s", br.nick,
1024
                revno, helpers.single_plural(revno, "revision", "revisions"),
1025
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1026
        return changed