/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# Number of commits between automatic progress reports.
_DEFAULT_AUTO_PROGRESS = 1000

# Number of commits between automatic checkpoints; used when the
# 'checkpoint' parameter is not given.
_DEFAULT_AUTO_CHECKPOINT = 10000

# Number of commits between inventory fulltexts; used when the
# 'inv-fulltext' parameter is not given.
_DEFAULT_INV_FULLTEXT = 200

# Number of inventories to cache; used when the 'inv-cache' parameter
# is not given.
_DEFAULT_INV_CACHE_SIZE = 10
65
0.64.41 by Ian Clatworthy
update multiple working trees if requested
66
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
67
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * commits of files and symlinks are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the checkpoints requested by the stream
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    At checkpoints and on completion, the commit-id -> revision-id map is
    saved to a file called 'fastimport-id-map'. If the import crashes
    or is interrupted, it can be started again and this file will be
    used to skip over already loaded revisions. The format of each line
    is "commit-id revision-id" so commit-ids cannot include spaces.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-fulltext - create an inventory fulltext every n commits.
      The default is 200.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.

    * experimental - enable experimental mode, i.e. use features
      not yet fully tested.
    """

    # Names of the parameters this processor understands; each one is
    # described in the class docstring.
    known_params = [
        'info',
        'trees',
        'checkpoint',
        'count',
        'inv-cache',
        'inv-fulltext',
        'experimental',
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
124
125
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message text (may be a %-format string)
        :param args: passed through unchanged to the module-level
            ``note`` function together with the prefixed message
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
129
130
    def warning(self, msg, *args):
        """Emit a warning prefixed with the time of day and 'WARNING:'.

        :param msg: the message text (may be a %-format string)
        :param args: passed through unchanged to the module-level
            ``warning`` function together with the prefixed message
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
134
0.64.67 by James Westby
Add support for -Dfast-import.
135
    def debug(self, mgs, *args):
        """Output a debug message if the appropriate -D option was given.

        The message is only written (via ``mutter``) when "fast-import"
        is present in ``debug.debug_flags``.

        :param mgs: the message text (may be a %-format string). The name
            is a misspelling of 'msg'; it is kept so the signature stays
            unchanged for existing callers.
        :param args: passed through unchanged to ``mutter``
        """
        if "fast-import" in debug.debug_flags:
            # Format from the 'mgs' parameter. The previous code read a
            # local called 'msg' before assigning it, which raised
            # UnboundLocalError whenever the debug flag was enabled.
            msg = "%s DEBUG: %s" % (self._time_of_day(), mgs)
            mutter(msg, *args)
140
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
141
    def _time_of_day(self):
142
        """Time of day as a string."""
143
        # Note: this is a separate method so tests can patch in a fixed value
144
        return time.strftime("%H:%M:%S")
0.64.67 by James Westby
Add support for -Dfast-import.
145
    
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
146
    def pre_process(self):
        """Set up the processor state before any command is handled.

        Records the start time, loads the parameters and hints, creates
        the cache manager, loads the id map (noting how many commits are
        already loaded), picks a revision loader, disables autopacking on
        KnitPackRepository repositories and starts the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(
            self.info, self.verbose, self.inventory_cache_size)

        already_loaded = self._init_id_map()
        self.skip_total = already_loaded
        if already_loaded:
            self.note("Found %d commits already loaded - "
                "skipping over these ...", already_loaded)
        self._revision_count = 0

        # tag name -> revision_id
        self.tags = {}

        # Pick the revision loader used for committing. Repositories that
        # expose a 'revisions' attribute get the loaders for the new
        # Repository API.
        uses_new_api = hasattr(self.repo, 'revisions')
        if not self._experimental:
            if uses_new_api:
                loader_class = revisionloader.RevisionLoader2
            else:
                loader_class = revisionloader.RevisionLoader1
            self.loader = loader_class(self.repo)
        else:
            def fulltext_when(count):
                # A fulltext is wanted for the final commit (when the
                # total is known) and every inv_fulltext_every commits.
                total = self.total_commits
                is_last = total is not None and count == total
                wanted = is_last or count % self.inv_fulltext_every == 0
                if wanted:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return wanted

            if uses_new_api:
                loader_class = revisionloader.ImportRevisionLoader2
            else:
                loader_class = revisionloader.ImportRevisionLoader1
            self.loader = loader_class(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if not isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = None
        else:
            pack_collection = self.repo._pack_collection
            # Remember the original callable before replacing it.
            self._original_max_pack_count = pack_collection._max_pack_count

            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _max_pack_count_for_import

        # Open a write group. It is committed at the end of the import;
        # checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
203
204
    def _load_info_and_params(self):
        """Turn ``self.params`` and the optional hints file into attributes.

        Sets ``_experimental``, ``id_map_path``, ``info``,
        ``progress_every``, ``checkpoint_every``, ``inv_fulltext_every``,
        ``inventory_cache_size``, ``max_commits`` and ``total_commits``.
        """
        params = self.params
        self._experimental = bool(params.get('experimental', False))

        # The id-map location is currently hard-coded but might become
        # configurable via parameters one day if that's needed.
        control_transport = self.repo.control_files._transport
        self.id_map_path = control_transport.local_abspath(
            "fastimport-id-map")

        # Load the info (hints) file, if one was named
        info_path = params.get('info')
        if info_path is None:
            self.info = None
        else:
            self.info = configobj.ConfigObj(info_path)

        # How often to automatically report progress (not a parameter
        # yet); verbose mode divides the interval by ten.
        progress_interval = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            progress_interval = progress_interval / 10
        self.progress_every = progress_interval

        # How often to automatically checkpoint
        self.checkpoint_every = int(
            params.get('checkpoint', _DEFAULT_AUTO_CHECKPOINT))

        # How often to fulltext the inventory
        self.inv_fulltext_every = int(
            params.get('inv-fulltext', _DEFAULT_INV_FULLTEXT))

        # How big to make the inventory cache
        self.inventory_cache_size = int(
            params.get('inv-cache', _DEFAULT_INV_CACHE_SIZE))

        # Maximum number of commits to import (None means all). In case
        # the info file has an outdated commit count, the count at which
        # we stop is stored separately from the total used for progress
        # tracking.
        try:
            requested = params['count']
        except KeyError:
            limit = None
        else:
            limit = int(requested)
            if limit < 0:
                limit = None
        self.max_commits = limit

        if self.info is None:
            total = limit
        else:
            total = int(self.info['Command counts']['commit'])
            if limit is not None and total > limit:
                total = limit
        self.total_commits = total
0.64.25 by Ian Clatworthy
slightly better progress reporting
255
0.64.27 by Ian Clatworthy
1st cut at performance tuning
256
    def _process(self, command_iter):
        """Run the base-class processing, aborting the write group on error.

        :param command_iter: passed unchanged to
            ``processor.ImportProcessor._process``
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Whatever went wrong, abort the open write group (if any)
            # before re-raising the original exception.
            repo = self.repo
            needs_abort = repo is not None and repo.is_in_write_group()
            if needs_abort:
                repo.abort_write_group()
            raise
264
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
265
    def post_process(self):
        """Finish the import after the command stream has been consumed.

        Commits the open write group and saves the id map, updates the
        branches through GenericBranchUpdater (warning about heads for
        which no branch was created), updates the working trees when the
        'trees' parameter is set, and dumps the statistics. If
        pre_process() recorded an original max-pack-count and more than
        ``checkpoint_every`` revisions were counted, it then packs the
        repository and empties its obsolete_packs directory.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                if self.verbose:
                    # This used to be bound to 'report', leaving
                    # 'reporter' undefined and raising NameError in the
                    # loop below whenever verbose mode was on.
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
332
333
    def _get_working_trees(self, branches):
334
        """Get the working trees for branches in the repository."""
335
        result = []
336
        wt_expected = self.repo.make_working_trees()
337
        for br in branches:
338
            if br == self.branch and br is not None:
339
                wt = self.working_tree
340
            elif wt_expected:
341
                try:
342
                    wt = br.bzrdir.open_workingtree()
343
                except errors.NoWorkingTree:
344
                    self.warning("No working tree for branch %s", br)
345
                    continue
346
            else:
347
                continue
348
            result.append(wt)
349
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
350
351
    def dump_stats(self):
        """Note how many revisions, branches and trees were handled.

        The revision figure excludes the ``skip_total`` commits; the
        elapsed time is measured from ``_start_time``.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        summary = (
            revisions,
            helpers.single_plural(revisions, "revision", "revisions"),
            branches,
            helpers.single_plural(branches, "branch", "branches"),
            trees,
            helpers.single_plural(trees, "tree", "trees"),
            elapsed,
            )
        self.note("Imported %d %s, updating %d %s and %d %s in %s", *summary)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
361
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
362
    def _init_id_map(self):
        """Load the id-map and verify it against the repository.

        :return: the number of entries in the map
        :raises BadRepositorySize: when the repository's revision count
            differs from the number of entries in the map
        """
        # Only the size is checked for now. A more paranoid check could
        # also compare the revision-ids themselves.
        loaded_ids, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = loaded_ids
        in_repository = len(self.repo.all_revision_ids())
        if in_repository == known:
            return known
        raise plugin_errors.BadRepositorySize(known, in_repository)
376
377
    def _save_id_map(self):
        """Write the commit-id -> revision-id map to ``id_map_path``."""
        # The whole map is written each time. If that proves a problem,
        # this can change to appending just the new entries later.
        known_ids = self.cache_mgr.revision_ids
        idmapfile.save_id_map(self.id_map_path, known_ids)
382
0.64.5 by Ian Clatworthy
first cut at generic processing method
383
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        Marked blobs are stored under ``cmd.id``; unmarked ones under
        ``osutils.sha_strings(cmd.data)``.
        """
        if cmd.mark is None:
            blob_key = osutils.sha_strings(cmd.data)
        else:
            blob_key = cmd.id
        self.cache_mgr.store_blob(blob_key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
390
391
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: the command; it is not inspected here
        """
        repo = self.repo
        # Close the current write group, persist the id map, then open a
        # fresh write group for the commits that follow.
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
397
398
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been counted, the
        commit is treated as already loaded: its head is tracked, its
        commit-id must be in the id map, its file commands are consumed
        and nothing is committed. Otherwise the commit is handed to a
        GenericCommitHandler, its revision-id is recorded and progress is
        reported. The import then either finishes (``max_commits``
        reached) or checkpoints automatically.

        :raises BadRestart: when a commit being skipped is not in the
            id map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id.
            # ('in' replaces the deprecated dict.has_key().)
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
435
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
436
    def _gen_file_ids_cache(self):
437
        """Generate the file-id cache by searching repository inventories.
438
        """
439
        # Get the interesting revisions - the heads
440
        head_ids = self.cache_mgr.heads.keys()
441
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
442
443
        # Update the fileid cache
444
        file_ids = {}
445
        for revision_id in revision_ids:
446
            inv = self.repo.revision_tree(revision_id).inventory
447
            # Cache the inventoires while we're at it
448
            self.cache_mgr.inventories[revision_id] = inv
449
            for path, ie in inv.iter_entries():
450
                file_ids[path] = ie.file_id
451
        self.cache_mgr.file_ids = file_ids
452
0.64.25 by Ian Clatworthy
slightly better progress reporting
453
    def report_progress(self, details=''):
        """Note the commit count every ``progress_every`` commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        processed = self._revision_count
        if processed % self.progress_every != 0:
            return

        total = self.total_commits
        if total is None:
            counts = "%d" % (processed,)
            eta_str = ''
        else:
            counts = "%d/%d" % (processed, total)
            eta = progress.get_eta(self._start_time, processed, total)
            eta_str = progress.str_tdelta(eta)
            # An ETA string ending in '--' is left out of the message.
            if eta_str.endswith('--'):
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_str
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
469
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
470
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # A progress bar could be used here instead.
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
474
475
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under 'refs/tags/' defines a tag. Any other
        reset is ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        ref = cmd.ref
        if not ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", ref)
            return
        self._set_tag(ref[len(tag_prefix):], cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
482
483
    def tag_handler(self, cmd):
        """Process a TagCommand by recording the tag it defines."""
        tag_name = cmd.id
        target = cmd.from_
        self._set_tag(tag_name, target)
486
487
    def _set_tag(self, name, from_):
488
        """Define a tag given a name an import 'from' reference."""
489
        bzr_tag_name = name.decode('utf-8', 'replace')
490
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
491
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
492
493
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
494
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: flag stored on the instance as ``verbose``
        :param inventory_cache_size: value passed to the LRU cache that
            holds inventories (presumably the maximum number of entries -
            confirm against lru_cache.LRUCache)
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky (kept after being fetched) when no usage
        hints are available, when it is empty, or when its id is listed
        in the blobs to keep. Otherwise it is stored for one fetch only.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs are returned and kept. Other blobs are removed from
        the cache as they are returned. A KeyError is raised if the id is
        in neither cache.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        A KeyError is raised if ``old_path`` has no cached file-id.
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # Popping before assigning keeps the entry intact when old_path
        # and new_path are the same; assign-then-delete would drop it.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
563
564
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
565
def _track_heads(cmd, cache_mgr):
566
    """Track the repository heads given a CommitCommand.
567
    
568
    :return: the list of parents in terms of commit-ids
569
    """
570
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
571
    if cmd.from_ is not None:
572
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
573
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
574
        last_id = cache_mgr.last_ids.get(cmd.ref)
575
        if last_id is not None:
576
            parents = [last_id]
577
        else:
578
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
579
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
580
    # Track the heads
581
    for parent in parents:
582
        try:
583
            del cache_mgr.heads[parent]
584
        except KeyError:
585
            # it's ok if the parent isn't there - another
586
            # commit may have already removed it
587
            pass
588
    cache_mgr.heads[cmd.id] = cmd.ref
589
    cache_mgr.last_ids[cmd.ref] = cmd.id
590
    cache_mgr.last_ref = cmd.ref
591
    return parents
592
593
0.64.5 by Ian Clatworthy
first cut at generic processing method
594
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that builds an inventory and revision for a commit.

    The file command handlers update an in-memory inventory seeded from
    the first parent; post_process_files() then hands the resulting
    revision and inventory to the loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: the commit command being processed
        :param repo: the repository revisions are loaded into
        :param cache_mgr: the cache manager shared across commits
        :param loader: the object whose load() method stores the revision
        :param verbose: if True, cache misses are reported as notes
        :param _experimental: flag stored on the instance for
            experimental behaviour
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def _format_identity(self, person):
        """Return 'name <email>' from the first two items of person."""
        return "%s <%s>" % (person[0], person[1])

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Map the parent commit-ids to revision-ids (empty if no parents)
        revision_ids = self.cache_mgr.revision_ids
        self.parents = [revision_ids[p] for p in parents]
        self.debug("revision parents are %s", self.parents)

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = self._format_identity(committer)
        author = self.command.author
        if author is not None:
            author_id = self._format_identity(author)
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        # The bound methods are the callbacks the loader uses to fetch
        # file lines and parent inventories
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a path using inline data or a cached blob."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory being built."""
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            # Either error means the file isn't in this inventory
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about a delete of a file found in no merge parent.

        Nothing is reported when the commit has at most one parent, or
        when any parent after the first has the file in its inventory.
        """
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        """Copying is not supported: always raises NotImplementedError."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Rename a path, replacing anything already at the new path."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        # Remove whatever currently occupies the destination first
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Delete-all is not supported: always raises NotImplementedError."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            self.debug("Generated new file id %s for '%s'", file_id, path)
            return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_identity(committer)
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids that are cached or in the repository
          inventories = one inventory per requested revision-id, in order
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: use the tree for revision None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory."""
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLink):
            # NOTE(review): presumably data is expected to be text here;
            # confirm how non-ASCII byte strings should be handled
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
884
885
0.64.34 by Ian Clatworthy
report lost branches
886
class GenericBranchUpdater(object):
    """Update Bazaar branches so they match the heads of an import.

    Each git-style reference in heads_by_ref is mapped to a Bazaar
    branch (branches are created when the repository is shared) and
    that branch has its last revision and tags set.
    """

    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
        """Create an object responsible for updating branches.

        :param repo: the repository; queried with is_shared() and
          iter_reverse_revision_history()
        :param branch: the default branch to update with the trunk,
          or None if there isn't one
        :param cache_mgr: an object whose revision_ids attribute maps
          commit marks to revision-ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commits marks.
        :param last_ref: the reference used as the trunk when
          refs/heads/master is not one of the heads
        :param tags: a dictionary of tag name -> revision-id;
          may be empty or None
        """
        self.repo = repo
        self.branch = branch
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref
        self.tags = tags

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, warnings are produced about the
        loss of information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        branch_tips, lost_heads = self._get_matching_branches()
        # Only report the branches that _update_branch really changed
        updated = [br for br, tip in branch_tips
                   if self._update_branch(br, tip)]
        return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: branch_tips, lost_heads where
          branch_tips = a list of (branch,tip) tuples; when there is a
            default branch it comes first, paired with the trunk's tip
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared and
            everything succeeded
        """
        branch_tips = []
        lost_heads = []
        # Take a private list of the names: the trunk is removed from it
        # below and that must neither touch heads_by_ref nor depend on
        # keys() returning a fresh list.
        ref_names = list(self.heads_by_ref)
        if self.branch is not None:
            trunk = self.select_trunk(ref_names)
            default_tip = self.heads_by_ref[trunk][0]
            branch_tips.append((self.branch, default_tip))
            ref_names.remove(trunk)

        # Convert the reference names into Bazaar speak
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Policy for locating branches
        def dir_under_current(name, ref_name):
            # Using the Bazaar name, get a directory under the current one
            return name
        def dir_sister_branch(name, ref_name):
            # Using the Bazaar name, get a sister directory to the branch
            return osutils.pathjoin(self.branch.base, "..", name)
        if self.branch is not None:
            dir_policy = dir_sister_branch
        else:
            dir_policy = dir_under_current

        # Create/track missing branches
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                location = dir_policy(name, ref_name)
                try:
                    br = self.make_branch(location)
                except errors.BzrError as ex:
                    # Report it and fall through so the head is
                    # recorded as lost
                    error("ERROR: failed to create branch %s: %s",
                        location, ex)
                else:
                    branch_tips.append((br, tip))
                    continue
            lost_heads.append((name, self.cache_mgr.revision_ids[tip]))
        return branch_tips, lost_heads

    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk.

        :return: refs/heads/master if it is one of ref_names, otherwise
          the last reference in the import stream
        """
        for candidate in ['refs/heads/master']:
            if candidate in ref_names:
                return candidate
        # Use the last reference in the import stream
        return self.last_ref

    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where to open, or failing that create, the branch
        :return: the branch found or created at location
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            return bzrdir.BzrDir.create_branch_convenience(location)

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Generate Bazaar branch names from import ref names.

        The last component of a reference is used as its Bazaar name.
        If that name is already taken, the components after any leading
        'refs' are joined with '--' and used instead.

        :return: a dictionary with Bazaar names as keys and
          the original reference names as values.
        """
        bazaar_names = {}
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                # Short name already used: fall back to the long form
                bazaar_name = "--".join(parts)
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the revision to make the tip
        :return: whether the branch was changed or not
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
        revno = len(revs)
        existing_revno, existing_last_rev_id = br.last_revision_info()
        changed = False
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
            br.set_last_revision_info(revno, last_rev_id)
            changed = True
        # apply tags known in this branch
        my_tags = {}
        if self.tags:
            # Build the set once so each tag lookup is cheap
            ancestry = set(revs)
            for tag, rev in self.tags.items():
                if rev in ancestry:
                    my_tags[tag] = rev
            if my_tags:
                br.tags._set_tag_dict(my_tags)
                changed = True
        if changed:
            tagno = len(my_tags)
            note("\t branch %s now has %d %s and %d %s", br.nick,
                revno, helpers.single_plural(revno, "revision", "revisions"),
                tagno, helpers.single_plural(tagno, "tag", "tags"))
        return changed