/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
60
# How many inventories to cache
61
_DEFAULT_INV_CACHE_SIZE = 10
62
0.64.41 by Ian Clatworthy
update multiple working trees if requested
63
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
64
class GenericProcessor(processor.ImportProcessor):
65
    """An import processor that handles basic imports.
66
67
    Current features supported:
68
0.64.16 by Ian Clatworthy
safe processing tweaks
69
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
70
    * commits of files and symlinks are supported
71
    * checkpoints automatically happen at a configurable frequency
72
      over and above the stream requested checkpoints
73
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
74
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
75
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
76
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
77
    At checkpoints and on completion, the commit-id -> revision-id map is
78
    saved to a file called 'fastimport-id-map'. If the import crashes
79
    or is interrupted, it can be started again and this file will be
80
    used to skip over already loaded revisions. The format of each line
81
    is "commit-id revision-id" so commit-ids cannot include spaces.
82
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
83
    Here are the supported parameters:
84
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
85
    * info - name of a hints file holding the analysis generated
86
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      importing large repositories, this parameter is needed so
88
      that the importer knows what blobs to intelligently cache.
89
0.64.41 by Ian Clatworthy
update multiple working trees if requested
90
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
91
      By default, the importer updates the repository
92
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
94
95
    * checkpoint - automatically checkpoint every n commits over and
96
      above any checkpoints contained in the import stream.
97
      The default is 10000.
98
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
99
    * count - only import this many commits then exit. If not set
100
      or negative, all commits are imported.
101
    
102
    * inv-cache - number of inventories to cache.
103
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
104
105
    * experimental - enable experimental mode, i.e. use features
106
      not yet fully tested.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
107
108
    * import-marks - name of file to read to load mark information from
109
110
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
111
    """
112
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
113
    known_params = [
114
        'info',
115
        'trees',
116
        'checkpoint',
117
        'count',
118
        'inv-cache',
119
        'experimental',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
120
        'import-marks',
121
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
122
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
123
124
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message text (may contain %-style placeholders)
        :param args: values handed on unchanged to the underlying note()
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
128
129
    def warning(self, msg, *args):
        """Emit a warning prefixed with the time of day and 'WARNING:'.

        :param msg: the message text (may contain %-style placeholders)
        :param args: values handed on unchanged to the underlying warning()
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
133
0.64.67 by James Westby
Add support for -Dfast-import.
134
    def debug(self, msg, *args):
        """Output a timestamped debug message if -Dfast-import was given.

        Nothing is emitted unless "fast-import" is present in
        debug.debug_flags.

        :param msg: the message text (may contain %-style placeholders)
        :param args: values handed on unchanged to mutter()
        """
        # Note: inside this method 'debug' is the module-level bzrlib.debug
        # module, not this method.
        if "fast-import" not in debug.debug_flags:
            return
        stamped = "%s DEBUG: %s" % (self._time_of_day(), msg)
        mutter(stamped, *args)
139
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
140
    def _time_of_day(self):
141
        """Time of day as a string."""
142
        # Note: this is a separate method so tests can patch in a fixed value
143
        return time.strftime("%H:%M:%S")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
144
145
    def _import_marks(self, filename):
146
        try:
147
            f = file(filename)
148
        except IOError:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
149
            self.warning(
150
                "Could not open import-marks file, not importing marks")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
151
            return
152
153
        firstline = f.readline()
154
        match = re.match(r'^format=(\d+)$', firstline)
155
        if not match:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
156
            print >>sys.stderr, "%r doesn't look like a mark file" % \
157
                (filename,)
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
158
            sys.exit(1)
159
        elif match.group(1) != '1':
160
            print >>sys.stderr, 'format version in mark file not supported'
161
            sys.exit(1)
162
163
        for string in f.readline().rstrip('\n').split('\0'):
164
            if not string:
165
                continue
166
            name, integer = string.rsplit('.', 1)
167
            # We really can't do anything with the branch information, so we
168
            # just skip it
169
            
170
        self.cache_mgr.revision_ids = {}
171
        for line in f:
172
            line = line.rstrip('\n')
173
            mark, revid = line.split(' ', 1)
174
            self.cache_mgr.revision_ids[mark] = revid
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
175
        f.close()
0.64.67 by James Westby
Add support for -Dfast-import.
176
    
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
177
    def export_marks(self, filename):
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
178
        try:
179
            f = file(filename, 'w')
180
        except IOError:
181
            self.warning(
182
                "Could not open export-marks file, not exporting marks")
183
            return
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
184
        f.write('format=1\n')
185
        f.write('\0tmp.0\n')
186
        for mark, revid in self.cache_mgr.revision_ids.iteritems():
187
            f.write('%s %s\n' % (mark, revid))
188
        f.close()
189
        
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
190
    def pre_process(self):
        """Set up importer state before any commands are processed.

        Records the start time, loads the parameters and info file,
        creates the cache manager, loads either the import-marks file or
        the id-map, picks a revision loader, disables autopacking where
        the repository is a KnitPackRepository and opens a write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # Normal (or restarted) import: consult the id-map
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            # Incremental import driven by a marks file
            self._import_marks(marks_path)
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # tag name -> revision_id
        self.tags = {}

        def fulltext_when(count):
            # Store a full-text inventory for the final commit (when the
            # total is known) and otherwise every 200 revisions.
            total = self.total_commits
            fulltext = ((total is not None and count == total)
                or count % 200 == 0)
            if fulltext:
                self.note("%d commits - storing inventory as full-text",
                    count)
            return fulltext

        # Pick the revision loader used for committing
        if hasattr(self.repo, 'revisions'):
            self.loader = revisionloader.RevisionLoader2(self.repo)
        elif self._experimental:
            self.loader = revisionloader.ImportRevisionLoader1(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)
        else:
            self.loader = revisionloader.RevisionLoader1(self.repo)

        # Switch off autopacking when the repo format has it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        self._original_max_pack_count = None
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            self._original_max_pack_count = \
                self.repo._pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            self.repo._pack_collection._max_pack_count = \
                _max_pack_count_for_import

        # Open the write group. It is committed when the import ends;
        # each checkpoint closes the current group and opens a new one.
        self.repo.start_write_group()
249
250
    def _load_info_and_params(self):
        """Initialise settings from self.params and the optional info file.

        Sets _experimental, id_map_path, info, progress_every,
        checkpoint_every, inventory_cache_size, max_commits and
        total_commits on this object.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Explicit floor division so the interval stays an integer
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
297
0.64.27 by Ian Clatworthy
1st cut at performance tuning
298
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed straight to ImportProcessor._process
        """
        completed = False
        try:
            processor.ImportProcessor._process(self, command_iter)
            completed = True
        finally:
            # On any failure, abort the open write group (if there is one)
            # and let the original exception propagate.
            if not completed:
                if self.repo is not None and self.repo.is_in_write_group():
                    self.repo.abort_write_group()
306
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
307
    def post_process(self):
        """Finish the import once all commands have been processed.

        Commits the write group, saves the id-map, optionally exports
        marks, updates the branches, optionally updates working trees,
        dumps statistics and, if autopacking was disabled earlier, packs
        the repository when more revisions than checkpoint_every were
        processed.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        marks_path = self.params.get("export-marks")
        if marks_path is not None:
            self.export_marks(marks_path)

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound under the same name in both
                # branches: it is passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
377
378
    def _get_working_trees(self, branches):
379
        """Get the working trees for branches in the repository."""
380
        result = []
381
        wt_expected = self.repo.make_working_trees()
382
        for br in branches:
383
            if br == self.branch and br is not None:
384
                wt = self.working_tree
385
            elif wt_expected:
386
                try:
387
                    wt = br.bzrdir.open_workingtree()
388
                except errors.NoWorkingTree:
389
                    self.warning("No working tree for branch %s", br)
390
                    continue
391
            else:
392
                continue
393
            result.append(wt)
394
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
395
396
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the skip_total commits that were
        skipped over; the elapsed time is measured from _start_time.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        summary = (
            revisions,
            helpers.single_plural(revisions, "revision", "revisions"),
            branches,
            helpers.single_plural(branches, "branch", "branches"),
            trees,
            helpers.single_plural(trees, "tree", "trees"),
            elapsed,
            )
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            *summary)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
406
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
407
    def _init_id_map(self):
        """Load the id-map and verify it against the repository.

        Only the sizes are compared for now; a stricter check of the
        individual revision-ids may be added later.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository's revision count
            differs from the number of entries in the map
        """
        id_map, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        in_repo = len(self.repo.all_revision_ids())
        if in_repo == known:
            return known
        raise plugin_errors.BadRepositorySize(known, in_repo)
421
422
    def _save_id_map(self):
        """Write the complete commit-id -> revision-id map to disk."""
        # The full map is rewritten on every call. Should that become a
        # bottleneck, appending only the new entries is an option.
        destination = self.id_map_path
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(destination, id_map)
427
0.64.5 by Ian Clatworthy
first cut at generic processing method
428
    def blob_handler(self, cmd):
429
        """Process a BlobCommand."""
430
        if cmd.mark is not None:
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
431
            dataref = cmd.id
0.64.5 by Ian Clatworthy
first cut at generic processing method
432
        else:
433
            dataref = osutils.sha_strings(cmd.data)
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
434
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
435
436
    def checkpoint_handler(self, cmd):
437
        """Process a CheckpointCommand."""
0.64.27 by Ian Clatworthy
1st cut at performance tuning
438
        # Commit the current write group and start a new one
439
        self.repo.commit_write_group()
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
440
        self._save_id_map()
0.64.27 by Ian Clatworthy
1st cut at performance tuning
441
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
442
443
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than skip_total commits have been seen, the commit is
        only used to track heads and is not imported again. Otherwise it
        is processed by a GenericCommitHandler, its revision-id is
        recorded, progress is reported, and the import either stops (once
        max_commits is reached) or checkpoints (every checkpoint_every
        commits).

        :param cmd: the commit command to process
        :raises BadRestart: if a commit being skipped is not in the
            commit-id -> revision-id map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return
        if self.first_incremental_commit:
            # Only done once: seed the file-ids cache from the parents of
            # the first commit of an incremental import.
            self.first_incremental_commit = None
            parents = _track_heads(cmd, self.cache_mgr)
            self._gen_file_ids_cache(parents)

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
484
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
485
    def _gen_file_ids_cache(self, revs=False):
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
486
        """Generate the file-id cache by searching repository inventories.
487
        """
488
        # Get the interesting revisions - the heads
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
489
        if revs:
490
            head_ids = revs
491
        else:
492
            head_ids = self.cache_mgr.heads.keys()
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
493
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
494
495
        # Update the fileid cache
496
        file_ids = {}
497
        for revision_id in revision_ids:
498
            inv = self.repo.revision_tree(revision_id).inventory
0.64.93 by Ian Clatworthy
minor comment clean-ups
499
            # Cache the inventories while we're at it
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
500
            self.cache_mgr.inventories[revision_id] = inv
501
            for path, ie in inv.iter_entries():
502
                file_ids[path] = ie.file_id
503
        self.cache_mgr.file_ids = file_ids
504
0.64.25 by Ian Clatworthy
slightly better progress reporting
505
    def report_progress(self, details=''):
        """Emit a progress note once every ``progress_every`` commits.

        :param details: extra text appended to the end of the note
        """
        # TODO: use a progress bar with ETA enabled
        done = self._revision_count
        if done % self.progress_every != 0:
            return
        expected = self.total_commits
        eta_str = ''
        if expected is None:
            counts = "%d" % (done,)
        else:
            counts = "%d/%d" % (done, expected)
            eta = progress.get_eta(self._start_time, done, expected)
            eta_text = progress.str_tdelta(eta)
            # An ETA rendered with trailing dashes is left out of the note
            if not eta_text.endswith('--'):
                eta_str = '[%s] ' % eta_text
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
521
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
522
    def progress_handler(self, cmd):
        """Process a ProgressCommand by relaying its message as a note."""
        # A progress bar could be driven from here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
526
527
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under refs/tags/ defines a lightweight tag when
        it has a from clause; without one it is ignored (with a warning
        in verbose mode). Resets of any other ref are not supported and
        only produce a warning.
        """
        ref = cmd.ref
        tag_prefix = 'refs/tags/'
        if not ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", ref)
            return
        tag_name = ref[len(tag_prefix):]
        source = cmd.from_
        if source is not None:
            self._set_tag(tag_name, source)
            return
        if self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
539
540
    def tag_handler(self, cmd):
        """Process a TagCommand by recording the tag it defines."""
        tag_name, target = cmd.id, cmd.from_
        self._set_tag(tag_name, target)
543
544
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
545
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
546
        bzr_tag_name = name.decode('utf-8', 'replace')
547
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
548
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
549
550
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
551
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: verbosity flag; only stored by this class
        :param inventory_cache_size: the size passed to the LRU cache
            of inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky (kept after being fetched) when no usage
        hints are available, when the data is empty or when the hints
        list the id as used more than once. Otherwise it is stored so
        that it is discarded on its first fetch.

        :param id: the dataref of the blob
        :param data: the content of the blob
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        :param id: the dataref of the blob
        :return: the stored data; non-sticky blobs are forgotten once
            they have been returned
        :raises KeyError: if no blob is stored under the dataref
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        :raises KeyError: if old_path has no file-id in the cache
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # Popping before assigning keeps the entry intact when a path is
        # renamed onto itself; assign-then-delete would drop it.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
620
621
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
622
def _track_heads(cmd, cache_mgr):
623
    """Track the repository heads given a CommitCommand.
624
    
625
    :return: the list of parents in terms of commit-ids
626
    """
627
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
628
    if cmd.from_ is not None:
629
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
630
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
631
        last_id = cache_mgr.last_ids.get(cmd.ref)
632
        if last_id is not None:
633
            parents = [last_id]
634
        else:
635
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
636
    parents.extend(cmd.merges)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
637
    # Track the heads
638
    for parent in parents:
639
        try:
640
            del cache_mgr.heads[parent]
641
        except KeyError:
642
            # it's ok if the parent isn't there - another
643
            # commit may have already removed it
644
            pass
645
    cache_mgr.heads[cmd.id] = cmd.ref
646
    cache_mgr.last_ids[cmd.ref] = cmd.id
647
    cache_mgr.last_ref = cmd.ref
648
    return parents
649
650
0.64.5 by Ian Clatworthy
first cut at generic processing method
651
class GenericCommitHandler(processor.CommitHandler):
    """Build a Bazaar revision from the file commands of one CommitCommand.

    The inventory for the new revision is seeded from the first parent in
    pre_process_files(), updated by the file command handlers and handed
    to the loader, together with the texts collected in lines_for_commit,
    in post_process_files().
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for a commit.

        :param command: the CommitCommand to process
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the cache manager shared between commits
        :param loader: the revision loader used to store the revision
        :param verbose: if True, note inventory cache misses
        :param _experimental: experimental-features flag; only stored
          by this class
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parent revisions and
        seeds the inventory and the per-commit caches.
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}
        if self.repo.supports_rich_root():
            self.lines_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Convert the parent commit-ids to bzr revision-ids
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if self.repo.supports_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        # The loader calls back for the texts and the parent inventories
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a path using inline data or a stored blob."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory.

        A path that isn't in the inventory is ignored, possibly with a
        warning - see _warn_unless_in_merges().
        """
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about an ignored delete unless a merge parent has the file.

        NOTE(review): nothing is reported when the commit has no merge
        parents at all - confirm that staying silent then is intended.
        """
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        """Copying is not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Rename a path, replacing anything already at the new path."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        # Whatever currently lives at the destination is replaced
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        # The entry is marked as changed in this revision below, so its
        # text is fetched from the loader and registered for this commit
        ie = self.inventory[file_id]
        lines = self.loader._get_lines(file_id, ie.revision)
        self.lines_for_commit[file_id] = lines
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)
        self.inventory[file_id].revision = self.revision_id

    def deleteall_handler(self, filecmd):
        """Remove every file and directory from the inventory.

        Only entries below the root of the current inventory are removed.
        The path -> file-id cache is not used to decide what to delete
        because it deliberately remembers the ids of paths that were
        deleted earlier, and those ids are not in this inventory.
        """
        self.debug("deleting all files (and also all directories)")
        # Copy the children first: removing an entry also drops it from
        # the mapping being iterated over
        for ie in list(self.inventory.root.children.values()):
            self.inventory.remove_recursive_id(ie.file_id)
        # The directories cached for this commit are gone now, so rebuild
        # the lookup used by _ensure_directory() from what is left
        self.directory_entries = dict(self.inventory.directories())
        # Keep notifying the cache manager about every path it knows of
        for path in list(self.cache_mgr.file_ids):
            self.cache_mgr._delete_path(path)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            pass
        # First time this path is seen: allocate an id and remember it
        file_id = generate_ids.gen_file_id(path)
        self.cache_mgr.file_ids[path] = file_id
        self.debug("Generated new file id %s for '%s'", file_id, path)
        return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if self.repo.supports_rich_root():
            # The very first root needs to have the right revision
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        A cache miss is served from the repository and the result is
        added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids taken to be in the repository
          inventories = one inventory per requested revision-id
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Missing revisions get the tree of the null revision
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if the kind is neither a file nor a
          symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLink):
            # NOTE(review): encoding a byte string looks suspect for
            # non-ASCII targets - confirm what type data has here
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, added to the
        inventory and cached in directory_entries.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
956
957
0.64.34 by Ian Clatworthy
report lost branches
958
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
959
0.64.64 by Ian Clatworthy
save tags known about in each branch
960
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
961
        """Create an object responsible for updating branches.
962
963
        :param heads_by_ref: a dictionary where
964
          names are git-style references like refs/heads/master;
965
          values are one item lists of commits marks.
966
        """
0.64.37 by Ian Clatworthy
create branches as required
967
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
968
        self.branch = branch
969
        self.cache_mgr = cache_mgr
970
        self.heads_by_ref = heads_by_ref
971
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
972
        self.tags = tags
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
973
974
    def update(self):
975
        """Update the Bazaar branches and tips matching the heads.
976
977
        If the repository is shared, this routine creates branches
978
        as required. If it isn't, warnings are produced about the
979
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
980
0.64.34 by Ian Clatworthy
report lost branches
981
        :return: updated, lost_heads where
982
          updated = the list of branches updated
983
          lost_heads = a list of (bazaar-name,revision) for branches that
984
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
985
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
986
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
987
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
988
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
989
            if self._update_branch(br, tip):
990
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
991
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
992
993
    def _get_matching_branches(self):
994
        """Get the Bazaar branches.
995
0.64.93 by Ian Clatworthy
minor comment clean-ups
996
        :return: default_tip, branch_tips, lost_heads where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
997
          default_tip = the last commit mark for the default branch
998
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
999
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
1000
            would have been created had the repository been shared and
1001
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1002
        """
0.64.37 by Ian Clatworthy
create branches as required
1003
        branch_tips = []
1004
        lost_heads = []
1005
        ref_names = self.heads_by_ref.keys()
1006
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1007
            trunk = self.select_trunk(ref_names)
1008
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
1009
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1010
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
1011
1012
        # Convert the reference names into Bazaar speak
1013
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
1014
0.64.37 by Ian Clatworthy
create branches as required
1015
        # Policy for locating branches
1016
        def dir_under_current(name, ref_name):
1017
            # Using the Bazaar name, get a directory under the current one
1018
            return name
1019
        def dir_sister_branch(name, ref_name):
1020
            # Using the Bazaar name, get a sister directory to the branch
1021
            return osutils.pathjoin(self.branch.base, "..", name)
1022
        if self.branch is not None:
1023
            dir_policy = dir_sister_branch
1024
        else:
1025
            dir_policy = dir_under_current
1026
0.64.34 by Ian Clatworthy
report lost branches
1027
        # Create/track missing branches
1028
        shared_repo = self.repo.is_shared()
1029
        for name in sorted(bzr_names.keys()):
1030
            ref_name = bzr_names[name]
1031
            tip = self.heads_by_ref[ref_name][0]
1032
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
1033
                location = dir_policy(name, ref_name)
1034
                try:
1035
                    br = self.make_branch(location)
1036
                    branch_tips.append((br,tip))
1037
                    continue
1038
                except errors.BzrError, ex:
1039
                    error("ERROR: failed to create branch %s: %s",
1040
                        location, ex)
1041
            lost_head = self.cache_mgr.revision_ids[tip]
1042
            lost_info = (name, lost_head)
1043
            lost_heads.append(lost_info)
1044
        return branch_tips, lost_heads
1045
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1046
    def select_trunk(self, ref_names):
1047
        """Given a set of ref names, choose one as the trunk."""
1048
        for candidate in ['refs/heads/master']:
1049
            if candidate in ref_names:
1050
                return candidate
1051
        # Use the last reference in the import stream
1052
        return self.last_ref
1053
0.64.37 by Ian Clatworthy
create branches as required
1054
    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch lives or is to be created
        :return: the branch opened at location, or a newly created one
          if there was no branch there
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            # No branch at this location yet, so create one
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
1060
1061
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
1062
        """Generate Bazaar branch names from import ref names.
1063
        
1064
        :return: a dictionary with Bazaar names as keys and
1065
          the original reference names as values.
1066
        """
0.64.34 by Ian Clatworthy
report lost branches
1067
        bazaar_names = {}
1068
        for ref_name in sorted(ref_names):
1069
            parts = ref_name.split('/')
1070
            if parts[0] == 'refs':
1071
                parts.pop(0)
1072
            full_name = "--".join(parts)
1073
            bazaar_name = parts[-1]
1074
            if bazaar_name in bazaar_names:
1075
                bazaar_name = full_name
1076
            bazaar_names[bazaar_name] = ref_name
1077
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1078
1079
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1080
        """Update a branch with last revision and tag information.
1081
        
1082
        :return: whether the branch was changed or not
1083
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1084
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
1085
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
1086
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1087
        existing_revno, existing_last_rev_id = br.last_revision_info()
1088
        changed = False
1089
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
1090
            br.set_last_revision_info(revno, last_rev_id)
1091
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
1092
        # apply tags known in this branch
1093
        my_tags = {}
1094
        if self.tags:
1095
            for tag,rev in self.tags.items():
1096
                if rev in revs:
1097
                    my_tags[tag] = rev
1098
            if my_tags:
1099
                br.tags._set_tag_dict(my_tags)
1100
                changed = True
1101
        if changed:
1102
            tagno = len(my_tags)
1103
            note("\t branch %s now has %d %s and %d %s", br.nick,
1104
                revno, helpers.single_plural(revno, "revision", "revisions"),
1105
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1106
        return changed