/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.77 by Ian Clatworthy
add inv-fulltext option and improve speed
60
# How many commits before each inventory fulltext
61
_DEFAULT_INV_FULLTEXT = 200
62
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
63
# How many inventories to cache
64
_DEFAULT_INV_CACHE_SIZE = 10
65
0.64.41 by Ian Clatworthy
update multiple working trees if requested
66
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
67
class GenericProcessor(processor.ImportProcessor):
68
    """An import processor that handles basic imports.
69
70
    Current features supported:
71
0.64.16 by Ian Clatworthy
safe processing tweaks
72
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
73
    * files and symlinks commits are supported
74
    * checkpoints automatically happen at a configurable frequency
75
      over and above the stream requested checkpoints
76
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
77
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
78
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
79
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
80
    At checkpoints and on completion, the commit-id -> revision-id map is
81
    saved to a file called 'fastimport-id-map'. If the import crashes
82
    or is interrupted, it can be started again and this file will be
83
    used to skip over already loaded revisions. The format of each line
84
    is "commit-id revision-id" so commit-ids cannot include spaces.
85
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
86
    Here are the supported parameters:
87
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
88
    * info - name of a hints file holding the analysis generated
89
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
90
      importing large repositories, this parameter is needed so
91
      that the importer knows what blobs to intelligently cache.
92
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
94
      By default, the importer updates the repository
95
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
96
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
97
98
    * checkpoint - automatically checkpoint every n commits over and
99
      above any checkpoints contained in the import stream.
100
      The default is 10000.
101
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
102
    * count - only import this many commits then exit. If not set
103
      or negative, all commits are imported.
104
    
0.64.77 by Ian Clatworthy
add inv-fulltext option and improve speed
105
    * inv-fulltext - create an inventory fulltext every n commits.
106
      The default is 200.
107
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
108
    * inv-cache - number of inventories to cache.
109
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
110
111
    * experimental - enable experimental mode, i.e. use features
112
      not yet fully tested.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
113
114
    * import-marks - name of file to read to load mark information from
115
116
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
117
    """
118
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
119
    known_params = [
120
        'info',
121
        'trees',
122
        'checkpoint',
123
        'count',
124
        'inv-cache',
0.64.77 by Ian Clatworthy
add inv-fulltext option and improve speed
125
        'inv-fulltext',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
126
        'experimental',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
127
        'import-marks',
128
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
129
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
130
131
    def note(self, msg, *args):
        """Emit a note through the trace module, prefixed with the time of day.

        :param msg: the message text; it may contain %-style placeholders
        :param args: values for the placeholders, passed through untouched
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
135
136
    def warning(self, msg, *args):
        """Emit a warning through the trace module, prefixed with the time.

        The text is tagged with ``WARNING:`` after the timestamp.

        :param msg: the message text; it may contain %-style placeholders
        :param args: values for the placeholders, passed through untouched
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
140
0.64.67 by James Westby
Add support for -Dfast-import.
141
    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given.

        Nothing is emitted unless "fast-import" is among the active debug
        flags. When it is, the message is timestamped, tagged with
        ``DEBUG:`` and sent to ``mutter``.

        :param msg: the message text; it may contain %-style placeholders
        :param args: values for the placeholders, passed through untouched
        """
        # The parameter used to be spelled 'mgs' while the body read 'msg',
        # so every call failed as soon as the debug flag was switched on.
        if "fast-import" not in debug.debug_flags:
            return
        msg = "%s DEBUG: %s" % (self._time_of_day(), msg)
        mutter(msg, *args)
146
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
147
    def _time_of_day(self):
148
        """Time of day as a string."""
149
        # Note: this is a separate method so tests can patch in a fixed value
150
        return time.strftime("%H:%M:%S")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
151
152
    def _import_marks(self, filename):
153
        try:
154
            f = file(filename)
155
        except IOError:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
156
            self.warning(
157
                "Could not open import-marks file, not importing marks")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
158
            return
159
160
        firstline = f.readline()
161
        match = re.match(r'^format=(\d+)$', firstline)
162
        if not match:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
163
            print >>sys.stderr, "%r doesn't look like a mark file" % \
164
                (filename,)
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
165
            sys.exit(1)
166
        elif match.group(1) != '1':
167
            print >>sys.stderr, 'format version in mark file not supported'
168
            sys.exit(1)
169
170
        for string in f.readline().rstrip('\n').split('\0'):
171
            if not string:
172
                continue
173
            name, integer = string.rsplit('.', 1)
174
            # We really can't do anything with the branch information, so we
175
            # just skip it
176
            
177
        self.cache_mgr.revision_ids = {}
178
        for line in f:
179
            line = line.rstrip('\n')
180
            mark, revid = line.split(' ', 1)
181
            self.cache_mgr.revision_ids[mark] = revid
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
182
        f.close()
0.64.67 by James Westby
Add support for -Dfast-import.
183
    
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
184
    def export_marks(self, filename):
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
185
        try:
186
            f = file(filename, 'w')
187
        except IOError:
188
            self.warning(
189
                "Could not open export-marks file, not exporting marks")
190
            return
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
191
        f.write('format=1\n')
192
        f.write('\0tmp.0\n')
193
        for mark, revid in self.cache_mgr.revision_ids.iteritems():
194
            f.write('%s %s\n' % (mark, revid))
195
        f.close()
196
        
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
197
    def pre_process(self):
        """Prepare for the import before any command is processed.

        In order: record the start time, load the info file and parameters,
        create the cache manager, restore the known commit ids (from the
        import-marks file when one is given, otherwise from the id-map),
        choose the revision loader, disable autopacking when the repository
        is a KnitPackRepository, and open the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # No marks file: fall back on the id-map to skip what is loaded
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            self._import_marks(marks_path)
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing. Which class is
        # used depends on experimental mode and on whether the repository
        # exposes a 'revisions' attribute.
        new_repo_api = hasattr(self.repo, 'revisions')
        if not self._experimental:
            if new_repo_api:
                loader_factory = revisionloader.RevisionLoader2
            else:
                loader_factory = revisionloader.RevisionLoader1
            self.loader = loader_factory(self.repo)
        else:
            def fulltext_when(count):
                # A fulltext is stored for the final commit (when the total
                # is known) and every inv_fulltext_every commits.
                total = self.total_commits
                at_last_commit = total is not None and count == total
                fulltext = (at_last_commit or
                    count % self.inv_fulltext_every == 0)
                if fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return fulltext

            if new_repo_api:
                loader_factory = revisionloader.ImportRevisionLoader2
            else:
                loader_factory = revisionloader.ImportRevisionLoader1
            self.loader = loader_factory(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        self._original_max_pack_count = None
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            pack_collection = self.repo._pack_collection
            self._original_max_pack_count = pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _max_pack_count_for_import

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
261
262
    def _load_info_and_params(self):
        """Turn ``self.params`` and the optional info file into attributes.

        Sets ``_experimental``, ``id_map_path``, ``info``,
        ``progress_every``, ``checkpoint_every``, ``inv_fulltext_every``,
        ``inventory_cache_size``, ``max_commits`` and ``total_commits``.
        """
        params = self.params
        self._experimental = bool(params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        control_transport = self.repo.control_files._transport
        self.id_map_path = control_transport.local_abspath(
            "fastimport-id-map")

        # Load the info file, if any
        info_path = params.get('info')
        if info_path is None:
            self.info = None
        else:
            self.info = configobj.ConfigObj(info_path)

        # Decide how often to automatically report progress
        # (not a parameter yet); verbose mode reports ten times as often
        if self.verbose:
            self.progress_every = _DEFAULT_AUTO_PROGRESS / 10
        else:
            self.progress_every = _DEFAULT_AUTO_PROGRESS

        # Decide how often to automatically checkpoint
        checkpoint = params.get('checkpoint', _DEFAULT_AUTO_CHECKPOINT)
        self.checkpoint_every = int(checkpoint)

        # Decide how often to fulltext the inventory
        inv_fulltext = params.get('inv-fulltext', _DEFAULT_INV_FULLTEXT)
        self.inv_fulltext_every = int(inv_fulltext)

        # Decide how big to make the inventory cache
        inv_cache = params.get('inv-cache', _DEFAULT_INV_CACHE_SIZE)
        self.inventory_cache_size = int(inv_cache)

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            requested = params['count']
        except KeyError:
            limit = None
        else:
            limit = int(requested)
            if limit < 0:
                limit = None
        self.max_commits = limit

        if self.info is None:
            total = limit
        else:
            total = int(self.info['Command counts']['commit'])
            if limit is not None and total > limit:
                total = limit
        self.total_commits = total
0.64.25 by Ian Clatworthy
slightly better progress reporting
313
0.64.27 by Ian Clatworthy
1st cut at performance tuning
314
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed straight through to
            ``processor.ImportProcessor._process``
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately catches everything: whatever went wrong, abort
            # the write group (if any) and then let the error propagate.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
322
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
323
    def post_process(self):
        """Finish the import once all commands have been processed.

        In order: commit the write group and save the id-map, export marks
        if the ``export-marks`` parameter is set, update the branches and
        report any that could not be created, update working trees if the
        ``trees`` parameter is set, dump statistics, and finally pack the
        repository when autopacking was disabled by pre_process and more
        revisions than one checkpoint interval were processed.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            self.export_marks(self.params.get("export-marks"))

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # This used to bind 'report' in verbose mode while the loop
                # below read 'reporter', which failed with a NameError.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
393
394
    def _get_working_trees(self, branches):
395
        """Get the working trees for branches in the repository."""
396
        result = []
397
        wt_expected = self.repo.make_working_trees()
398
        for br in branches:
399
            if br == self.branch and br is not None:
400
                wt = self.working_tree
401
            elif wt_expected:
402
                try:
403
                    wt = br.bzrdir.open_workingtree()
404
                except errors.NoWorkingTree:
405
                    self.warning("No working tree for branch %s", br)
406
                    continue
407
            else:
408
                continue
409
            result.append(wt)
410
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
411
412
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the commits that were skipped because
        they were already loaded; the elapsed time is measured from the
        start time recorded by pre_process.
        """
        elapsed = time.time() - self._start_time
        time_required = progress.str_tdelta(elapsed)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions,
            helpers.single_plural(revisions, "revision", "revisions"),
            branches,
            helpers.single_plural(branches, "branch", "branches"),
            trees,
            helpers.single_plural(trees, "tree", "trees"),
            time_required)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
422
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
423
    def _init_id_map(self):
        """Load the id-map and check that it matches the repository.

        The loaded map is stored on the cache manager as ``revision_ids``.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds a different
            number of revisions than the map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        loaded = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids, known = loaded
        in_repository = len(self.repo.all_revision_ids())
        if in_repository == known:
            return known
        raise plugin_errors.BadRepositorySize(known, in_repository)
437
438
    def _save_id_map(self):
        """Write the cache manager's revision-id map to ``id_map_path``."""
        # The whole map is written out every time. If this proves a problem,
        # it can be changed to 'append just the new ones' at a later time.
        target = self.id_map_path
        idmapfile.save_id_map(target, self.cache_mgr.revision_ids)
443
0.64.5 by Ian Clatworthy
first cut at generic processing method
444
    def blob_handler(self, cmd):
445
        """Process a BlobCommand."""
446
        if cmd.mark is not None:
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
447
            dataref = cmd.id
0.64.5 by Ian Clatworthy
first cut at generic processing method
448
        else:
449
            dataref = osutils.sha_strings(cmd.data)
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
450
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
451
452
    def checkpoint_handler(self, cmd):
453
        """Process a CheckpointCommand."""
0.64.27 by Ian Clatworthy
1st cut at performance tuning
454
        # Commit the current write group and start a new one
455
        self.repo.commit_write_group()
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
456
        self._save_id_map()
0.64.27 by Ian Clatworthy
1st cut at performance tuning
457
        self.repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
458
459
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While already-loaded commits are being skipped, the command is only
        used to track heads and is checked against the known commit ids.
        Otherwise the commit is loaded, recorded in the id map and counted,
        after which the import either stops (requested count reached) or
        checkpoints automatically (every ``checkpoint_every`` commits).

        :param cmd: the commit command to process
        :raises BadRestart: if a commit that should be skipped is not in
            the known commit ids
        """
        cache_mgr = self.cache_mgr
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for _file_cmd in cmd.file_iter():
                pass
            cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(cache_mgr.file_ids.keys()))
            return

        if self.first_incremental_commit:
            # First commit after importing marks: seed the file-ids cache
            # from the heads returned for this commit.
            self.first_incremental_commit = None
            self._gen_file_ids_cache(_track_heads(cmd, cache_mgr))

        # 'Commit' the revision and report progress
        committer = GenericCommitHandler(cmd, self.repo, cache_mgr,
            self.loader, self.verbose, self._experimental)
        committer.process()
        cache_mgr.revision_ids[cmd.id] = committer.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        limit = self.max_commits
        done = self._revision_count
        if limit is not None and done >= limit:
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif done % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered", done)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
500
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
501
    def _gen_file_ids_cache(self, revs=False):
        """Rebuild the path -> file-id cache from repository inventories.

        :param revs: optional collection of import commit-ids whose
            inventories should be scanned. When it is false (the default,
            or an empty collection), every currently tracked head is used.
        """
        cache = self.cache_mgr
        # Pick the commits of interest - explicit ones, else the heads
        commit_ids = revs or cache.heads.keys()

        # Map all commit-ids to bzr revision-ids before touching any cache
        bzr_rev_ids = []
        for commit_id in commit_ids:
            bzr_rev_ids.append(cache.revision_ids[commit_id])

        # Collect the file-ids, remembering each inventory as we go
        path_to_id = {}
        for rev_id in bzr_rev_ids:
            tree = self.repo.revision_tree(rev_id)
            inv = tree.inventory
            cache.inventories[rev_id] = inv
            path_to_id.update(
                (path, entry.file_id) for path, entry in inv.iter_entries())
        cache.file_ids = path_to_id
520
0.64.25 by Ian Clatworthy
slightly better progress reporting
521
    def report_progress(self, details=''):
        """Emit a progress note every ``progress_every`` commits.

        :param details: extra text appended to the end of the note
        """
        # TODO: use a progress bar with ETA enabled
        done = self._revision_count
        if done % self.progress_every != 0:
            return
        total = self.total_commits
        eta_text = ''
        if total is None:
            counts = "%d" % (done,)
        else:
            counts = "%d/%d" % (done, total)
            remaining = progress.str_tdelta(
                progress.get_eta(self._start_time, done, total))
            # A result ending in '--' is not shown at all
            if not remaining.endswith('--'):
                eta_text = '[%s] ' % remaining
        self.note("%s commits processed %s%s" % (counts, eta_text, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
537
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
538
    def progress_handler(self, cmd):
        """Process a ProgressCommand by echoing its message as a note."""
        # We could use a progress bar here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
542
543
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Only resets of refs under refs/tags/ are acted on: with a from
        clause they define a tag, without one they are ignored (with a
        warning in verbose mode). Any other reset just gets a warning.
        """
        tag_prefix = 'refs/tags/'
        ref = cmd.ref
        if not ref.startswith(tag_prefix):
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", ref)
            return
        tag_name = ref[len(tag_prefix):]
        source = cmd.from_
        if source is not None:
            self._set_tag(tag_name, source)
            return
        if self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
555
556
    def tag_handler(self, cmd):
        """Process a TagCommand by recording the tag it defines."""
        tag_name = cmd.id
        source = cmd.from_
        self._set_tag(tag_name, source)
559
560
    def _set_tag(self, name, from_):
        """Record a tag for the revision an import reference maps to.

        :param name: the tag name as a utf-8 byte string; undecodable
            bytes are replaced rather than rejected
        :param from_: the import commit-id the tag points at
        """
        tag = name.decode('utf-8', 'replace')
        self.tags[tag] = self.cache_mgr.revision_ids[from_]
0.64.5 by Ian Clatworthy
first cut at generic processing method
565
566
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
567
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the manager as ``self.verbose``
        :param inventory_cache_size: the size passed to the LRU cache
            holding inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is kept as sticky when no usage hints are available,
        when it is empty, or when the hints list its id; otherwise it
        is stored so that the first fetch removes it.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs are returned and kept; any other blob is removed
        as it is returned.

        :raises KeyError: if no blob is stored under id
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        :raises KeyError: if old_path has no cached file-id
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # Remove before adding so that renaming a path onto itself
        # keeps its file-id instead of dropping it from the cache.
        self.file_ids[new_path] = self.file_ids.pop(old_path)
0.64.16 by Ian Clatworthy
safe processing tweaks
636
637
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
638
def _track_heads(cmd, cache_mgr):
    """Update the head-tracking caches for a CommitCommand.

    The commit's parents stop being heads, the commit itself becomes
    a head, and it is recorded as the last commit seen on its ref.

    :return: the list of parents in terms of commit-ids
    """
    # Work out the first parent: the from clause when given,
    # otherwise the last commit seen on the same ref (if any)
    first_parent = cmd.from_
    if first_parent is None:
        first_parent = cache_mgr.last_ids.get(cmd.ref)
    parents = []
    if first_parent is not None:
        parents.append(first_parent)
    parents.extend(cmd.merges)

    # Parents are no longer heads. A parent may be missing already
    # because another commit has removed it - that's fine.
    heads = cache_mgr.heads
    for parent in parents:
        heads.pop(parent, None)
    heads[cmd.id] = cmd.ref
    cache_mgr.last_ids[cmd.ref] = cmd.id
    cache_mgr.last_ref = cmd.ref
    return parents
665
666
0.64.5 by Ian Clatworthy
first cut at generic processing method
667
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that turns one CommitCommand into a bzr revision.

    The file commands of the commit update an in-memory inventory and a
    per-commit cache of text lines; post_process_files() then hands the
    revision, inventory and texts to the revision loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for a single commit.

        :param command: the CommitCommand being processed
        :param repo: the repository queried for inventories and revisions
        :param cache_mgr: the cache manager holding blobs, inventories,
            file-ids and head tracking data
        :param loader: the revision loader used to store the revision
        :param verbose: if True, inventory cache misses are noted
        :param _experimental: stored as ``self._experimental``
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision-id, works out the parents and seeds the
        inventory for this commit from the first parent (if any).
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}
        if self.repo.supports_rich_root():
            self.lines_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Convert the parent commit-ids to bzr revision-ids
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]
        self.debug("revision parents are %s", str(self.parents))

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if self.repo.supports_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Process a file modification, using blob or inline data."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Process a file deletion.

        A path missing from the inventory is not an error; see
        _warn_unless_in_merges() for when it is reported.
        """
        path = filecmd.path
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        try:
            del self.inventory[fileid]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about a failed delete unless a merge parent has the file."""
        # NOTE(review): with no merge parents at all this returns without
        # warning - confirm that staying silent in that case is intended.
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        """Process a file copy - not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Process a file rename, replacing anything at the new path."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        # Give the renamed entry a text in this revision: fetch the lines
        # of its current revision and register them for this commit
        ie = self.inventory[file_id]
        lines = self.loader._get_lines(file_id, ie.revision)
        self.lines_for_commit[file_id] = lines
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)
        self.inventory[file_id].revision = self.revision_id

    def deleteall_handler(self, filecmd):
        """Process a 'deleteall' command by emptying the inventory.

        Only entries present in the current inventory are removed. The
        file-id cache is not a reliable list of them: it keeps paths
        that were deleted earlier and is not limited to this inventory.
        """
        self.debug("deleting all files (and also all directories)")
        # Remove every top-level entry along with everything below it.
        # Snapshot the children first in case removal mutates the mapping.
        for ie in list(self.inventory.root.children.values()):
            self.inventory.remove_recursive_id(ie.file_id)
        # The directory cache may now refer to removed entries - rebuild it
        self.directory_entries = dict(self.inventory.directories())
        for path in list(self.cache_mgr.file_ids):
            self.cache_mgr._delete_path(path)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            file_id = self.cache_mgr.file_ids[path]
            return file_id, False
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            self.debug("Generated new file id %s for '%s'", file_id, path)
            return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if self.repo.supports_rich_root():
            # The very first root needs to have the right revision
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0],committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is consulted first; on a miss the inventory
        is read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids found in the cache or repository
          inventories = one inventory per requested revision-id
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: use the tree for revision None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if the entry made for kind is neither
            a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLink):
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
972
973
0.64.34 by Ian Clatworthy
report lost branches
974
class GenericBranchUpdater(object):
    """Point Bazaar branches at the heads recorded during an import.

    One reference is mapped onto the supplied branch (the trunk); the
    remaining references become separate branches when the repository is
    shared, or are reported back as lost heads when it is not.
    """

    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
        """Create an object responsible for updating branches.

        :param repo: the repository holding the imported revisions
        :param branch: the branch to use as the trunk, or None
        :param cache_mgr: an object whose revision_ids attribute maps
          commit marks to revision ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commits marks.
        :param last_ref: the reference used as the trunk when
          refs/heads/master is not among the heads
        :param tags: a mapping of tag names to revision ids; may be
          empty or None
        """
        self.repo = repo
        self.branch = branch
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref
        self.tags = tags

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, warnings are produced about the
        loss of information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        updated = []
        branch_tips, lost_heads = self._get_matching_branches()
        for br, tip in branch_tips:
            if self._update_branch(br, tip):
                updated.append(br)
        return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: branch_tips, lost_heads where
          branch_tips = a list of (branch,tip) tuples, starting with the
            trunk branch when one was supplied
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared and
            everything succeeded
        """
        branch_tips = []
        lost_heads = []
        # Take a real list: the trunk is removed from it below, which
        # must neither touch heads_by_ref nor depend on keys() being a list
        ref_names = list(self.heads_by_ref.keys())
        if self.branch is not None:
            trunk = self.select_trunk(ref_names)
            default_tip = self.heads_by_ref[trunk][0]
            branch_tips.append((self.branch, default_tip))
            ref_names.remove(trunk)

        # Convert the reference names into Bazaar speak
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Policy for locating branches
        def dir_under_current(name, ref_name):
            # Using the Bazaar name, get a directory under the current one
            return name

        def dir_sister_branch(name, ref_name):
            # Using the Bazaar name, get a sister directory to the branch
            return osutils.pathjoin(self.branch.base, "..", name)

        if self.branch is not None:
            dir_policy = dir_sister_branch
        else:
            dir_policy = dir_under_current

        # Create/track missing branches
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names.keys()):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                location = dir_policy(name, ref_name)
                try:
                    br = self.make_branch(location)
                except errors.BzrError as ex:
                    # Report the failure and fall through so that the
                    # head is recorded as lost
                    error("ERROR: failed to create branch %s: %s",
                        location, ex)
                else:
                    branch_tips.append((br, tip))
                    continue
            lost_head = self.cache_mgr.revision_ids[tip]
            lost_info = (name, lost_head)
            lost_heads.append(lost_info)
        return branch_tips, lost_heads

    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk.

        :param ref_names: the candidate reference names
        :return: refs/heads/master if it is a candidate, otherwise
          the last reference in the import stream
        """
        for candidate in ['refs/heads/master']:
            if candidate in ref_names:
                return candidate
        # Use the last reference in the import stream
        return self.last_ref

    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch lives or is to be created
        :return: the existing branch at location, or a newly created one
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            return bzrdir.BzrDir.create_branch_convenience(location)

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Generate Bazaar branch names from import ref names.

        The last path component is used as the name. When that name is
        already taken, the whole reference (minus any leading 'refs')
        joined with '--' is used instead.

        :return: a dictionary with Bazaar names as keys and
          the original reference names as values.
        """
        bazaar_names = {}
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            full_name = "--".join(parts)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                bazaar_name = full_name
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the revision to make the tip
        :return: whether the branch was changed or not; it counts as
          changed when its last revision info is rewritten or when at
          least one tag is applied
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
        revno = len(revs)
        existing_revno, existing_last_rev_id = br.last_revision_info()
        changed = False
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
            br.set_last_revision_info(revno, last_rev_id)
            changed = True
        # apply tags known in this branch
        my_tags = {}
        if self.tags:
            # A set keeps each membership test cheap however long
            # the history of the branch is
            revs_in_branch = set(revs)
            for tag, rev in self.tags.items():
                if rev in revs_in_branch:
                    my_tags[tag] = rev
            if my_tags:
                br.tags._set_tag_dict(my_tags)
                changed = True
        if changed:
            tagno = len(my_tags)
            note("\t branch %s now has %d %s and %d %s", br.nick,
                revno, helpers.single_plural(revno, "revision", "revisions"),
                tagno, helpers.single_plural(tagno, "tag", "tags"))
        return changed