/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.67 by James Westby
Add support for -Dfast-import.
25
    debug,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
26
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
27
    errors,
28
    generate_ids,
29
    inventory,
30
    lru_cache,
31
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
32
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
33
    revision,
34
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
35
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
36
    )
0.64.51 by Ian Clatworthy
disable autopacking
37
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
38
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
39
    error,
40
    mutter,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
    note,
42
    warning,
43
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
44
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
45
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
46
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
47
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
48
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
49
    processor,
50
    revisionloader,
51
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
53
0.64.41 by Ian Clatworthy
update multiple working trees if requested
54
# How many commits before automatically reporting progress
55
_DEFAULT_AUTO_PROGRESS = 1000
56
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
57
# How many commits before automatically checkpointing
58
_DEFAULT_AUTO_CHECKPOINT = 10000
59
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
60
# How many inventories to cache
61
_DEFAULT_INV_CACHE_SIZE = 10
62
0.64.41 by Ian Clatworthy
update multiple working trees if requested
63
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
64
class GenericProcessor(processor.ImportProcessor):
65
    """An import processor that handles basic imports.
66
67
    Current features supported:
68
0.64.16 by Ian Clatworthy
safe processing tweaks
69
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
70
    * files and symlinks commits are supported
71
    * checkpoints automatically happen at a configurable frequency
72
      over and above the stream requested checkpoints
73
    * timestamped progress reporting, both automatic and stream requested
0.64.41 by Ian Clatworthy
update multiple working trees if requested
74
    * LATER: reset support, tags for each branch
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
75
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
76
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
77
    At checkpoints and on completion, the commit-id -> revision-id map is
78
    saved to a file called 'fastimport-id-map'. If the import crashes
79
    or is interrupted, it can be started again and this file will be
80
    used to skip over already loaded revisions. The format of each line
81
    is "commit-id revision-id" so commit-ids cannot include spaces.
82
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
83
    Here are the supported parameters:
84
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
85
    * info - name of a hints file holding the analysis generated
86
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      importing large repositories, this parameter is needed so
88
      that the importer knows what blobs to intelligently cache.
89
0.64.41 by Ian Clatworthy
update multiple working trees if requested
90
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
91
      By default, the importer updates the repository
92
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
93
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
94
95
    * checkpoint - automatically checkpoint every n commits over and
96
      above any checkpoints contained in the import stream.
97
      The default is 10000.
98
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
99
    * count - only import this many commits then exit. If not set
100
      or negative, all commits are imported.
101
    
102
    * inv-cache - number of inventories to cache.
103
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
104
105
    * experimental - enable experimental mode, i.e. use features
106
      not yet fully tested.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
107
108
    * import-marks - name of file to read to load mark information from
109
110
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
111
    """
112
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
113
    known_params = [
114
        'info',
115
        'trees',
116
        'checkpoint',
117
        'count',
118
        'inv-cache',
119
        'experimental',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
120
        'import-marks',
121
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
122
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
123
124
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message; it is passed on as a format string
        :param args: values forwarded unchanged to the underlying note()
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
128
129
    def warning(self, msg, *args):
        """Emit a warning prefixed with the time of day and 'WARNING:'.

        :param msg: the message; it is passed on as a format string
        :param args: values forwarded unchanged to the underlying warning()
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
133
0.64.67 by James Westby
Add support for -Dfast-import.
134
    def debug(self, msg, *args):
        """Output a debug message if the appropriate -D option was given.

        The message is only produced when "fast-import" is present in
        ``debug.debug_flags``; it is then timestamped, tagged with
        'DEBUG:' and sent to mutter().

        :param msg: the message; it is passed on as a format string
        :param args: values forwarded unchanged to mutter()
        """
        # The parameter was previously misspelt 'mgs', which made the
        # formatting line below fail with an unbound 'msg'.
        if "fast-import" not in debug.debug_flags:
            return
        stamped = "%s DEBUG: %s" % (self._time_of_day(), msg)
        mutter(stamped, *args)
139
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
140
    def _time_of_day(self):
141
        """Time of day as a string."""
142
        # Note: this is a separate method so tests can patch in a fixed value
143
        return time.strftime("%H:%M:%S")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
144
145
    def _import_marks(self, filename):
146
        try:
147
            f = file(filename)
148
        except IOError:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
149
            self.warning(
150
                "Could not open import-marks file, not importing marks")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
151
            return
152
153
        firstline = f.readline()
154
        match = re.match(r'^format=(\d+)$', firstline)
155
        if not match:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
156
            print >>sys.stderr, "%r doesn't look like a mark file" % \
157
                (filename,)
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
158
            sys.exit(1)
159
        elif match.group(1) != '1':
160
            print >>sys.stderr, 'format version in mark file not supported'
161
            sys.exit(1)
162
163
        for string in f.readline().rstrip('\n').split('\0'):
164
            if not string:
165
                continue
166
            name, integer = string.rsplit('.', 1)
167
            # We really can't do anything with the branch information, so we
168
            # just skip it
169
            
170
        self.cache_mgr.revision_ids = {}
171
        for line in f:
172
            line = line.rstrip('\n')
173
            mark, revid = line.split(' ', 1)
174
            self.cache_mgr.revision_ids[mark] = revid
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
175
        f.close()
0.64.67 by James Westby
Add support for -Dfast-import.
176
    
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
177
    def export_marks(self, filename):
        """Write the mark -> revision-id map to a marks file.

        The output is a ``format=1`` header line, a fixed second line
        (``\\0tmp.0``) and then one ``<mark> <revision-id>`` line for each
        entry in ``self.cache_mgr.revision_ids``.

        If the file cannot be opened for writing, a warning is issued and
        nothing is written.

        :param filename: path of the marks file to write
        """
        try:
            f = open(filename, 'w')
        except IOError:
            self.warning(
                "Could not open export-marks file, not exporting marks")
            return
        # Close the file even if one of the writes fails.
        try:
            f.write('format=1\n')
            # _import_marks() treats the second line as branch information
            # and skips it.
            f.write('\0tmp.0\n')
            for mark, revid in self.cache_mgr.revision_ids.items():
                f.write('%s %s\n' % (mark, revid))
        finally:
            f.close()
189
        
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
190
    def pre_process(self):
        """Set up state before any commands from the stream are handled.

        This records the start time, loads the parameters, creates the
        cache manager, works out which commits can be skipped (from an
        import-marks file or the saved id-map), selects a revision loader,
        disables autopacking on pack repositories and finally opens a
        write group on the repository.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            self._import_marks(marks_path)
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing. Repositories
        # exposing a 'revisions' attribute use the newer API.
        if hasattr(self.repo, 'revisions'):
            self.loader = revisionloader.RevisionLoader2(self.repo)
        elif self._experimental:
            def fulltext_when(count):
                total = self.total_commits
                is_final_commit = total is not None and count == total
                # Create an inventory fulltext every 200 revisions,
                # and for the final commit when the total is known.
                fulltext = is_final_commit or count % 200 == 0
                if fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return fulltext

            self.loader = revisionloader.ImportRevisionLoader1(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)
        else:
            self.loader = revisionloader.RevisionLoader1(self.repo)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        self._original_max_pack_count = None
        if isinstance(self.repo, pack_repo.KnitPackRepository):
            pack_collection = self.repo._pack_collection
            self._original_max_pack_count = pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _max_pack_count_for_import

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()
249
250
    def _load_info_and_params(self):
        """Read self.params and the optional info file into attributes.

        Sets ``_experimental``, ``id_map_path``, ``info``,
        ``progress_every``, ``checkpoint_every``, ``inventory_cache_size``,
        ``max_commits`` and ``total_commits``.
        """
        self._experimental = bool(self.params.get('experimental', False))

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division keeps this an integer even when '/' means
            # true division.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                # A negative count means "import everything".
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            # Never report a total larger than the number we will import.
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits
0.64.25 by Ian Clatworthy
slightly better progress reporting
297
0.64.27 by Ian Clatworthy
1st cut at performance tuning
298
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed straight to ImportProcessor._process()
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately catch everything: whatever went wrong, abort
            # the write group (if any) and then let the error propagate.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
306
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
307
    def post_process(self):
        """Finish the import once every command has been processed.

        Commits the open write group, saves the id-map, optionally
        exports marks, updates the branches, optionally updates working
        trees (the 'trees' parameter), dumps statistics and, where
        autopacking was disabled earlier, packs the repository.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            self.export_marks(self.params.get("export-marks"))

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The verbose branch used to bind 'report', leaving
                # 'reporter' undefined for the loop below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
377
378
    def _get_working_trees(self, branches):
379
        """Get the working trees for branches in the repository."""
380
        result = []
381
        wt_expected = self.repo.make_working_trees()
382
        for br in branches:
383
            if br == self.branch and br is not None:
384
                wt = self.working_tree
385
            elif wt_expected:
386
                try:
387
                    wt = br.bzrdir.open_workingtree()
388
                except errors.NoWorkingTree:
389
                    self.warning("No working tree for branch %s", br)
390
                    continue
391
            else:
392
                continue
393
            result.append(wt)
394
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
395
396
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure is the running revision count minus the
        number of commits that were skipped; the elapsed time is measured
        from ``self._start_time``.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(revisions,
            "revision", "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
406
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
407
    def _init_id_map(self):
        """Load the id-map and sanity check it against the repository.

        The loaded map is stored in ``self.cache_mgr.revision_ids``.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer
            revisions than the id-map knows about
        """
        # Currently, only the sizes are compared. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        revision_ids, known = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = revision_ids
        existing_count = len(self.repo.all_revision_ids())
        if known > existing_count:
            raise plugin_errors.BadRepositorySize(known, existing_count)
        return known
421
422
    def _save_id_map(self):
        """Write the cache manager's ``revision_ids`` map to the id-map file."""
        # The complete map is written on every call. Switching to
        # 'append just the new ones' is possible later if this proves slow.
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(self.id_map_path, id_map)
427
0.64.5 by Ian Clatworthy
first cut at generic processing method
428
    def blob_handler(self, cmd):
        """Process a BlobCommand by caching its data.

        The blob is stored under the command's id when it carries a mark,
        otherwise under the SHA-1 computed from its data.
        """
        if cmd.mark is None:
            blob_key = osutils.sha_strings(cmd.data)
        else:
            blob_key = cmd.id
        self.cache_mgr.store_blob(blob_key, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
435
436
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        ``cmd`` is not inspected; ``commit_handler`` passes None when it
        triggers an automatic checkpoint.
        """
        repo = self.repo
        # Close off the current write group, save the id-map, then open
        # a fresh write group for the commits that follow.
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
442
443
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While fewer than ``skip_total`` commits have been seen, the commit
        is not imported: its heads are tracked, its id must already be in
        the id-map, and its file commands are drained. Otherwise the commit
        is loaded via a GenericCommitHandler, progress is reported, and the
        import either stops (``max_commits`` reached) or checkpoints
        (every ``checkpoint_every`` commits).

        :raises BadRestart: if a skipped commit's id is not in the id-map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for _ in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return
        if self.first_incremental_commit:
            # Only the first commit of an incremental import seeds the
            # file-ids cache, using that commit's parents.
            self.first_incremental_commit = None
            parents = _track_heads(cmd, self.cache_mgr)
            self._gen_file_ids_cache(parents)

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
484
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
485
    def _gen_file_ids_cache(self, revs=False):
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
486
        """Generate the file-id cache by searching repository inventories.
487
        """
488
        # Get the interesting revisions - the heads
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
489
        if revs:
490
            head_ids = revs
491
        else:
492
            head_ids = self.cache_mgr.heads.keys()
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
493
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
494
495
        # Update the fileid cache
496
        file_ids = {}
497
        for revision_id in revision_ids:
498
            inv = self.repo.revision_tree(revision_id).inventory
0.64.93 by Ian Clatworthy
minor comment clean-ups
499
            # Cache the inventories while we're at it
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
500
            self.cache_mgr.inventories[revision_id] = inv
501
            for path, ie in inv.iter_entries():
502
                file_ids[path] = ie.file_id
503
        self.cache_mgr.file_ids = file_ids
504
0.64.25 by Ian Clatworthy
slightly better progress reporting
505
    def report_progress(self, details=''):
        """Emit a progress note every ``progress_every`` commits.

        :param details: extra text appended to the end of the note
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every != 0:
            return
        eta_str = ''
        if self.total_commits is None:
            counts = "%d" % (self._revision_count,)
        else:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
            eta = progress.get_eta(self._start_time, self._revision_count,
                self.total_commits)
            eta_text = progress.str_tdelta(eta)
            # An estimate ending in '--' is left out of the note.
            if not eta_text.endswith('--'):
                eta_str = '[%s] ' % eta_text
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
521
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
522
    def progress_handler(self, cmd):
        """Process a ProgressCommand by echoing its message as a note."""
        # We could use a progress bar here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
526
527
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a lightweight tag when it
        has a from clause; without one it is ignored (with a warning in
        verbose mode). A reset of any other ref with a from clause
        records that reference as the head for the ref.
        """
        tag_prefix = 'refs/tags/'
        target = cmd.from_
        if not cmd.ref.startswith(tag_prefix):
            # FIXME: cmd.from_ is a committish and thus could reference
            # another branch.  Create a method for resolving commitish's.
            if target is not None:
                self.cache_mgr.track_heads_for_ref(cmd.ref, target)
                # Why is this required now vs at the end?
                #updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
                #    helpers.invert_dictset(self.cache_mgr.heads),
                #    self.cache_mgr.last_ref, self.tags)
                #updater.update()
            return

        tag_name = cmd.ref[len(tag_prefix):]
        if target is not None:
            self._set_tag(tag_name, target)
        elif self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
547
548
    def tag_handler(self, cmd):
        """Process a TagCommand.

        A tag without a from clause is ignored with a warning.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
554
555
    def _set_tag(self, name, from_):
0.64.93 by Ian Clatworthy
minor comment clean-ups
556
        """Define a tag given a name and import 'from' reference."""
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
557
        bzr_tag_name = name.decode('utf-8', 'replace')
558
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
559
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
560
561
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
562
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the instance as ``verbose``
        :param inventory_cache_size: size given to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # Blob caches, keyed by dataref (either a :mark or the sha-1).
        # Entries in _sticky_blobs survive being fetched; entries in
        # _blobs are dropped on first fetch.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory. Inventories are large, and as most
        # parents are recent in history only a few need to be kept.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-id -> revision-id. All of these must be kept,
        # but they are small.
        self.revision_ids = {}

        # path -> file-id, as generated
        self.file_ids = {}

        # Head tracking: the last ref seen, the last commit-id per ref,
        # and a map of commit-ids to the set of refs they head.
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        keep = None
        if info is not None:
            try:
                keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass
        self._blobs_to_keep = keep

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no hints are available, when it is
        empty, or when its id is listed in the hints.
        """
        keep = self._blobs_to_keep
        sticky = keep is None or data == '' or id in keep
        if sticky:
            target = self._sticky_blobs
        else:
            target = self._blobs
        target[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed as it is
        returned. An unknown id raises KeyError.
        """
        if id in self._sticky_blobs:
            return self._sticky_blobs[id]
        return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches (deliberately a no-op)."""
        # The file-id given to a path is remembered even after the file
        # is deleted, so there is nothing to do here.
        return None

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # The old path must forget its file-id, otherwise duplicate
        # file-ids end up in the repository.
        moved_id = self.file_ids[old_path]
        self.file_ids[new_path] = moved_id
        del self.file_ids[old_path]

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Record cmd_id as the head of cmd_ref.

        :param cmd_ref: the ref being updated
        :param cmd_id: the commit-id now heading that ref
        :param parents: optional commit-ids that stop being heads of
            cmd_ref
        """
        if parents is not None:
            for parent in parents:
                refs = self.heads.get(parent)
                if not refs:
                    continue
                refs.discard(cmd_ref)
                # Drop a parent that no longer heads any ref
                if not refs:
                    del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref
643
0.64.16 by Ian Clatworthy
safe processing tweaks
644
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
645
def _track_heads(cmd, cache_mgr):
646
    """Track the repository heads given a CommitCommand.
647
    
648
    :return: the list of parents in terms of commit-ids
649
    """
650
    # Get the true set of parents
0.64.60 by Ian Clatworthy
support merges when from clause implicit
651
    if cmd.from_ is not None:
652
        parents = [cmd.from_]
0.64.55 by Ian Clatworthy
fix head tracking when from clause implied
653
    else:
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
654
        last_id = cache_mgr.last_ids.get(cmd.ref)
655
        if last_id is not None:
656
            parents = [last_id]
657
        else:
658
            parents = []
0.64.60 by Ian Clatworthy
support merges when from clause implicit
659
    parents.extend(cmd.merges)
0.64.109 by Ian Clatworthy
initial cut at reset support
660
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
661
    # Track the heads
0.64.109 by Ian Clatworthy
initial cut at reset support
662
    cache_mgr.track_heads_for_ref(cmd.ref, cmd.id, parents)
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
663
    return parents
664
665
0.64.5 by Ian Clatworthy
first cut at generic processing method
666
class GenericCommitHandler(processor.CommitHandler):
667
0.64.48 by Ian Clatworthy
one revision loader instance
668
    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        ``command`` is handed to the base CommitHandler; the remaining
        arguments are stored on the instance under the same names.
        """
        processor.CommitHandler.__init__(self, command)
        # Collaborators
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        # Behaviour flags
        self.verbose = verbose
        self._experimental = _experimental
0.64.5 by Ian Clatworthy
first cut at generic processing method
676
0.64.43 by Ian Clatworthy
verbose mode cleanup
677
    def note(self, msg, *args):
        """Output a note, suffixed with the id of the current command."""
        with_context = "%s (%s)" % (msg, self.command.id)
        note(with_context, *args)
681
682
    def warning(self, msg, *args):
        """Output a warning, suffixed with the id of the current command."""
        with_context = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(with_context, *args)
686
0.64.67 by James Westby
Add support for -Dfast-import.
687
    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given.

        Nothing is output unless "fast-import" is among the debug flags.
        The message is suffixed with the id of the current command.
        """
        if "fast-import" not in debug.debug_flags:
            return
        with_context = "%s (%s)" % (msg, self.command.id)
        mutter(with_context, *args)
692
0.64.5 by Ian Clatworthy
first cut at generic processing method
693
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves the parents to bzr
        revision-ids, and seeds the working inventory from the first
        parent (or a fresh inventory when there are no parents).
        """
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}
        if self.repo.supports_rich_root():
            self.lines_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        commit_parents = _track_heads(self.command, self.cache_mgr)

        # Convert the parent commit-ids to bzr revision-ids
        known_ids = self.cache_mgr.revision_ids
        self.parents = [known_ids[p] for p in commit_parents]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            base_inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = base_inv.copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if self.repo.supports_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
731
0.64.14 by Ian Clatworthy
commit of modified files working
732
    def post_process_files(self):
        """Save the revision.

        The finished inventory is cached, a Revision is built from the
        command's committer, author and message, and both are handed to
        the loader.
        """
        cmd = self.command
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        committer = cmd.committer
        who = "%s <%s>" % (committer[0],committer[1])
        rev_props = {}
        author = cmd.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(cmd.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
756
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
757
    def _escape_commit_message(self, message):
758
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
759
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
760
        # Code copied from bzrlib.commit.
761
        
762
        # Python strings can include characters that can't be
763
        # represented in well-formed XML; escape characters that
764
        # aren't listed in the XML specification
765
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
766
        message, _ = re.subn(
767
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
768
            lambda match: match.group(0).encode('unicode_escape'),
769
            message)
770
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
771
772
    def modify_handler(self, filecmd):
        """Process a file modification.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command's inline data.
        """
        if filecmd.dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
780
0.64.108 by Ian Clatworthy
recursively delete children when a directory is deleted
781
    def _delete_recursive(self, path):
        """Delete a path, and any children it has, from the inventory.

        When the path's file-id is a directory in the inventory, each
        child is deleted first. If the entry cannot be removed (unknown
        id, or the parent is no longer a directory) the problem is
        passed to ``_warn_unless_in_merges`` instead of being raised.

        :param path: the path to delete
        """
        import sys

        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        dirname, basename = osutils.split(path)
        if (fileid in self.inventory and
            isinstance(self.inventory[fileid], inventory.InventoryDirectory)):
            # keys() gives a list here, so removing children while
            # looping is safe.
            for child_path in self.inventory[fileid].children.keys():
                self._delete_recursive(osutils.pathjoin(path, child_path))
        try:
            if self.inventory.id2path(fileid) == path:
                del self.inventory[fileid]
            else:
                # already added by some other name?
                if dirname in self.cache_mgr.file_ids:
                    parent_id = self.cache_mgr.file_ids[dirname]
                    del self.inventory[parent_id].children[basename]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        except AttributeError:
            # Fetched via sys.exc_info() so that this parses on every
            # Python version.
            ex = sys.exc_info()[1]
            if ex.args[0] == 'children':
                # A directory has changed into a file and then one
                # of its children is being deleted!
                self._warn_unless_in_merges(fileid, path)
            else:
                raise
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
812
0.64.108 by Ian Clatworthy
recursively delete children when a directory is deleted
813
    def delete_handler(self, filecmd):
        """Process a file delete by recursively deleting its path."""
        target = filecmd.path
        self._delete_recursive(target)
815
0.64.63 by Ian Clatworthy
remove warning about delete iff file is in a merge parent
816
    def _warn_unless_in_merges(self, fileid, path):
817
        if len(self.parents) <= 1:
818
            return
819
        for parent in self.parents[1:]:
820
            if fileid in self.get_inventory(parent):
821
                return
822
        self.warning("ignoring delete of %s as not in parent inventories", path)
823
0.64.5 by Ian Clatworthy
first cut at generic processing method
824
    def copy_handler(self, filecmd):
        """Process a file copy - not supported, always raises.

        :raises NotImplementedError: unconditionally
        """
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
826
827
    def rename_handler(self, filecmd):
        """Process a file rename.

        Anything already at the new path is removed first. The file's
        lines are fetched for this commit before the inventory entry is
        moved, and the moved entry is stamped with this revision.
        """
        source_path = filecmd.old_path
        target_path = filecmd.new_path
        self.debug("renaming %s to %s", source_path, target_path)
        file_id = self.bzr_file_id(source_path)
        basename, new_parent_ie = self._ensure_directory(target_path)
        new_parent_id = new_parent_ie.file_id
        # Clear out whatever currently occupies the destination
        clobbered_id = self.inventory.path2id(target_path)
        if clobbered_id is not None:
            self.inventory.remove_recursive_id(clobbered_id)
        # Fetch the text from the entry's existing revision
        entry = self.inventory[file_id]
        self.lines_for_commit[file_id] = self.loader._get_lines(
            file_id, entry.revision)
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(source_path, target_path)
        self.inventory[file_id].revision = self.revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
843
844
    def deleteall_handler(self, filecmd):
        """Process a deleteall by removing every child of the root."""
        self.debug("deleting all files (and also all directories)")
        # Would be nice to have an inventory.clear() method here
        top_level = self.inventory.root.children
        # Collect the ids first so the children mapping is not walked
        # while entries are being removed from it.
        doomed_ids = [entry.file_id
            for _name, entry in top_level.iteritems()]
        for doomed_id in doomed_ids:
            self.inventory.remove_recursive_id(doomed_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
851
0.64.16 by Ian Clatworthy
safe processing tweaks
852
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path not yet in the cache manager's ``file_ids`` map gets a
        generated id, which is added to the map.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            return known_ids[path], False
        except KeyError:
            pass
        new_id = generate_ids.gen_file_id(path)
        known_ids[path] = new_id
        self.debug("Generated new file id %s for '%s'", new_id, path)
        return new_id, True
866
0.64.5 by Ian Clatworthy
first cut at generic processing method
867
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        This is ``bzr_file_id_and_new`` with the is_new flag dropped.
        """
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
870
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
871
    def gen_initial_inventory(self):
        """Build the starting inventory for a revision without parents.

        :return: a new Inventory created for the current revision id
        """
        new_inv = inventory.Inventory(revision_id=self.revision_id)
        if not self.repo.supports_rich_root():
            return new_inv
        # The very first root needs to have the right revision
        new_inv.root.revision = self.revision_id
        return new_inv
878
0.64.5 by Ian Clatworthy
first cut at generic processing method
879
    def gen_revision_id(self):
        """Create the revision id for the commit being processed.

        The id is derived from the committer of the current command and
        the committer's timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        name, email = committer[0], committer[1]
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(who, committer[2])
890
0.64.7 by Ian Clatworthy
start of multiple commit handling
891
    def get_inventory(self, revision_id):
        """Return the inventory belonging to a revision id.

        The cache manager is tried first. On a miss the inventory is taken
        from the repository's revision tree and stored in the cache.
        """
        cached = self.cache_mgr.inventories
        try:
            return cached[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        rebuilt = self.repo.revision_tree(revision_id).inventory
        cached[revision_id] = rebuilt
        return rebuilt
902
0.64.5 by Ian Clatworthy
first cut at generic processing method
903
    def _get_inventories(self, revision_ids):
904
        """Get the inventories for revision-ids.
905
        
906
        This is a callback used by the RepositoryLoader to
0.64.93 by Ian Clatworthy
minor comment clean-ups
907
        speed up inventory reconstruction.
908
        """
0.64.5 by Ian Clatworthy
first cut at generic processing method
909
        present = []
910
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
911
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
912
        # successfully loaded into the repsoitory
913
        for revision_id in revision_ids:
914
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
915
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
916
                present.append(revision_id)
917
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
918
                if self.verbose:
919
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
920
                # Not cached so reconstruct from repository
921
                if self.repo.has_revision(revision_id):
922
                    rev_tree = self.repo.revision_tree(revision_id)
923
                    present.append(revision_id)
924
                else:
925
                    rev_tree = self.repo.revision_tree(None)
926
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
927
                self.cache_mgr.inventories[revision_id] = inv
928
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
929
        return present, inventories
930
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
931
    def _get_lines(self, file_id):
932
        """Get the lines for a file-id."""
933
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
934
935
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add an item to the inventory, or replace the one already there.

        :param path: path of the item
        :param kind: inventory kind passed to inventory.make_entry()
        :param is_executable: executable flag, used for file entries only
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: if the entry is neither a file nor a link
        """
        # Build the replacement entry underneath its (possibly new) parent
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id

        is_file = isinstance(entry, inventory.InventoryFile)
        if not is_file and not isinstance(entry, inventory.InventoryLink):
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        if is_file:
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum(len(line) for line in text_lines)
            self.lines_for_commit[file_id] = text_lines
        else:
            # NOTE(review): the target is *encoded* to UTF-8 here; confirm
            # which string type symlink_target is expected to hold.
            entry.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
            return
        # HACK: no API for this (del+add does more than it needs to)
        self.inventory._byid[file_id] = entry
        parent_ie.children[basename] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
964
965
    def _ensure_directory(self, path):
        """Make sure the directory containing 'path' has an inventory entry.

        Missing parent directories are created recursively and added to
        the inventory.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if not dirname:
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, grandparent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename,
            grandparent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
994
995
0.64.34 by Ian Clatworthy
report lost branches
996
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
997
0.64.64 by Ian Clatworthy
save tags known about in each branch
998
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
999
        """Create an object responsible for updating branches.
1000
1001
        :param heads_by_ref: a dictionary where
1002
          names are git-style references like refs/heads/master;
1003
          values are one item lists of commits marks.
1004
        """
0.64.37 by Ian Clatworthy
create branches as required
1005
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1006
        self.branch = branch
1007
        self.cache_mgr = cache_mgr
1008
        self.heads_by_ref = heads_by_ref
1009
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
1010
        self.tags = tags
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1011
1012
    def update(self):
1013
        """Update the Bazaar branches and tips matching the heads.
1014
1015
        If the repository is shared, this routine creates branches
1016
        as required. If it isn't, warnings are produced about the
1017
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
1018
0.64.34 by Ian Clatworthy
report lost branches
1019
        :return: updated, lost_heads where
1020
          updated = the list of branches updated
1021
          lost_heads = a list of (bazaar-name,revision) for branches that
1022
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1023
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
1024
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
1025
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1026
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1027
            if self._update_branch(br, tip):
1028
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
1029
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1030
1031
    def _get_matching_branches(self):
1032
        """Get the Bazaar branches.
1033
0.64.93 by Ian Clatworthy
minor comment clean-ups
1034
        :return: default_tip, branch_tips, lost_heads where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1035
          default_tip = the last commit mark for the default branch
1036
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
1037
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
1038
            would have been created had the repository been shared and
1039
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1040
        """
0.64.37 by Ian Clatworthy
create branches as required
1041
        branch_tips = []
1042
        lost_heads = []
1043
        ref_names = self.heads_by_ref.keys()
1044
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1045
            trunk = self.select_trunk(ref_names)
1046
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
1047
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1048
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
1049
1050
        # Convert the reference names into Bazaar speak
1051
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
1052
0.64.37 by Ian Clatworthy
create branches as required
1053
        # Policy for locating branches
1054
        def dir_under_current(name, ref_name):
1055
            # Using the Bazaar name, get a directory under the current one
1056
            return name
1057
        def dir_sister_branch(name, ref_name):
1058
            # Using the Bazaar name, get a sister directory to the branch
1059
            return osutils.pathjoin(self.branch.base, "..", name)
1060
        if self.branch is not None:
1061
            dir_policy = dir_sister_branch
1062
        else:
1063
            dir_policy = dir_under_current
1064
0.64.34 by Ian Clatworthy
report lost branches
1065
        # Create/track missing branches
1066
        shared_repo = self.repo.is_shared()
1067
        for name in sorted(bzr_names.keys()):
1068
            ref_name = bzr_names[name]
1069
            tip = self.heads_by_ref[ref_name][0]
1070
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
1071
                location = dir_policy(name, ref_name)
1072
                try:
1073
                    br = self.make_branch(location)
1074
                    branch_tips.append((br,tip))
1075
                    continue
1076
                except errors.BzrError, ex:
1077
                    error("ERROR: failed to create branch %s: %s",
1078
                        location, ex)
1079
            lost_head = self.cache_mgr.revision_ids[tip]
1080
            lost_info = (name, lost_head)
1081
            lost_heads.append(lost_info)
1082
        return branch_tips, lost_heads
1083
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1084
    def select_trunk(self, ref_names):
1085
        """Given a set of ref names, choose one as the trunk."""
1086
        for candidate in ['refs/heads/master']:
1087
            if candidate in ref_names:
1088
                return candidate
1089
        # Use the last reference in the import stream
1090
        return self.last_ref
1091
0.64.37 by Ian Clatworthy
create branches as required
1092
    def make_branch(self, location):
        """Make a branch in the repository if not already there.

        :param location: where the branch lives or should be created
        :return: the branch opened at location or, if opening raises
          NotBranchError, the one created there
        """
        try:
            return bzrdir.BzrDir.open(location).open_branch()
        except errors.NotBranchError:
            # The exception itself is not needed, so it is not bound
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
1098
1099
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
1100
        """Generate Bazaar branch names from import ref names.
1101
        
1102
        :return: a dictionary with Bazaar names as keys and
1103
          the original reference names as values.
1104
        """
0.64.34 by Ian Clatworthy
report lost branches
1105
        bazaar_names = {}
1106
        for ref_name in sorted(ref_names):
1107
            parts = ref_name.split('/')
1108
            if parts[0] == 'refs':
1109
                parts.pop(0)
1110
            full_name = "--".join(parts)
1111
            bazaar_name = parts[-1]
1112
            if bazaar_name in bazaar_names:
0.64.109 by Ian Clatworthy
initial cut at reset support
1113
                if parts[0] == 'remotes':
1114
                    bazaar_name += ".remote"
1115
                else:
1116
                    bazaar_name = full_name
0.64.34 by Ian Clatworthy
report lost branches
1117
            bazaar_names[bazaar_name] = ref_name
1118
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1119
1120
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1121
        """Update a branch with last revision and tag information.
1122
        
1123
        :return: whether the branch was changed or not
1124
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1125
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
1126
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
1127
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1128
        existing_revno, existing_last_rev_id = br.last_revision_info()
1129
        changed = False
1130
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
1131
            br.set_last_revision_info(revno, last_rev_id)
1132
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
1133
        # apply tags known in this branch
1134
        my_tags = {}
1135
        if self.tags:
1136
            for tag,rev in self.tags.items():
1137
                if rev in revs:
1138
                    my_tags[tag] = rev
1139
            if my_tags:
1140
                br.tags._set_tag_dict(my_tags)
1141
                changed = True
1142
        if changed:
1143
            tagno = len(my_tags)
1144
            note("\t branch %s now has %d %s and %d %s", br.nick,
1145
                revno, helpers.single_plural(revno, "revision", "revisions"),
1146
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1147
        return changed