/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.51 by Ian Clatworthy
disable autopacking
36
from bzrlib.repofmt import pack_repo
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
37
from bzrlib.trace import (
0.64.67 by James Westby
Add support for -Dfast-import.
38
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
    note,
40
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
41
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
from bzrlib.plugins.fastimport import (
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
43
    errors as plugin_errors,
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
44
    helpers,
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
45
    idmapfile,
0.64.5 by Ian Clatworthy
first cut at generic processing method
46
    processor,
47
    revisionloader,
48
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
50
0.64.41 by Ian Clatworthy
update multiple working trees if requested
51
# How many commits before automatically reporting progress
52
_DEFAULT_AUTO_PROGRESS = 1000
53
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
54
# How many commits before automatically checkpointing
55
_DEFAULT_AUTO_CHECKPOINT = 10000
56
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
57
# How many inventories to cache
58
_DEFAULT_INV_CACHE_SIZE = 10
59
0.64.41 by Ian Clatworthy
update multiple working trees if requested
60
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
61
class GenericProcessor(processor.ImportProcessor):
62
    """An import processor that handles basic imports.
63
64
    Current features supported:
65
0.64.16 by Ian Clatworthy
safe processing tweaks
66
    * blobs are cached in memory
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
67
    * commits of files and symlinks are supported
68
    * checkpoints automatically happen at a configurable frequency
69
      over and above the stream requested checkpoints
70
    * timestamped progress reporting, both automatic and stream requested
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
71
    * some basic statistics are dumped on completion.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
72
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
73
    At checkpoints and on completion, the commit-id -> revision-id map is
74
    saved to a file called 'fastimport-id-map'. If the import crashes
75
    or is interrupted, it can be started again and this file will be
76
    used to skip over already loaded revisions. The format of each line
77
    is "commit-id revision-id" so commit-ids cannot include spaces.
78
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
79
    Here are the supported parameters:
80
0.64.38 by Ian Clatworthy
clean-up doc ready for initial release
81
    * info - name of a hints file holding the analysis generated
82
      by running the fast-import-info processor in verbose mode. When
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
83
      importing large repositories, this parameter is needed so
84
      that the importer knows what blobs to intelligently cache.
85
0.64.41 by Ian Clatworthy
update multiple working trees if requested
86
    * trees - update the working trees before completing.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
87
      By default, the importer updates the repository
88
      and branches and the user needs to run 'bzr update' for the
0.64.41 by Ian Clatworthy
update multiple working trees if requested
89
      branches of interest afterwards.
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
90
91
    * checkpoint - automatically checkpoint every n commits over and
92
      above any checkpoints contained in the import stream.
93
      The default is 10000.
94
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
95
    * count - only import this many commits then exit. If not set
96
      or negative, all commits are imported.
97
    
98
    * inv-cache - number of inventories to cache.
99
      If not set, the default is 10.
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
100
101
    * experimental - enable experimental mode, i.e. use features
102
      not yet fully tested.
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
103
104
    * import-marks - name of file to read to load mark information from
105
106
    * export-marks - name of file to write to save mark information to
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
107
    """
108
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
109
    known_params = [
110
        'info',
111
        'trees',
112
        'checkpoint',
113
        'count',
114
        'inv-cache',
115
        'experimental',
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
116
        'import-marks',
117
        'export-marks',
0.64.47 by Ian Clatworthy
add option for enabling experimental stuff
118
        ]
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
119
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
120
    def _import_marks(self, filename):
121
        try:
122
            f = file(filename)
123
        except IOError:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
124
            self.warning(
125
                "Could not open import-marks file, not importing marks")
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
126
            return
127
128
        firstline = f.readline()
129
        match = re.match(r'^format=(\d+)$', firstline)
130
        if not match:
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
131
            print >>sys.stderr, "%r doesn't look like a mark file" % \
132
                (filename,)
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
133
            sys.exit(1)
134
        elif match.group(1) != '1':
135
            print >>sys.stderr, 'format version in mark file not supported'
136
            sys.exit(1)
137
138
        for string in f.readline().rstrip('\n').split('\0'):
139
            if not string:
140
                continue
141
            name, integer = string.rsplit('.', 1)
142
            # We really can't do anything with the branch information, so we
143
            # just skip it
144
            
145
        self.cache_mgr.revision_ids = {}
146
        for line in f:
147
            line = line.rstrip('\n')
148
            mark, revid = line.split(' ', 1)
149
            self.cache_mgr.revision_ids[mark] = revid
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
150
        f.close()
0.64.67 by James Westby
Add support for -Dfast-import.
151
    
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
152
    def export_marks(self, filename):
        """Write the mark -> revision-id map to a marks file.

        The output is a ``format=1`` header line, a branch-information
        line holding a single placeholder entry, and then one
        ``mark revision-id`` pair per line taken from
        ``self.cache_mgr.revision_ids``.

        If the file cannot be opened for writing, a warning is issued
        and nothing is written.

        :param filename: path of the marks file to write
        """
        try:
            f = open(filename, 'w')
        except IOError:
            self.warning(
                "Could not open export-marks file, not exporting marks")
            return
        # Close the file even if one of the writes fails.
        try:
            f.write('format=1\n')
            # Branch-information line; _import_marks reads and ignores it.
            f.write('\0tmp.0\n')
            revision_ids = self.cache_mgr.revision_ids
            for mark in revision_ids:
                f.write('%s %s\n' % (mark, revision_ids[mark]))
        finally:
            f.close()
164
        
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
165
    def pre_process(self):
        """Set up state before any commands are processed.

        Records the start time, loads the parameters, creates the cache
        manager, restores the already-known commit-id -> revision-id
        map (from the 'import-marks' file if given, otherwise from the
        id-map file), picks a revision loader, disables autopacking on
        pack repositories and opens the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)

        marks_path = self.params.get("import-marks")
        if marks_path is None:
            # Restarting (or starting fresh): use the id-map file
            self.first_incremental_commit = False
            self.skip_total = self._init_id_map()
            if self.skip_total:
                self.note("Found %d commits already loaded - "
                    "skipping over these ...", self.skip_total)
        else:
            # Incremental import driven by a marks file
            self._import_marks(marks_path)
            self.skip_total = False
            self.first_incremental_commit = True
        self._revision_count = 0

        # mapping of tag name to revision_id
        self.tags = {}

        # Create the revision loader needed for committing
        repo = self.repo
        if hasattr(repo, 'revisions'):
            # The repository offers the new Repository API
            self.loader = revisionloader.RevisionLoader2(repo)
        elif self._experimental:
            def fulltext_when(count):
                # Store a full-text at the final commit (when the total
                # is known) and otherwise every 200 revisions
                total = self.total_commits
                at_last_commit = total is not None and count == total
                fulltext = at_last_commit or count % 200 == 0
                if fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return fulltext

            self.loader = revisionloader.ImportRevisionLoader1(
                repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)
        else:
            self.loader = revisionloader.RevisionLoader1(repo)

        # Disable autopacking if the repo format supports it.
        # THIS IS A HACK - there is no sanctioned way of doing this yet.
        if isinstance(repo, pack_repo.KnitPackRepository):
            pack_collection = repo._pack_collection
            self._original_max_pack_count = pack_collection._max_pack_count
            def _max_pack_count_for_import(total_revisions):
                return total_revisions + 1
            pack_collection._max_pack_count = _max_pack_count_for_import
        else:
            self._original_max_pack_count = None

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        repo.start_write_group()
224
225
    def _load_info_and_params(self):
        """Read the parameters and the optional info file into attributes.

        Sets _experimental, id_map_path, info, progress_every,
        checkpoint_every, inventory_cache_size, max_commits and
        total_commits.
        """
        params = self.params
        self._experimental = bool(params.get('experimental', False))

        # The id-map location is currently hard-coded but might be
        # configurable via parameters one day if that's needed
        control_transport = self.repo.control_files._transport
        self.id_map_path = control_transport.local_abspath(
            "fastimport-id-map")

        # Load the info file, if any
        info_path = params.get('info')
        if info_path is None:
            self.info = None
        else:
            self.info = configobj.ConfigObj(info_path)

        # Decide how often to automatically report progress
        # (not a parameter yet); report 10 times as often when verbose
        interval = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            interval = interval / 10
        self.progress_every = interval

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(
            params.get('checkpoint', _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(
            params.get('inv-cache', _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            limit = int(params['count'])
        except KeyError:
            limit = None
        else:
            if limit < 0:
                limit = None
        self.max_commits = limit

        if self.info is None:
            total = limit
        else:
            total = int(self.info['Command counts']['commit'])
            if limit is not None and total > limit:
                total = limit
        self.total_commits = total
0.64.25 by Ian Clatworthy
slightly better progress reporting
272
0.64.27 by Ian Clatworthy
1st cut at performance tuning
273
    def _process(self, command_iter):
        """Run the base class processing, cleaning up on any failure.

        Whatever goes wrong, an open write group (if any) is aborted
        before the original exception is re-raised.
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            repo = self.repo
            if repo is not None:
                if repo.is_in_write_group():
                    repo.abort_write_group()
            raise
281
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
282
    def post_process(self):
        """Finish the import once all commands have been processed.

        Commits the open write group, saves the id map, writes the
        marks file if 'export-marks' was given, updates the branches,
        optionally updates the working trees (the 'trees' parameter),
        dumps the statistics and, where autopacking was disabled and
        more revisions than the checkpoint frequency were processed,
        packs the repository and removes the obsolete packs.
        """
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        marks_path = self.params.get("export-marks")
        if marks_path is not None:
            self.export_marks(marks_path)

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dictset(self.cache_mgr.heads),
            self.cache_mgr.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # Only report the individual changes in verbose mode.
                # The name must be bound in both branches as it is
                # passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()

        # Finish up by telling the user what to do next.
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            if self._revision_count > self.checkpoint_every:
                self.note("Packing repository ...")
                self.repo.pack()
                # To be conservative, packing puts the old packs and
                # indices in obsolete_packs. We err on the side of
                # optimism and clear out that directory to save space.
                self.note("Removing obsolete packs ...")
                # TODO: Use a public API for this once one exists
                repo_transport = self.repo._pack_collection.transport
                repo_transport.clone('obsolete_packs').delete_multi(
                    repo_transport.list_dir('obsolete_packs'))
        if remind_about_update:
            # This message is explicitly not timestamped.
            note("To refresh the working tree for a branch, "
                "use 'bzr update'.")
0.64.41 by Ian Clatworthy
update multiple working trees if requested
352
353
    def _get_working_trees(self, branches):
354
        """Get the working trees for branches in the repository."""
355
        result = []
356
        wt_expected = self.repo.make_working_trees()
357
        for br in branches:
358
            if br == self.branch and br is not None:
359
                wt = self.working_tree
360
            elif wt_expected:
361
                try:
362
                    wt = br.bzrdir.open_workingtree()
363
                except errors.NoWorkingTree:
364
                    self.warning("No working tree for branch %s", br)
365
                    continue
366
            else:
367
                continue
368
            result.append(wt)
369
        return result
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
370
371
    def dump_stats(self):
        """Report how many revisions, branches and trees were handled.

        The revision figure excludes the commits that were skipped
        because they had already been loaded. The elapsed time since
        the import started is included in the message.
        """
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count - self.skip_total
        branches = self._branch_count
        trees = self._tree_count
        pluralise = helpers.single_plural
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, pluralise(revisions, "revision", "revisions"),
            branches, pluralise(branches, "branch", "branches"),
            trees, pluralise(trees, "tree", "trees"),
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
381
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
382
    def _init_id_map(self):
        """Load the id-map and sanity check it against the repository.

        The loaded map is stored in self.cache_mgr.revision_ids.

        :return: the number of entries in the map
        :raises BadRepositorySize: if the repository holds fewer
            revisions than the id-map knows about
        """
        # Currently, we just check the size. In the future, we might
        # decide to be more paranoid and check that the revision-ids
        # are identical as well.
        id_map, known_count = idmapfile.load_id_map(self.id_map_path)
        self.cache_mgr.revision_ids = id_map
        repo_count = len(self.repo.all_revision_ids())
        if known_count > repo_count:
            raise plugin_errors.BadRepositorySize(known_count, repo_count)
        return known_count
396
397
    def _save_id_map(self):
        """Write the commit-id -> revision-id map to the id-map file."""
        # The whole map is written every time. If this proves a problem,
        # we can change to 'append just the new ones' at a later time.
        target = self.id_map_path
        id_map = self.cache_mgr.revision_ids
        idmapfile.save_id_map(target, id_map)
402
0.64.5 by Ian Clatworthy
first cut at generic processing method
403
    def blob_handler(self, cmd):
        """Process a BlobCommand by storing its data in the blob cache.

        A marked blob is stored under the command's id; an unmarked one
        is stored under the SHA of its data.
        """
        content = cmd.data
        if cmd.mark is None:
            key = osutils.sha_strings(content)
        else:
            key = cmd.id
        self.cache_mgr.store_blob(key, content)
0.64.5 by Ian Clatworthy
first cut at generic processing method
410
411
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        The current write group is committed, the id map is saved and a
        fresh write group is started. The command itself is not used.
        """
        repo = self.repo
        repo.commit_write_group()
        self._save_id_map()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
417
418
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        While catching up after a restart (skip_total commits are
        already loaded), the commit is only used to track heads and is
        otherwise skipped. Otherwise the revision is committed, progress
        is reported and the import either finishes (requested count
        reached) or automatically checkpoints.

        :raises BadRestart: if a commit being skipped is not in the
            id map
        """
        if self.skip_total and self._revision_count < self.skip_total:
            _track_heads(cmd, self.cache_mgr)
            # Check that we really do know about this commit-id
            if cmd.id not in self.cache_mgr.revision_ids:
                raise plugin_errors.BadRestart(cmd.id)
            # Consume the file commands and free any non-sticky blobs
            for fc in cmd.file_iter():
                pass
            self.cache_mgr._blobs = {}
            self._revision_count += 1
            # If we're finished getting back to where we were,
            # load the file-ids cache
            if self._revision_count == self.skip_total:
                self._gen_file_ids_cache()
                self.note("Generated the file-ids cache - %d entries",
                    len(self.cache_mgr.file_ids))
            return
        if self.first_incremental_commit:
            # First commit of an incremental (marks-based) import: seed
            # the file-ids cache from the parents of this commit
            self.first_incremental_commit = None
            parents = _track_heads(cmd, self.cache_mgr)
            self._gen_file_ids_cache(parents)

        # 'Commit' the revision and report progress
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.loader, self.verbose, self._experimental)
        handler.process()
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("Stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A checkpoint frequency of 0 disables automatic
            # checkpointing rather than dividing by zero
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
459
0.64.82 by Ian Clatworthy
Merge Pieter de Bie's export-fixes branch
460
    def _gen_file_ids_cache(self, revs=False):
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
461
        """Generate the file-id cache by searching repository inventories.
462
        """
463
        # Get the interesting revisions - the heads
0.68.7 by Pieter de Bie
Add importing and exporting of marks to bzr-fastimport
464
        if revs:
465
            head_ids = revs
466
        else:
467
            head_ids = self.cache_mgr.heads.keys()
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
468
        revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
469
470
        # Update the fileid cache
471
        file_ids = {}
472
        for revision_id in revision_ids:
473
            inv = self.repo.revision_tree(revision_id).inventory
0.64.93 by Ian Clatworthy
minor comment clean-ups
474
            # Cache the inventories while we're at it
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
475
            self.cache_mgr.inventories[revision_id] = inv
476
            for path, ie in inv.iter_entries():
477
                file_ids[path] = ie.file_id
478
        self.cache_mgr.file_ids = file_ids
479
0.64.25 by Ian Clatworthy
slightly better progress reporting
480
    def report_progress(self, details=''):
        """Note the commit count whenever it hits the reporting interval.

        When the total number of commits is known, the count is shown
        as "done/total" along with an ETA if one can be calculated.

        :param details: extra text appended to the message
        """
        # TODO: use a progress bar with ETA enabled
        done = self._revision_count
        if done % self.progress_every != 0:
            return
        total = self.total_commits
        if total is None:
            counts = "%d" % (done,)
            eta_str = ''
        else:
            counts = "%d/%d" % (done, total)
            eta = progress.get_eta(self._start_time, done, total)
            eta_str = progress.str_tdelta(eta)
            if eta_str.endswith('--'):
                # No ETA text to show
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_str
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
496
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
497
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # We could use a progress bar here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
501
502
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a lightweight tag when a
        from clause is given; without one it is ignored (with a warning in
        verbose mode). A reset of any other ref with a from clause records
        that commit as the head for the ref.
        """
        tag_prefix = 'refs/tags/'
        ref, target = cmd.ref, cmd.from_

        if not ref.startswith(tag_prefix):
            # FIXME: cmd.from_ is a committish and thus could reference
            # another branch. Create a method for resolving committishes.
            if target is not None:
                self.cache_mgr.track_heads_for_ref(ref, target)
                # Open question from the original author: why would a
                # GenericBranchUpdater.update() be required now rather
                # than at the end? (It is not run here.)
            return

        tag_name = ref[len(tag_prefix):]
        if target is not None:
            self._set_tag(tag_name, target)
        elif self.verbose:
            self.warning("ignoring reset refs/tags/%s - no from clause"
                % tag_name)
0.64.5 by Ian Clatworthy
first cut at generic processing method
522
523
    def tag_handler(self, cmd):
        """Process a TagCommand.

        Tags without a from clause cannot be resolved to a revision, so
        they are skipped with a warning.
        """
        if cmd.from_ is None:
            self.warning("ignoring tag %s - no from clause" % cmd.id)
            return
        self._set_tag(cmd.id, cmd.from_)
0.64.12 by Ian Clatworthy
lightweight tags, filter processor and param validation
529
530
    def _set_tag(self, name, from_):
        """Record a tag pointing at the revision imported for ``from_``.

        :param name: the tag name as an encoded string; it is decoded as
            UTF-8 with undecodable bytes replaced
        :param from_: the import reference, looked up in the cache
            manager's commit-id -> revision-id table
        """
        tag_key = name.decode('utf-8', 'replace')
        self.tags[tag_key] = self.cache_mgr.revision_ids[from_]
0.64.5 by Ian Clatworthy
first cut at generic processing method
535
536
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
537
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored as ``self.verbose``
        :param inventory_cache_size: size given to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is kept as sticky when no usage hints are available, when
        it is empty, or when its id is listed in the blobs to keep.
        Otherwise it is stored so that the first fetch removes it.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed as it is
        returned.

        :raises KeyError: if no blob is stored under ``id``
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches.

        :raises KeyError: if ``old_path`` has no cached file-id
        """
        # In this case, we need to forget the file-id we gave the old path,
        # otherwise, we'll get duplicate file-ids in the repository.
        # pop() removes the old entry before the new one is stored, so
        # renaming a path onto itself keeps its file-id rather than
        # dropping it from the cache.
        self.file_ids[new_path] = self.file_ids.pop(old_path)

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Record ``cmd_id`` as the latest commit (head) of ``cmd_ref``.

        :param cmd_ref: the ref being updated
        :param cmd_id: the commit-id that becomes a head for the ref
        :param parents: optional commit-ids that stop being heads of
            ``cmd_ref``; a parent left with no refs is dropped from the
            heads map altogether
        """
        if parents is not None:
            for parent in parents:
                refs = self.heads.get(parent)
                if refs:
                    refs.discard(cmd_ref)
                    if not refs:
                        del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref
618
0.64.16 by Ian Clatworthy
safe processing tweaks
619
0.64.50 by Ian Clatworthy
cleanly restart after an interruption - basic mirroring
620
def _track_heads(cmd, cache_mgr):
    """Update head tracking for a CommitCommand and work out its parents.

    The first parent is the explicit from clause when there is one,
    otherwise the last commit recorded for the command's ref (if any).
    Merge parents follow in the order given by the command.

    :return: the list of parents in terms of commit-ids
    """
    first_parent = cmd.from_
    if first_parent is None:
        # Implicit from clause: continue from the last commit on this ref
        first_parent = cache_mgr.last_ids.get(cmd.ref)

    parents = []
    if first_parent is not None:
        parents.append(first_parent)
    parents += cmd.merges

    # Let the cache manager move the head of the ref to this commit
    cache_mgr.track_heads_for_ref(cmd.ref, cmd.id, parents)
    return parents
639
640
0.64.5 by Ian Clatworthy
first cut at generic processing method
641
class GenericCommitHandler(processor.CommitHandler):
    """Build the inventory and revision for a single commit command.

    pre_process_files() seeds a working inventory (a copy of the first
    parent's inventory, or a fresh one for a parentless commit), the
    *_handler methods apply the file commands to it, and
    post_process_files() hands the resulting revision to the loader.
    """

    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
        _experimental=False):
        """Create a handler for one commit command.

        :param command: the commit command being processed
        :param repo: the repository inventories and texts are read from
        :param cache_mgr: the cache manager shared across commits
        :param loader: the object whose load() stores the new revision
        :param verbose: if True, cache misses are noted
        :param _experimental: stored as ``self._experimental``
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.loader = loader
        self.verbose = verbose
        self._experimental = _experimental

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}
        if self.repo.supports_rich_root():
            self.lines_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = _track_heads(self.command, self.cache_mgr)

        # Convert the parent commit-ids to bzr revision-ids
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            inv = self.get_inventory(self.parents[0])
            # TODO: Shallow copy - deep inventory copying is expensive
            self.inventory = inv.copy()
        if self.repo.supports_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=helpers.escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            self._get_lines, self._get_inventories)

    def modify_handler(self, filecmd):
        """Add or change a file using inline data or a cached blob."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def _delete_recursive(self, path):
        """Delete a path from the inventory, children first.

        A delete that cannot be applied is passed to
        _warn_unless_in_merges() instead of failing.
        """
        # Function-scope import: sys.exc_info() gives access to the active
        # exception in a form that is valid on Python 2.4 as well as on
        # later interpreters.
        import sys
        self.debug("deleting %s", path)
        fileid = self.bzr_file_id(path)
        dirname, basename = osutils.split(path)
        if (fileid in self.inventory and
            isinstance(self.inventory[fileid], inventory.InventoryDirectory)):
            # Snapshot the child names: the recursive deletes below change
            # the directory's children while we iterate
            for child_path in list(self.inventory[fileid].children.keys()):
                self._delete_recursive(osutils.pathjoin(path, child_path))
        try:
            if self.inventory.id2path(fileid) == path:
                del self.inventory[fileid]
            else:
                # already added by some other name?
                if dirname in self.cache_mgr.file_ids:
                    parent_id = self.cache_mgr.file_ids[dirname]
                    del self.inventory[parent_id].children[basename]
        except (KeyError, errors.NoSuchId):
            self._warn_unless_in_merges(fileid, path)
        except AttributeError:
            ex = sys.exc_info()[1]
            if ex.args and ex.args[0] == 'children':
                # A directory has changed into a file and then one
                # of its children is being deleted!
                self._warn_unless_in_merges(fileid, path)
            else:
                raise
        try:
            self.cache_mgr._delete_path(path)
        except KeyError:
            pass

    def delete_handler(self, filecmd):
        """Delete the path named by the command, recursing into directories."""
        self._delete_recursive(filecmd.path)

    def _warn_unless_in_merges(self, fileid, path):
        """Warn about an ignored delete unless a merge parent has the file.

        Nothing is reported when the commit has at most one parent.
        """
        # NOTE(review): returning early for non-merge commits means a bad
        # delete in an ordinary commit is never reported - confirm that
        # this is intended.
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            if fileid in self.get_inventory(parent):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)

    def copy_handler(self, filecmd):
        """Copy a file or symlink to a new path.

        Copies are ignored (with a warning) when the commit has no parent,
        when the source path is not in the inventory, or when the source is
        neither a file nor a symlink.
        """
        src_path = filecmd.src_path
        dest_path = filecmd.dest_path
        self.debug("copying %s to %s", src_path, dest_path)
        if not self.parents:
            self.warning("ignoring copy of %s to %s - no parent revisions",
                src_path, dest_path)
            return
        file_id = self.inventory.path2id(src_path)
        if file_id is None:
            self.warning("ignoring copy of %s to %s - source does not exist",
                src_path, dest_path)
            return
        ie = self.inventory[file_id]
        kind = ie.kind
        if kind == 'file':
            # The text is read from the first parent revision
            content = self._get_content_from_repo(self.parents[0], file_id)
            self._modify_inventory(dest_path, kind, ie.executable, content)
        elif kind == 'symlink':
            self._modify_inventory(dest_path, kind, False, ie.symlink_target)
        else:
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, src_path)

    def _get_content_from_repo(self, revision_id, file_id):
        """Get the content of a file for a revision-id."""
        revtree = self.repo.revision_tree(revision_id)
        return revtree.get_file_text(file_id)

    def rename_handler(self, filecmd):
        """Rename a path, replacing anything already at the new path."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        file_id = self.bzr_file_id(old_path)
        basename, new_parent_ie = self._ensure_directory(new_path)
        new_parent_id = new_parent_ie.file_id
        existing_id = self.inventory.path2id(new_path)
        if existing_id is not None:
            self.inventory.remove_recursive_id(existing_id)
        # Carry the existing text over so that the renamed entry gets a
        # text for this revision
        ie = self.inventory[file_id]
        lines = self.loader._get_lines(file_id, ie.revision)
        self.lines_for_commit[file_id] = lines
        self.inventory.rename(file_id, new_parent_id, basename)
        self.cache_mgr._rename_path(old_path, new_path)
        self.inventory[file_id].revision = self.revision_id

    def deleteall_handler(self, filecmd):
        """Remove every entry below the inventory root."""
        self.debug("deleting all files (and also all directories)")
        # Would be nice to have an inventory.clear() method here.
        # Snapshot the root's children as removing them changes the mapping.
        root_items = list(self.inventory.root.children.values())
        for root_item in root_items:
            self.inventory.remove_recursive_id(root_item.file_id)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            self.debug("Generated new file id %s for '%s'", file_id, path)
            return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if self.repo.supports_rich_root():
            # The very first root needs to have the right revision
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.note("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision-ids that are cached or in the repository
          inventories = one inventory per requested revision-id, in order
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                if self.verbose:
                    self.note("get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: use the tree for revision None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if the entry made for ``kind`` is neither
            a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLink):
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            parent_ie.children[basename] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, outermost first.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
964
965
0.64.34 by Ian Clatworthy
report lost branches
966
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
967
0.64.64 by Ian Clatworthy
save tags known about in each branch
968
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
969
        """Create an object responsible for updating branches.
970
971
        :param heads_by_ref: a dictionary where
972
          names are git-style references like refs/heads/master;
973
          values are one item lists of commits marks.
974
        """
0.64.37 by Ian Clatworthy
create branches as required
975
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
976
        self.branch = branch
977
        self.cache_mgr = cache_mgr
978
        self.heads_by_ref = heads_by_ref
979
        self.last_ref = last_ref
0.64.64 by Ian Clatworthy
save tags known about in each branch
980
        self.tags = tags
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
981
982
    def update(self):
983
        """Update the Bazaar branches and tips matching the heads.
984
985
        If the repository is shared, this routine creates branches
986
        as required. If it isn't, warnings are produced about the
987
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
988
0.64.34 by Ian Clatworthy
report lost branches
989
        :return: updated, lost_heads where
990
          updated = the list of branches updated
991
          lost_heads = a list of (bazaar-name,revision) for branches that
992
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
993
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
994
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
995
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
996
        for br, tip in branch_tips:
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
997
            if self._update_branch(br, tip):
998
                updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
999
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1000
1001
    def _get_matching_branches(self):
1002
        """Get the Bazaar branches.
1003
0.64.93 by Ian Clatworthy
minor comment clean-ups
1004
        :return: default_tip, branch_tips, lost_heads where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1005
          default_tip = the last commit mark for the default branch
1006
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
1007
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
1008
            would have been created had the repository been shared and
1009
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1010
        """
0.64.37 by Ian Clatworthy
create branches as required
1011
        branch_tips = []
1012
        lost_heads = []
1013
        ref_names = self.heads_by_ref.keys()
1014
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1015
            trunk = self.select_trunk(ref_names)
1016
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
1017
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1018
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
1019
1020
        # Convert the reference names into Bazaar speak
1021
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
1022
0.64.37 by Ian Clatworthy
create branches as required
1023
        # Policy for locating branches
1024
        def dir_under_current(name, ref_name):
1025
            # Using the Bazaar name, get a directory under the current one
1026
            return name
1027
        def dir_sister_branch(name, ref_name):
1028
            # Using the Bazaar name, get a sister directory to the branch
1029
            return osutils.pathjoin(self.branch.base, "..", name)
1030
        if self.branch is not None:
1031
            dir_policy = dir_sister_branch
1032
        else:
1033
            dir_policy = dir_under_current
1034
0.64.34 by Ian Clatworthy
report lost branches
1035
        # Create/track missing branches
1036
        shared_repo = self.repo.is_shared()
1037
        for name in sorted(bzr_names.keys()):
1038
            ref_name = bzr_names[name]
1039
            tip = self.heads_by_ref[ref_name][0]
1040
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
1041
                location = dir_policy(name, ref_name)
1042
                try:
1043
                    br = self.make_branch(location)
1044
                    branch_tips.append((br,tip))
1045
                    continue
1046
                except errors.BzrError, ex:
1047
                    error("ERROR: failed to create branch %s: %s",
1048
                        location, ex)
1049
            lost_head = self.cache_mgr.revision_ids[tip]
1050
            lost_info = (name, lost_head)
1051
            lost_heads.append(lost_info)
1052
        return branch_tips, lost_heads
1053
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
1054
    def select_trunk(self, ref_names):
1055
        """Given a set of ref names, choose one as the trunk."""
1056
        for candidate in ['refs/heads/master']:
1057
            if candidate in ref_names:
1058
                return candidate
1059
        # Use the last reference in the import stream
1060
        return self.last_ref
1061
0.64.37 by Ian Clatworthy
create branches as required
1062
    def make_branch(self, location):
        """Return the branch at 'location', creating it if there is none.

        :param location: where the branch is, or is to be created
        :return: the branch that was opened or newly created
        """
        try:
            control_dir = bzrdir.BzrDir.open(location)
            return control_dir.open_branch()
        except errors.NotBranchError:
            # Nothing there yet, so create it
            return bzrdir.BzrDir.create_branch_convenience(location)
0.64.34 by Ian Clatworthy
report lost branches
1068
1069
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
1070
        """Generate Bazaar branch names from import ref names.
1071
        
1072
        :return: a dictionary with Bazaar names as keys and
1073
          the original reference names as values.
1074
        """
0.64.34 by Ian Clatworthy
report lost branches
1075
        bazaar_names = {}
1076
        for ref_name in sorted(ref_names):
1077
            parts = ref_name.split('/')
1078
            if parts[0] == 'refs':
1079
                parts.pop(0)
1080
            full_name = "--".join(parts)
1081
            bazaar_name = parts[-1]
1082
            if bazaar_name in bazaar_names:
0.64.109 by Ian Clatworthy
initial cut at reset support
1083
                if parts[0] == 'remotes':
1084
                    bazaar_name += ".remote"
1085
                else:
1086
                    bazaar_name = full_name
0.64.34 by Ian Clatworthy
report lost branches
1087
            bazaar_names[bazaar_name] = ref_name
1088
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1089
1090
    def _update_branch(self, br, last_mark):
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1091
        """Update a branch with last revision and tag information.
1092
        
1093
        :return: whether the branch was changed or not
1094
        """
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
1095
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
0.64.64 by Ian Clatworthy
save tags known about in each branch
1096
        revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
1097
        revno = len(revs)
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1098
        existing_revno, existing_last_rev_id = br.last_revision_info()
1099
        changed = False
1100
        if revno != existing_revno or last_rev_id != existing_last_rev_id:
1101
            br.set_last_revision_info(revno, last_rev_id)
1102
            changed = True
0.64.64 by Ian Clatworthy
save tags known about in each branch
1103
        # apply tags known in this branch
1104
        my_tags = {}
1105
        if self.tags:
1106
            for tag,rev in self.tags.items():
1107
                if rev in revs:
1108
                    my_tags[tag] = rev
1109
            if my_tags:
1110
                br.tags._set_tag_dict(my_tags)
1111
                changed = True
1112
        if changed:
1113
            tagno = len(my_tags)
1114
            note("\t branch %s now has %d %s and %d %s", br.nick,
1115
                revno, helpers.single_plural(revno, "revision", "revisions"),
1116
                tagno, helpers.single_plural(tagno, "tag", "tags"))
0.64.54 by Ian Clatworthy
handle existing branches and only count the branches really updated
1117
        return changed