/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
36
from bzrlib.trace import (
37
    note,
38
    warning,
0.64.37 by Ian Clatworthy
create branches as required
39
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
40
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
41
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
43
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
44
    processor,
45
    revisionloader,
46
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
47
48
0.64.41 by Ian Clatworthy
update multiple working trees if requested
49
# Number of commits processed between automatic progress reports
_DEFAULT_AUTO_PROGRESS = 1000

# Number of commits processed between automatic checkpoints
_DEFAULT_AUTO_CHECKPOINT = 10000
54
0.64.41 by Ian Clatworthy
update multiple working trees if requested
55
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
56
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 disables automatic
      checkpoints.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'trees', 'checkpoint', 'count']

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Prepare for the import.

        Records the start time, loads the hints file and parameters,
        creates the cache manager, resets the statistics and opens the
        first write group on the repository.
        """
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Work out the import settings from the parameters and hints file.

        Sets self.info, self.progress_every, self.checkpoint_every,
        self.max_commits and self.total_commits.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division so the interval stays an integer even when
            # true division is in effect.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, cleaning up on failure.

        If anything goes wrong, the write group (if one is open) is
        aborted and the original exception is re-raised.
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # The bare except is deliberate: the write group must be
            # aborted for every kind of failure, and the exception is
            # always re-raised below.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Finish the import.

        Commits the open write group, updates the branches, reports any
        heads for which no branch was created, optionally updates the
        working trees (the 'trees' parameter) and dumps the statistics.
        """
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                # Plain note: these detail lines are not timestamped
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound in both branches; it is
                # passed to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()
        if remind_about_update:
            self.note("To refresh the working tree for a branch, "
                "use 'bzr update'")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository.

        :param branches: the branches to find working trees for
        :return: a list of working trees. Branches without a working
            tree are skipped, with a warning when one was expected.
        """
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def init_stats(self):
        """Reset the count of revisions imported."""
        self._revision_count = 0

    def dump_stats(self):
        """Output a timestamped summary of what was imported.

        Reads the branch and tree counts set by post_process().
        """
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Marked blobs are stored under the command id; unmarked ones
        # under a hash of their data.
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: not used; None is passed for automatic checkpoints
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        # 'Commit' the revision
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()

        # Update caches
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self.cache_mgr.last_ids[cmd.ref] = cmd.id
        self.cache_mgr.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            # NOTE(review): presumably the base class stops its command
            # loop when this flag is set - confirm against ImportProcessor
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A checkpoint interval of 0 means no automatic checkpoints
            # (it would otherwise be a division by zero).
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Output a progress note every self.progress_every commits.

        :param details: text appended to the end of the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                # Leave the ETA out when it renders as a '--' placeholder
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Only resets of refs under 'refs/tags/' are acted on (as tags);
        any other reset is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string
        :param from_: the import reference of the tagged commit
        :raises KeyError: if from_ has no recorded revision-id
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
322
323
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
324
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored as self.verbose
        :param inventory_cache_size: size given to the LRU cache
            of inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit mark to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._sticky_ids_from_info(info)

    @staticmethod
    def _sticky_ids_from_info(info):
        """Return the ids of the blobs to keep, or None to keep all.

        :param info: the hints mapping, or None
        :return: None when there are no hints or the hints have no
            ['Blob usage tracking']['multi'] entry (possible when no
            blobs are used); otherwise a frozenset of blob ids
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            return None
        # A lone value may arrive as a plain string rather than a list;
        # wrap it so membership tests compare whole ids, not substrings.
        if not isinstance(multi, (list, tuple)):
            multi = [multi]
        # A set makes the per-blob lookup in store_blob() constant time.
        return frozenset(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when everything is kept, when it is
        empty, or when its id is one of the ids to keep.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed from the
        cache by the fetch.

        :raises KeyError: if no blob is stored under id
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
394
395
0.64.5 by Ian Clatworthy
first cut at generic processing method
396
class GenericCommitHandler(processor.CommitHandler):
397
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
398
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed on to
          processor.CommitHandler
        :param repo: the repository the revision loader works on
        :param cache_mgr: the cache manager holding the inventory, blob
          and id caches used by this handler
        :param verbose: if True, inventory cache misses are reported
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.cache_mgr = cache_mgr
        self.repo = repo

        def cached_inventories(revision_ids):
            # Looked up on each call rather than bound once
            return self._get_inventories(revision_ids)

        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo, cached_inventories)
406
0.64.43 by Ian Clatworthy
verbose mode cleanup
407
    def note(self, msg, *args):
        """Output a note with the id of the current command appended.

        :param msg: the message, which may hold %-style placeholders
        :param args: passed to the module-level note() together with the
          extended message
        """
        with_context = "%s (%s)" % (msg, self.command.id)
        note(with_context, *args)
411
412
    def warning(self, msg, *args):
        """Output a warning with the id of the current command appended.

        :param msg: the message, which may hold %-style placeholders
        :param args: passed to the module-level warning() together with
          the extended message
        """
        with_context = "WARNING: %s (%s)" % (msg, self.command.id)
        warning(with_context, *args)
416
0.64.5 by Ian Clatworthy
first cut at generic processing method
417
    def pre_process_files(self):
        """Prepare for committing.

        Allocates the revision id, resets the per-commit state, works out
        the parents, updates the heads tracked by the cache manager and
        seeds the working inventory from the first parent, or from a new
        inventory when there are no parents.
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        cmd = self.command
        cache_mgr = self.cache_mgr

        # Work out the true set of parents: without a mark, fall back to
        # the id recorded in last_ids for the ref of this commit
        if cmd.mark is not None:
            parents = cmd.parents
        else:
            last_id = cache_mgr.last_ids.get(cmd.ref)
            if last_id is None:
                parents = []
            else:
                parents = [last_id]

        # Track the heads
        heads = cache_mgr.heads
        for parent in parents:
            try:
                del heads[parent]
            except KeyError:
                # it's ok if the parent isn't there - another
                # commit may have already removed it
                pass
        heads[cmd.id] = cmd.ref

        # Map the parents onto the ids recorded in revision_ids
        if parents:
            revision_ids = cache_mgr.revision_ids
            self.parents = [revision_ids[parent] for parent in parents]
        else:
            self.parents = []

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
466
0.64.14 by Ian Clatworthy
commit of modified files working
467
    def post_process_files(self):
        """Save the revision.

        The pending inventory delta is applied, the inventory is cached
        under the new revision id, and a Revision built from the commit
        command is passed to the revision loader.
        """
        inv = self.inventory
        inv.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = inv

        # Load the revision into the repository
        cmd = self.command
        committer = cmd.committer
        who = "%s <%s>" % (committer[0], committer[1])
        properties = {}
        author = cmd.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            # the author is only recorded when it is not the committer
            if author_id != who:
                properties['author'] = author_id
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=self._escape_commit_message(cmd.message),
            revision_id=self.revision_id,
            properties=properties,
            parent_ids=self.parents)

        def lines_for(file_id):
            return self._get_lines(file_id)

        self.loader.load(rev, inv, None, lines_for)
491
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
492
    def _escape_commit_message(self, message):
493
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
494
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
495
        # Code copied from bzrlib.commit.
496
        
497
        # Python strings can include characters that can't be
498
        # represented in well-formed XML; escape characters that
499
        # aren't listed in the XML specification
500
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
501
        message, _ = re.subn(
502
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
503
            lambda match: match.group(0).encode('unicode_escape'),
504
            message)
505
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
506
507
    def modify_handler(self, filecmd):
        """Apply a file modification to the inventory.

        The content is taken from the blob cache when the command refers
        to a blob, otherwise from the command itself.

        :param filecmd: the file command; its dataref, data, path, kind
          and is_executable attributes are used
        """
        blob_ref = filecmd.dataref
        if blob_ref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(blob_ref)
        self._modify_inventory(
            filecmd.path, filecmd.kind, filecmd.is_executable, content)
514
515
    def delete_handler(self, filecmd):
        """Remove a path from the inventory.

        A path that is not in the inventory only produces a warning. The
        cache manager is told about the deletion in every case.

        :param filecmd: the file command; its path attribute is used
        """
        path = filecmd.path
        try:
            try:
                file_id = self.bzr_file_id(path)
                del self.inventory[file_id]
            except errors.NoSuchId:
                self.warning("ignoring delete of %s as not in inventory", path)
        finally:
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
526
527
    def copy_handler(self, filecmd):
        """Handle a file copy command; not supported yet.

        :raises NotImplementedError: always
        """
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
529
530
    def rename_handler(self, filecmd):
        """Queue the rename of a path in the inventory delta.

        The inventory entry is looked up through the file-id of the old
        path, and the cache manager is told about the rename.

        :param filecmd: the file command; its old_path and new_path
          attributes are used
        """
        source = filecmd.old_path
        target = filecmd.new_path
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        change = (source, target, file_id, entry)
        self.inv_delta.append(change)
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
537
538
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command; not supported yet.

        :raises NotImplementedError: always
        """
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
540
0.64.16 by Ian Clatworthy
safe processing tweaks
541
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path not seen before is given a newly generated file-id, which
        is remembered in the cache manager.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            file_id = known_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            known_ids[path] = file_id
            return file_id, True
        return file_id, False
553
0.64.5 by Ian Clatworthy
first cut at generic processing method
554
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        This is bzr_file_id_and_new() without the is_new flag.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
557
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
558
    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision.

        :return: a new Inventory created for the current revision id
        """
        return inventory.Inventory(revision_id=self.revision_id)
562
0.64.5 by Ian Clatworthy
first cut at generic processing method
563
    def gen_revision_id(self):
        """Generate a revision id.

        The id is generated from the committer identity and the commit
        timestamp of the current command.

        Subclasses may override this to produce deterministic ids say.
        """
        details = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        identity = "%s <%s>" % (details[0], details[1])
        return generate_ids.gen_revision_id(identity, details[2])
574
0.64.7 by Ian Clatworthy
start of multiple commit handling
575
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first. On a miss the inventory is
        taken from the revision tree of the repository and cached.
        """
        inv_cache = self.cache_mgr.inventories
        try:
            return inv_cache[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.note("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        tree = self.repo.revision_tree(revision_id)
        inv = tree.inventory
        inv_cache[revision_id] = inv
        return inv
586
0.64.5 by Ian Clatworthy
first cut at generic processing method
587
    def _get_inventories(self, revision_ids):
588
        """Get the inventories for revision-ids.
589
        
590
        This is a callback used by the RepositoryLoader to
591
        speed up inventory reconstruction."""
592
        present = []
593
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
594
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
595
        # successfully loaded into the repsoitory
596
        for revision_id in revision_ids:
597
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
598
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
599
                present.append(revision_id)
600
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
601
                if self.verbose:
602
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
603
                # Not cached so reconstruct from repository
604
                if self.repo.has_revision(revision_id):
605
                    rev_tree = self.repo.revision_tree(revision_id)
606
                    present.append(revision_id)
607
                else:
608
                    rev_tree = self.repo.revision_tree(None)
609
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
610
                self.cache_mgr.inventories[revision_id] = inv
611
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
612
        return present, inventories
613
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
614
    def _get_lines(self, file_id):
615
        """Get the lines for a file-id."""
616
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
617
618
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of entry to make for it
        :param is_executable: the executable flag, used for files
        :param data: the file content, or the target of a symlink
        :raises errors.BzrError: if the entry made for kind is neither a
          file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id

        is_file = isinstance(entry, inventory.InventoryFile)
        if not is_file and not isinstance(entry, inventory.InventoryLnk):
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))
        if is_file:
            entry.executable = is_executable
            lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(lines)
            entry.text_size = sum(len(line) for line in lines)
            # keep the text so that _get_lines can supply it
            self.lines_for_commit[file_id] = lines
        else:
            entry.symlink_target = data

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
643
644
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parents first, and
        added to the inventory.

        :return: basename, ie where
          basename = the last component of path
          ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(new_dir)
        return basename, new_dir
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
673
674
0.64.34 by Ian Clatworthy
report lost branches
675
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
676
0.64.37 by Ian Clatworthy
create branches as required
677
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
678
        """Create an object responsible for updating branches.
679
680
        :param heads_by_ref: a dictionary where
681
          names are git-style references like refs/heads/master;
682
          values are one item lists of commits marks.
683
        """
0.64.37 by Ian Clatworthy
create branches as required
684
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
685
        self.branch = branch
686
        self.cache_mgr = cache_mgr
687
        self.heads_by_ref = heads_by_ref
688
        self.last_ref = last_ref
689
690
    def update(self):
691
        """Update the Bazaar branches and tips matching the heads.
692
693
        If the repository is shared, this routine creates branches
694
        as required. If it isn't, warnings are produced about the
695
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
696
0.64.34 by Ian Clatworthy
report lost branches
697
        :return: updated, lost_heads where
698
          updated = the list of branches updated
699
          lost_heads = a list of (bazaar-name,revision) for branches that
700
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
701
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
702
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
703
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
704
        for br, tip in branch_tips:
705
            self._update_branch(br, tip)
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
706
            updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
707
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
708
709
    def _get_matching_branches(self):
710
        """Get the Bazaar branches.
711
0.64.34 by Ian Clatworthy
report lost branches
712
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
713
          default_tip = the last commit mark for the default branch
714
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
715
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
716
            would have been created had the repository been shared and
717
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
718
        """
0.64.37 by Ian Clatworthy
create branches as required
719
        branch_tips = []
720
        lost_heads = []
721
        ref_names = self.heads_by_ref.keys()
722
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
723
            trunk = self.select_trunk(ref_names)
724
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
725
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
726
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
727
728
        # Convert the reference names into Bazaar speak
729
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
730
0.64.37 by Ian Clatworthy
create branches as required
731
        # Policy for locating branches
732
        def dir_under_current(name, ref_name):
733
            # Using the Bazaar name, get a directory under the current one
734
            return name
735
        def dir_sister_branch(name, ref_name):
736
            # Using the Bazaar name, get a sister directory to the branch
737
            return osutils.pathjoin(self.branch.base, "..", name)
738
        if self.branch is not None:
739
            dir_policy = dir_sister_branch
740
        else:
741
            dir_policy = dir_under_current
742
0.64.34 by Ian Clatworthy
report lost branches
743
        # Create/track missing branches
744
        shared_repo = self.repo.is_shared()
745
        for name in sorted(bzr_names.keys()):
746
            ref_name = bzr_names[name]
747
            tip = self.heads_by_ref[ref_name][0]
748
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
749
                location = dir_policy(name, ref_name)
750
                try:
751
                    br = self.make_branch(location)
752
                    branch_tips.append((br,tip))
753
                    continue
754
                except errors.BzrError, ex:
755
                    error("ERROR: failed to create branch %s: %s",
756
                        location, ex)
757
            lost_head = self.cache_mgr.revision_ids[tip]
758
            lost_info = (name, lost_head)
759
            lost_heads.append(lost_info)
760
        return branch_tips, lost_heads
761
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
762
    def select_trunk(self, ref_names):
763
        """Given a set of ref names, choose one as the trunk."""
764
        for candidate in ['refs/heads/master']:
765
            if candidate in ref_names:
766
                return candidate
767
        # Use the last reference in the import stream
768
        return self.last_ref
769
0.64.37 by Ian Clatworthy
create branches as required
770
    def make_branch(self, location):
        """Create a branch in the repository.

        :param location: where the branch is created, passed on to
          BzrDir.create_branch_convenience
        :return: whatever BzrDir.create_branch_convenience returns
        """
        create = bzrdir.BzrDir.create_branch_convenience
        return create(location)
0.64.34 by Ian Clatworthy
report lost branches
773
774
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
775
        """Generate Bazaar branch names from import ref names.
776
        
777
        :return: a dictionary with Bazaar names as keys and
778
          the original reference names as values.
779
        """
0.64.34 by Ian Clatworthy
report lost branches
780
        bazaar_names = {}
781
        for ref_name in sorted(ref_names):
782
            parts = ref_name.split('/')
783
            if parts[0] == 'refs':
784
                parts.pop(0)
785
            full_name = "--".join(parts)
786
            bazaar_name = parts[-1]
787
            if bazaar_name in bazaar_names:
788
                bazaar_name = full_name
789
            bazaar_names[bazaar_name] = ref_name
790
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
791
792
    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        Tags are not applied yet (see the TODO below).

        :param br: the branch to update
        :param last_mark: the commit mark whose revision becomes the tip
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # the revision number is the length of the history of the tip
        history = self.repo.iter_reverse_revision_history(last_rev_id)
        revno = len(list(history))
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)