/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
36
from bzrlib.trace import (
37
    note,
38
    warning,
0.64.37 by Ian Clatworthy
create branches as required
39
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
40
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
41
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
43
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
44
    processor,
45
    revisionloader,
46
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
47
48
0.64.41 by Ian Clatworthy
update multiple working trees if requested
49
# How many commits before automatically reporting progress
50
_DEFAULT_AUTO_PROGRESS = 1000
51
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
52
# How many commits before automatically checkpointing
53
_DEFAULT_AUTO_CHECKPOINT = 10000
54
0.64.41 by Ian Clatworthy
update multiple working trees if requested
55
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
56
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 turns automatic
      checkpointing off.

    * count - only import this many commits then exit. If not set
      (or negative), all commits are imported.

    NOTE(review): ``repo``, ``branch``, ``working_tree``, ``params``,
    ``verbose`` and ``finished`` are used here but never assigned in this
    class - presumably they are provided by processor.ImportProcessor.
    """

    known_params = ['info', 'trees', 'checkpoint', 'count']

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Set up timing, parameters, caches and the first write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Load the optional hints file and decode the parameters.

        Sets ``info``, ``progress_every``, ``checkpoint_every``,
        ``max_commits`` and ``total_commits`` on this processor.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet). Floor division keeps this an integer.
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error."""
        # if anything goes wrong, abort the write group if any.
        # The bare except is deliberate: whatever was raised is re-raised
        # unchanged once the write group has been cleaned up.
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit pending data, update branches and trees, and dump stats."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound on both paths: it is passed
                # to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()
        if remind_about_update:
            self.note("To refresh the working tree for a branch, "
                "use 'bzr update'")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository.

        :param branches: the branches to look up working trees for
        :return: a list of working trees; branches without one are
            skipped (with a warning if a tree could not be opened)
        """
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def init_stats(self):
        """Reset the count of revisions imported so far."""
        self._revision_count = 0

    def dump_stats(self):
        """Report how many revisions, branches and trees were processed."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Blobs with a mark are keyed by the command id; unmarked ones
        # are keyed by a sha of their data.
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: unused; None is passed for automatic checkpoints
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        # 'Commit' the revision
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()

        # Update caches
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self.cache_mgr.last_ids[cmd.ref] = cmd.id
        self.cache_mgr.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint.
        # A checkpoint frequency of 0 means "never" - testing it first
        # also avoids a ZeroDivisionError in the modulo below.
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Output a progress note every ``progress_every`` commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                # Leave the ETA out when its text ends in '--'
                # (presumably meaning no estimate is available yet)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        # Only resets of tag refs are acted on; they are recorded as tags
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string
        :param from_: the import reference of the revision being tagged;
            a KeyError is raised if it has not been imported
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
322
323
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
324
class GenericCacheManager(object):
    """Holder of the caches shared while a GenericProcessor runs."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Set up empty caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, announce each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # Blob storage, keyed by dataref (either :mark or the sha-1).
        # A blob in _blobs is dropped once fetched; a sticky one stays.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory. Inventories are large and most
        # parents are recent in history, so only a few are kept.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id. Small, and all of them are needed.
        self.revision_ids = {}

        # path -> file-id, as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit mark to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # The blobs to make sticky - None means all of them
        self._blobs_to_keep = None
        if info is None:
            return
        try:
            multi_use = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return
        self._blobs_to_keep = multi_use

    def store_blob(self, id, data):
        """Remember a blob of data under the given id."""
        keep = self._blobs_to_keep
        sticky = keep is None or data == '' or id in keep
        if not sticky:
            self._blobs[id] = data
            return
        self._sticky_blobs[id] = data
        if self.verbose:
            print("making blob %s sticky" % (id,))

    def fetch_blob(self, id):
        """Return the data stored under id.

        A non-sticky blob is removed from the cache by this call.
        """
        if id in self._sticky_blobs:
            return self._sticky_blobs[id]
        return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # Deliberately a no-op: the file-id given to a path is
        # remembered even after that file is deleted.
        return None

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # The old entry is kept on purpose: both paths map to the same
        # file-id and no information is thrown away.
        file_id = self.file_ids[old_path]
        self.file_ids[new_path] = file_id
396
397
0.64.5 by Ian Clatworthy
first cut at generic processing method
398
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that turns an import commit into a Bazaar revision.

    For each commit command an inventory is seeded from the first parent
    (or created empty when there is none), updated by the file command
    handlers, and finally loaded into the repository together with the
    revision metadata.
    """

    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command being processed
        :param repo: the repository revisions are loaded into
        :param cache_mgr: the cache manager holding inventories, file-ids,
          blobs, heads and mark -> revision-id mappings
        :param verbose: if True, log the inventory delta and the
          resulting inventory for each commit
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.verbose = verbose
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            lambda revision_ids: self._get_inventories(revision_ids))

    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parents, updates the
        head tracking and seeds the working inventory.
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Work out the true set of parents: an unmarked commit implicitly
        # follows the last commit seen on the same ref.
        cmd = self.command
        if cmd.mark is None:
            last_id = self.cache_mgr.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        else:
            parents = cmd.parents

        # Track the heads: a commit's parents stop being heads and the
        # commit itself becomes one.
        for parent in parents:
            try:
                del self.cache_mgr.heads[parent]
            except KeyError:
                warning("didn't find parent %s while tracking heads",
                    parent)
        self.cache_mgr.heads[cmd.id] = cmd.ref

        # Map the parent marks to Bazaar revision ids
        self.parents = [self.cache_mgr.revision_ids[p] for p in parents]

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory and loads the revision into the repository.
        """
        if self.verbose:
            note("applying inventory delta ...")
            for entry in self.inv_delta:
                note("  %r" % (entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for entry in self.inventory:
                note("  %r" % (entry,))

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = self._format_person(committer)
        author = self.command.author
        if author is not None:
            author_id = self._format_person(author)
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id))

    def _format_person(self, person):
        """Format a person tuple as 'name <email>'.

        :param person: a sequence whose first two items are used as the
          name and the email address
        """
        return "%s <%s>" % (person[0], person[1])

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a file, taking its data from a blob or inline."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory, warning if it isn't there."""
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (path,))
        finally:
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass

    def copy_handler(self, filecmd):
        """Copying is not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Queue a rename in the inventory delta and update the caches."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        ie = self.inventory[file_id]
        self.inv_delta.append((old_path, new_path, file_id, ie))
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Deleting everything is not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            # First time this path is seen: allocate and remember an id
            id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = id
            return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_person(committer)
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first; on a miss the inventory is
        read from the repository and cached.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # Single-argument parenthesised form: same output under the
            # Python 2 print statement, and valid Python 3 syntax.
            print("Hmm - get_inventory cache miss for %s" % revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: present, inventories where
          present = the revision ids found in the cache or repository
          inventories = one inventory per requested revision id; for a
            revision the repository doesn't have, the inventory of
            ``revision_tree(None)`` is used
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                print("Hmm - get_inventories cache miss for %s"
                    % revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if the kind is neither a file nor a
          symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parent first.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
670
671
0.64.34 by Ian Clatworthy
report lost branches
672
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
673
0.64.37 by Ian Clatworthy
create branches as required
674
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
675
        """Create an object responsible for updating branches.
676
677
        :param heads_by_ref: a dictionary where
678
          names are git-style references like refs/heads/master;
679
          values are one item lists of commits marks.
680
        """
0.64.37 by Ian Clatworthy
create branches as required
681
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
682
        self.branch = branch
683
        self.cache_mgr = cache_mgr
684
        self.heads_by_ref = heads_by_ref
685
        self.last_ref = last_ref
686
687
    def update(self):
688
        """Update the Bazaar branches and tips matching the heads.
689
690
        If the repository is shared, this routine creates branches
691
        as required. If it isn't, warnings are produced about the
692
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
693
0.64.34 by Ian Clatworthy
report lost branches
694
        :return: updated, lost_heads where
695
          updated = the list of branches updated
696
          lost_heads = a list of (bazaar-name,revision) for branches that
697
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
698
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
699
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
700
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
701
        for br, tip in branch_tips:
702
            self._update_branch(br, tip)
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
703
            updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
704
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
705
706
    def _get_matching_branches(self):
707
        """Get the Bazaar branches.
708
0.64.34 by Ian Clatworthy
report lost branches
709
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
710
          default_tip = the last commit mark for the default branch
711
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
712
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
713
            would have been created had the repository been shared and
714
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
715
        """
0.64.37 by Ian Clatworthy
create branches as required
716
        branch_tips = []
717
        lost_heads = []
718
        ref_names = self.heads_by_ref.keys()
719
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
720
            trunk = self.select_trunk(ref_names)
721
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
722
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
723
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
724
725
        # Convert the reference names into Bazaar speak
726
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
727
0.64.37 by Ian Clatworthy
create branches as required
728
        # Policy for locating branches
729
        def dir_under_current(name, ref_name):
730
            # Using the Bazaar name, get a directory under the current one
731
            return name
732
        def dir_sister_branch(name, ref_name):
733
            # Using the Bazaar name, get a sister directory to the branch
734
            return osutils.pathjoin(self.branch.base, "..", name)
735
        if self.branch is not None:
736
            dir_policy = dir_sister_branch
737
        else:
738
            dir_policy = dir_under_current
739
0.64.34 by Ian Clatworthy
report lost branches
740
        # Create/track missing branches
741
        shared_repo = self.repo.is_shared()
742
        for name in sorted(bzr_names.keys()):
743
            ref_name = bzr_names[name]
744
            tip = self.heads_by_ref[ref_name][0]
745
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
746
                location = dir_policy(name, ref_name)
747
                try:
748
                    br = self.make_branch(location)
749
                    branch_tips.append((br,tip))
750
                    continue
751
                except errors.BzrError, ex:
752
                    error("ERROR: failed to create branch %s: %s",
753
                        location, ex)
754
            lost_head = self.cache_mgr.revision_ids[tip]
755
            lost_info = (name, lost_head)
756
            lost_heads.append(lost_info)
757
        return branch_tips, lost_heads
758
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
759
    def select_trunk(self, ref_names):
760
        """Given a set of ref names, choose one as the trunk."""
761
        for candidate in ['refs/heads/master']:
762
            if candidate in ref_names:
763
                return candidate
764
        # Use the last reference in the import stream
765
        return self.last_ref
766
0.64.37 by Ian Clatworthy
create branches as required
767
    def make_branch(self, location):
        """Create a branch at the given location.

        :param location: where to create the branch
        :return: whatever BzrDir.create_branch_convenience returns
        """
        create_branch = bzrdir.BzrDir.create_branch_convenience
        return create_branch(location)
0.64.34 by Ian Clatworthy
report lost branches
770
771
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
772
        """Generate Bazaar branch names from import ref names.
773
        
774
        :return: a dictionary with Bazaar names as keys and
775
          the original reference names as values.
776
        """
0.64.34 by Ian Clatworthy
report lost branches
777
        bazaar_names = {}
778
        for ref_name in sorted(ref_names):
779
            parts = ref_name.split('/')
780
            if parts[0] == 'refs':
781
                parts.pop(0)
782
            full_name = "--".join(parts)
783
            bazaar_name = parts[-1]
784
            if bazaar_name in bazaar_names:
785
                bazaar_name = full_name
786
            bazaar_names[bazaar_name] = ref_name
787
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
788
789
    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the branch's new tip
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revision number is the length of the history ending at the tip
        history = list(self.repo.iter_reverse_revision_history(last_rev_id))
        revno = len(history)
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)