/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
39
    processor,
40
    revisionloader,
41
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
42
43
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
44
# How many commits before automatically checkpointing
45
# Used by GenericProcessor._load_info_and_params() when the 'checkpoint'
# parameter is not supplied.
_DEFAULT_AUTO_CHECKPOINT = 10000
46
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
47
def _single_plural(n, single, plural):
48
    """Return a single or plural form of a noun based on number."""
49
    if n == 1:
50
        return single
51
    else:
52
        return plural
53
54
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
55
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * tags are stored in the current branch
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of zero or less turns automatic
      checkpointing off.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'checkpoint', 'count']

    def pre_process(self):
        """Set up parameters, caches and statistics, then open a write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.active_branch = self.branch
        self.init_stats()

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Read the 'info', 'checkpoint' and 'count' parameters.

        Sets self.info, self.checkpoint_every, self.max_commits and
        self.total_commits.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error."""
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately bare: the write group must also be aborted on
            # KeyboardInterrupt and friends. The exception is re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit the write group, dump statistics and update the branch.

        If no commit was imported for the branch, its last revision is
        left untouched rather than failing on the missing cache entry.
        """
        # Commit the current write group.
        self.repo.commit_write_group()

        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is None:
            # Nothing was committed (e.g. an empty stream or one holding
            # only blobs), so there is no new tip to record.
            note("No commits were imported - branch tip left unchanged.")
        else:
            # Count the history without materialising it as a list
            history = self.repo.iter_reverse_revision_history(last_rev_id)
            revno = sum(1 for _rev in history)
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Note how many revisions, branches and tags were imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Blobs are keyed by their mark if they have one, else by sha
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        cmd is not used; commit_handler() passes None when it triggers
        an automatic checkpoint.
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        After the commit is loaded, the import either stops (when the
        requested count of commits has been reached) or checkpoints
        (when the automatic checkpoint frequency has been hit).
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch, self.verbose)
        handler.process()
        rev_id = handler.revision_id
        self.cache_mgr.revision_ids[cmd.ref] = rev_id
        if cmd.mark is not None:
            self.cache_mgr.revision_ids[":" + cmd.mark] = rev_id
            details = "(:%s)" % cmd.mark
        else:
            # no mark to show - avoid reporting a misleading "(:None)"
            details = ''
        self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
        self._revision_count += 1
        self.report_progress(details)
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every > 0 and
            self._revision_count % self.checkpoint_every == 0):
            # the > 0 guard stops a 'checkpoint' of 0 dividing by zero
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Note how many commits have been processed so far.

        Output is produced for every commit when verbose, otherwise
        for every tenth commit.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = '[%s] ' % progress.str_tdelta(eta)
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # is more useful for determining when things might complete
        self.note("progress %s" % (cmd.message,))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Only resets of refs under refs/tags/ are honoured; they define a
        tag. Anything else is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            if cmd.from_ is None:
                # nothing to point the tag at, and looking None up in the
                # revision-id table would raise KeyError
                warning("ignoring reset of '%s' - no 'from' given", cmd.ref)
            else:
                self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string
        :param from_: the import reference of the tagged commit; it must
            already be in the cache manager's revision_ids table
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
251
252
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
253
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, note each blob that is made sticky
        :param inventory_cache_size: the size given to the LRU cache
            of inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._sticky_blob_ids(info)

    @staticmethod
    def _sticky_blob_ids(info):
        """Extract the set of blob ids to keep from the info hints.

        :param info: a mapping shaped like the --info output, or None
        :return: a set of blob ids, or None (meaning keep every blob)
            when info is None or has no multi-use blob list
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A config value holding a single item may come back as a bare
        # string; wrap it so membership tests compare whole ids rather
        # than substrings.
        if not isinstance(multi, (list, tuple, set, frozenset)):
            multi = [multi]
        # a set keeps the per-blob lookup in store_blob() cheap
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when every blob is being kept, when it
        is empty, or when its id is one of those to keep. Other blobs
        are dropped again the first time they are fetched.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky", id)
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Raises KeyError if the id is unknown or names a non-sticky blob
        that has already been fetched.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
323
324
0.64.5 by Ian Clatworthy
first cut at generic processing method
325
class GenericCommitHandler(processor.CommitHandler):
326
0.64.14 by Ian Clatworthy
commit of modified files working
327
    def __init__(self, command, repo, cache_mgr, active_branch, verbose=False):
        """Remember the collaborators used while handling one commit.

        :param command: the commit command, handed to the base class
        :param repo: the repository given to the revision loader
        :param cache_mgr: the cache manager consulted for inventories,
            revision ids and file ids
        :param active_branch: key used to look up the last revision id
            when the commit names no parents
        :param verbose: if True, inventory details are noted when saving
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.active_branch = active_branch
        self.cache_mgr = cache_mgr
        self.repo = repo
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(self.repo,
            self._get_inventories)
336
337
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resets the per-commit delta and text
        caches, works out the parent revision ids and seeds the working
        inventory from the first parent (or from scratch if there is none).
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Work out the parent revision ids
        parent_refs = self.command.parents
        if parent_refs:
            known_ids = self.cache_mgr.revision_ids
            self.parents = [known_ids[ref] for ref in parent_refs]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            tip = self.cache_mgr.last_revision_ids.get(self.active_branch)
            self.parents = [tip] if tip else []

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            basis = self.get_inventory(self.parents[0])
            self.inventory = basis.copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
372
0.64.14 by Ian Clatworthy
commit of modified files working
373
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory under the new revision id, then builds the Revision
        object and hands it to the loader.
        """
        if self.verbose:
            note("applying inventory delta ...")
            for change in self.inv_delta:
                note("  %r" % (change,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for item in self.inventory:
                note("  %r" % (item,))

        # Load the revision into the repository
        committer_info = self.command.committer
        committer_id = "%s <%s>" % (committer_info[0], committer_info[1])
        # the author is only recorded when it differs from the committer
        properties = {}
        author_info = self.command.author
        if author_info is not None:
            author_id = "%s <%s>" % (author_info[0], author_info[1])
            if author_id != committer_id:
                properties['author'] = author_id
        rev = revision.Revision(
            timestamp=committer_info[2],
            timezone=committer_info[3],
            committer=committer_id,
            message=self._escape_commit_message(self.command.message),
            revision_id=self.revision_id,
            properties=properties,
            parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None, self._get_lines)
405
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
406
    def _escape_commit_message(self, message):
407
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
408
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
409
        # Code copied from bzrlib.commit.
410
        
411
        # Python strings can include characters that can't be
412
        # represented in well-formed XML; escape characters that
413
        # aren't listed in the XML specification
414
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
415
        message, _ = re.subn(
416
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
417
            lambda match: match.group(0).encode('unicode_escape'),
418
            message)
419
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
420
421
    def modify_handler(self, filecmd):
        """Pass a file modification on to _modify_inventory().

        The content comes from the blob cache when the command refers to
        a blob, otherwise from the data carried by the command itself.
        """
        ref = filecmd.dataref
        content = (self.cache_mgr.fetch_blob(ref) if ref is not None
            else filecmd.data)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
428
429
    def delete_handler(self, filecmd):
        """Remove the entry for a deleted path from the current inventory.

        A path whose file-id is not in the inventory is reported with a
        warning and otherwise ignored. The cache manager is told about
        the deletion whether or not the inventory removal succeeded.
        """
        doomed = filecmd.path
        try:
            try:
                del self.inventory[self.bzr_file_id(doomed)]
            except errors.NoSuchId:
                warning("ignoring delete of %s - not in inventory" % (doomed,))
        finally:
            try:
                self.cache_mgr._delete_path(doomed)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
440
441
    def copy_handler(self, filecmd):
        """File copies are not supported: always raises NotImplementedError."""
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
443
444
    def rename_handler(self, filecmd):
        """Record a rename in the inventory delta and the path cache."""
        source, target = filecmd.old_path, filecmd.new_path
        # look the id up first: this also makes sure the old path has
        # an entry in the file-id cache before it is copied to the new one
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        self.inv_delta.append((source, target, file_id, entry))
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
451
452
    def deleteall_handler(self, filecmd):
        """Delete-all is not supported: always raises NotImplementedError."""
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
454
0.64.16 by Ian Clatworthy
safe processing tweaks
455
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path not yet in the cache manager's file-id table is given a
        freshly generated id, which is remembered for later lookups.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        if path in known_ids:
            return known_ids[path], False
        fresh_id = generate_ids.gen_file_id(path)
        known_ids[path] = fresh_id
        return fresh_id, True
467
0.64.5 by Ian Clatworthy
first cut at generic processing method
468
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        Same as bzr_file_id_and_new() with the is_new flag dropped.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
471
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
472
    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision.

        The new inventory is created with this commit's revision id.
        """
        return inventory.Inventory(revision_id=self.revision_id)
476
0.64.5 by Ian Clatworthy
first cut at generic processing method
477
    def gen_revision_id(self):
        """Generate a revision id.

        The id is generated from the committer's "name <email>" string
        and the commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        committer_info = self.command.committer
        return generate_ids.gen_revision_id(
            "%s <%s>" % (committer_info[0], committer_info[1]),
            committer_info[2])
488
0.64.7 by Ian Clatworthy
start of multiple commit handling
489
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        :param revision_id: the revision whose inventory is wanted
        :return: the cached inventory if there is one, otherwise the
            inventory read from the repository (which is then cached)
        """
        try:
            return self.cache_mgr.inventories[revision_id]
        except KeyError:
            pass
        # Report misses through the trace machinery, and only when asked
        # to be verbose, rather than printing to stdout unconditionally.
        if self.verbose:
            note("Hmm - get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        self.cache_mgr.inventories[revision_id] = inv
        return inv
499
0.64.5 by Ian Clatworthy
first cut at generic processing method
500
    def _get_inventories(self, revision_ids):
501
        """Get the inventories for revision-ids.
502
        
503
        This is a callback used by the RepositoryLoader to
504
        speed up inventory reconstruction."""
505
        present = []
506
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
507
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
508
        # successfully loaded into the repsoitory
509
        for revision_id in revision_ids:
510
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
511
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
512
                present.append(revision_id)
513
            except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
514
                print "Hmm - get_inventories cache miss for %s" % revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
515
                # Not cached so reconstruct from repository
516
                if self.repo.has_revision(revision_id):
517
                    rev_tree = self.repo.revision_tree(revision_id)
518
                    present.append(revision_id)
519
                else:
520
                    rev_tree = self.repo.revision_tree(None)
521
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
522
                self.cache_mgr.inventories[revision_id] = inv
523
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
524
        return present, inventories
525
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
526
    def _get_lines(self, file_id):
527
        """Get the lines for a file-id."""
528
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
529
530
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add a new entry to the inventory, or replace an existing one.

        :param path: path of the item; its containing directory is
            created first if it is not known yet
        :param kind: entry kind handed to inventory.make_entry
        :param is_executable: executable flag, used for file entries only
        :param data: the content of a file entry, or the target of a
            symlink entry
        :raises errors.BzrError: if the entry made for kind is neither a
            file nor a symlink
        """
        # Build the replacement InventoryEntry
        name, parent_entry = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(
            kind, name, parent_entry.file_id, file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum(len(line) for line in text_lines)
            # Keep the text so _get_lines can hand it back for this file-id
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record the entry, overwriting any previous one with the same id
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            # NOTE(review): only the by-id table is updated here; if the
            # parent directory entry keeps its own child table it may still
            # reference the old entry -- confirm against bzrlib.inventory.
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
555
556
    def _ensure_directory(self, path):
        """Ensure that the directory containing 'path' is in the inventory.

        Directories not yet present in directory_entries are created
        recursively, parent first, and added to the inventory.

        :param path: path of the item whose containing directory is needed
        :return: a (basename, entry) tuple: the last component of path and
            the inventory entry of its containing directory (the inventory
            root when path has no directory part)
        """
        parent_path, basename = osutils.split(path)
        if parent_path == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[parent_path]
        except KeyError:
            # Unknown directory: fall through and create it
            pass

        # Make sure the parent exists first, then create the directory itself
        parent_name, grandparent = self._ensure_directory(parent_path)
        dir_id = self.bzr_file_id(parent_path)
        make_directory = inventory.entry_factory['directory']
        dir_entry = make_directory(dir_id, parent_name, grandparent.file_id)
        dir_entry.revision = self.revision_id
        self.directory_entries[parent_path] = dir_entry
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_id] = []
        self.inventory.add(dir_entry)
        return basename, dir_entry