/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
29
    revision,
30
    revisiontree,
31
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
32
from bzrlib.trace import (
33
    note,
34
    warning,
35
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
36
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
37
from bzrlib.plugins.fastimport import (
38
    processor,
39
    revisionloader,
40
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
42
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
43
def _single_plural(n, single, plural):
44
    """Return a single or plural form of a noun based on number."""
45
    if n == 1:
46
        return single
47
    else:
48
        return plural
49
50
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
51
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory
    * commits are processed
    * tags are stored in the current branch
    * LATER: named branch support
    * checkpoints are ignored
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)
    """

    known_params = ['info']

    def pre_process(self):
        """Set up the caches, counters and tag table before importing."""
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

    def post_process(self):
        """Report statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is None:
            # No commit was processed, so there is no new head to record;
            # indexing the table directly would raise KeyError here.
            warning("no revisions were imported - branch tip left unchanged")
        else:
            history = self.repo.iter_reverse_revision_history(last_rev_id)
            revno = sum(1 for _ in history)
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # Starts at one: post_process only ever updates self.branch
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log a one-line summary of the revisions, branches and tags."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob is cached under ":<mark>" when it has a mark, otherwise
        under the sha of its data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group. On
        success the new revision id is recorded against the commit's ref,
        its mark (if any) and the active branch.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch, self.verbose)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            rev_id = handler.revision_id
            self.cache_mgr.revision_ids[cmd.ref] = rev_id
            if cmd.mark is not None:
                # same key format as blob_handler uses for marks
                self.cache_mgr.revision_ids[":%s" % (cmd.mark,)] = rev_id
            self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
            self._revision_count += 1
            note("%s loaded commit %d (%s)" % (self._time_of_day(),
                self._revision_count, cmd.mark or cmd.ref))
        except:
            # Deliberately bare: whatever went wrong, the write group must
            # be aborted before the error is re-raised unchanged.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under refs/tags/ define a tag; any other reset is
        ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if cmd.ref.startswith(tag_prefix):
            self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name, decoded as utf-8 with undecodable
            bytes replaced
        :param from_: an import reference; it must already be a key in
            the cache manager's revision_ids table, else KeyError is raised
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
179
180
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
181
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache that
            holds inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._blobs_to_keep_from(info)

    @staticmethod
    def _blobs_to_keep_from(info):
        """Return the blob ids the info file says to keep, or None.

        None means "keep every blob" and is returned when there is no
        info at all, or when it lacks the 'Blob usage tracking' section
        or that section's 'multi' entry.
        """
        if info is None:
            # Subscripting None raises TypeError, not KeyError, so this
            # case has to be checked explicitly.
            return None
        try:
            return info['Blob usage tracking']['multi']
        except KeyError:
            # No safe choice but to do the lot
            return None

    def store_blob(self, id, data):
        """Store a blob of data.

        A blob is made sticky when no keep-list is known, when its data
        is empty, or when its id is in the keep-list. Any other blob is
        stored so that it can be fetched once.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky" % (id,))
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        A sticky blob can be fetched repeatedly. A non-sticky blob is
        removed as it is returned, so fetching it again raises KeyError.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
249
250
0.64.5 by Ian Clatworthy
first cut at generic processing method
251
class GenericCommitHandler(processor.CommitHandler):
252
0.64.14 by Ian Clatworthy
commit of modified files working
253
    def __init__(self, command, repo, cache_mgr, active_branch, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed to the base class
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the cache manager shared across commits
        :param active_branch: key used to look up the last revision id
            when the command names no parents
        :param verbose: if True, log the inventory delta and the
            resulting inventory while saving
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        self.verbose = verbose
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)
262
263
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parent revisions and
        seeds the working inventory from the first parent (or from an
        empty inventory when there are no parents).
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Get the parent inventories
        if self.command.parents:
            self.parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                self.parents = [last_rev]
            else:
                self.parents = []

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
298
0.64.14 by Ian Clatworthy
commit of modified files working
299
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory under the new revision id and loads the revision into
        the repository.
        """
        if self.verbose:
            note("applying inventory delta ...")
            for entry in self.inv_delta:
                note("  %r" % (entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for entry in self.inventory:
                note("  %r" % (entry,))

        # Load the revision into the repository
        rev_props = {}
        # committer and author are indexed as
        # (name, email, timestamp, timezone)
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            # only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None, self._get_lines)
331
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
332
    def _escape_commit_message(self, message):
333
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
334
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
335
        # Code copied from bzrlib.commit.
336
        
337
        # Python strings can include characters that can't be
338
        # represented in well-formed XML; escape characters that
339
        # aren't listed in the XML specification
340
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
341
        message, _ = re.subn(
342
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
343
            lambda match: match.group(0).encode('unicode_escape'),
344
            message)
345
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
346
347
    def modify_handler(self, filecmd):
        """Process a file modification.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command's inline data.
        """
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)
354
355
    def delete_handler(self, filecmd):
        """Process a file deletion.

        Deleting a path that is not in the inventory is reported with a
        warning rather than treated as an error.
        """
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (path,))
        finally:
            # Always let the cache manager know, whether or not the
            # inventory held the path; a path unknown to it is fine.
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
366
367
    def copy_handler(self, filecmd):
368
        raise NotImplementedError(self.copy_handler)
369
370
    def rename_handler(self, filecmd):
        """Process a file rename.

        The rename is queued in the inventory delta, and the new path is
        given the same file-id as the old one in the cache manager.
        """
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        ie = self.inventory[file_id]
        self.inv_delta.append((old_path, new_path, file_id, ie))
        self.cache_mgr._rename_path(old_path, new_path)
0.64.5 by Ian Clatworthy
first cut at generic processing method
377
378
    def deleteall_handler(self, filecmd):
379
        raise NotImplementedError(self.deleteall_handler)
380
0.64.16 by Ian Clatworthy
safe processing tweaks
381
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path seen for the first time is given a freshly generated
        file-id, which is remembered in the cache manager.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            return file_id, True
393
0.64.5 by Ian Clatworthy
first cut at generic processing method
394
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        Note that an id is generated and remembered if the path has
        not been seen before.
        """
        file_id, _ = self.bzr_file_id_and_new(path)
        return file_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
397
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
398
    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)
402
0.64.5 by Ian Clatworthy
first cut at generic processing method
403
    def gen_revision_id(self):
        """Generate a revision id.

        The id is built from the committer's "name <email>" string and
        the commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)
414
0.64.7 by Ian Clatworthy
start of multiple commit handling
415
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first. On a miss the inventory is
        read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            note("Hmm - get_inventory cache miss for %s" % revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv
425
0.64.5 by Ian Clatworthy
first cut at generic processing method
426
    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: present, inventories where present lists the
          revision-ids found in the cache or the repository and
          inventories holds one inventory per requested revision-id,
          in order
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                note("Hmm - get_inventories cache miss for %s" % revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # unknown revision: fall back to the tree for None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories
451
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
452
    def _get_lines(self, file_id):
453
        """Get the lines for a file-id."""
454
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
455
456
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item; missing parent directories are
            created first
        :param kind: inventory kind of the item; only kinds that yield
            a file or symlink entry are supported
        :param is_executable: executable flag, used for files only
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: if the kind yields any other entry type
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            # keep the text so _get_lines can supply it at load time
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
        else:
            self.inventory.add(ie)
0.64.5 by Ian Clatworthy
first cut at generic processing method
481
482
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parent first, and
        added to the inventory.

        :return: basename, parent_ie where basename is the last
          component of path and parent_ie is the inventory entry of
          its containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie