/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
39
    processor,
40
    revisionloader,
41
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
42
43
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
44
def _single_plural(n, single, plural):
45
    """Return a single or plural form of a noun based on number."""
46
    if n == 1:
47
        return single
48
    else:
49
        return plural
50
51
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory
    * commits are processed
    * tags are stored in the current branch
    * LATER: named branch support
    * checkpoints commit the current write group and start a new one
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)
    """

    known_params = ['info']

    def pre_process(self):
        """Prepare caches, statistics and the first write group."""
        self._start_time = time.time()

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

        # Prepare progress reporting. The total is only a hint used for
        # the "n/total" display and the ETA, so a missing or malformed
        # entry in the info file just disables those extras.
        self.total_commits = None
        if self.info is not None:
            try:
                self.total_commits = int(
                    self.info['Command counts']['commit'])
            except (KeyError, TypeError, ValueError):
                pass

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _process(self, command_iter):
        """Run the import, aborting any open write group on failure."""
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately a bare except: whatever stops the import
            # (including KeyboardInterrupt) must not leave a write group
            # open. The exception is always re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit the import and point the branch at the last revision."""
        # Commit the current write group.
        self.repo.commit_write_group()

        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is None:
            # The stream held no commits, so there is no new head to
            # record; leave the branch as it was.
            note("No revisions were imported - branch left unchanged.")
        else:
            history = self.repo.iter_reverse_revision_history(last_rev_id)
            revno = len(list(history))
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log how many revisions, branches and tags were imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it in one go rather
            # than feeding it to sha_strings() a character at a time.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch, self.verbose)
        handler.process()
        rev_id = handler.revision_id
        self.cache_mgr.revision_ids[cmd.ref] = rev_id
        if cmd.mark is not None:
            self.cache_mgr.revision_ids[":" + cmd.mark] = rev_id
            details = "(:%s)" % cmd.mark
        else:
            # No mark to show; avoid reporting a misleading "(:None)".
            details = ''
        self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
        self._revision_count += 1
        self.report_progress(details)

    def report_progress(self, details=''):
        """Log a timestamped commit count, with an ETA when available.

        A message is emitted for every commit in verbose mode and for
        every tenth commit otherwise.

        :param details: extra text appended to the message
        """
        # TODO: use a progress bar with ETA enabled
        if not (self.verbose or self._revision_count % 10 == 0):
            return
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
            eta = progress.get_eta(self._start_time, self._revision_count,
                self.total_commits)
            eta_str = '[%s] ' % progress.str_tdelta(eta)
        else:
            counts = "%d" % (self._revision_count,)
            eta_str = ''
        note("%s %s commits processed %s%s" % (self._time_of_day(),
            counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # is more useful for determining when things might complete
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        tag_prefix = 'refs/tags/'
        if cmd.ref.startswith(tag_prefix):
            self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string
        :param from_: the import reference the tag points at; it must
            already be known to the cache manager
        """
        try:
            bzr_rev_id = self.cache_mgr.revision_ids[from_]
        except KeyError:
            # A reference we never imported (or no reference at all)
            # cannot be tagged. Skip this tag with a warning rather than
            # letting the KeyError abort the import.
            warning("ignoring tag '%s' - unknown revision reference %r",
                name, from_)
            return
        bzr_tag_name = name.decode('utf-8', 'replace')
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
210
211
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
212
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, log each blob that is made sticky
        :param inventory_cache_size: maximum number of inventories kept
            in the LRU cache
        """
        self.verbose = verbose

        # dataref -> data. datref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._sticky_ids_from_info(info)

    @staticmethod
    def _sticky_ids_from_info(info):
        """Extract the ids of blobs that are used more than once.

        :param info: a mapping with an optional 'Blob usage tracking'
            section holding a 'multi' entry, or None
        :return: a set of blob ids, or None when no hint is available
            (meaning every blob must be kept)
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        if not isinstance(multi, (list, tuple, set, frozenset)):
            # A lone value can come back as a bare string instead of a
            # one-element list. Left as a string, "id in multi" would be
            # a substring test and match unrelated ids.
            multi = [multi]
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky (kept after being fetched) when no usage
        hints are available, when it is empty, or when the hints list it
        as used more than once. Otherwise it is dropped on first fetch.

        :param id: the dataref of the blob, either :mark or the sha-1
        :param data: the blob content
        """
        keep_all = self._blobs_to_keep is None
        if keep_all or data == '' or id in self._blobs_to_keep:
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky", id)
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Non-sticky blobs are removed from the cache as they are returned.

        :param id: the dataref of the blob
        :raises KeyError: if no blob is stored under that id
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
282
283
0.64.5 by Ian Clatworthy
first cut at generic processing method
284
class GenericCommitHandler(processor.CommitHandler):
    """Turn one commit command into a revision loaded into a repository.

    The handler seeds an inventory from the first parent, applies the
    file commands of the commit to it and then loads the resulting
    revision, inventory and texts through a RevisionLoader.
    """

    def __init__(self, command, repo, cache_mgr, active_branch, verbose=False):
        """Create a handler for a single commit.

        :param command: the commit command being processed
        :param repo: the repository that revisions are loaded into
        :param cache_mgr: the GenericCacheManager shared across commits
        :param active_branch: the branch used to find the implied parent
            when the command names none
        :param verbose: if True, log inventory details while committing
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        self.verbose = verbose
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Get the parent inventories
        if self.command.parents:
            self.parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                self.parents = [last_rev]
            else:
                self.parents = []

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        if self.verbose:
            note("applying inventory delta ...")
            for entry in self.inv_delta:
                note("  %r" % (entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for entry in self.inventory:
                note("  %r" % (entry,))

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            # Only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None, self._get_lines)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a file using inline data or a cached blob."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory, warning if it is absent."""
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (path,))
        finally:
            # Let the cache manager react to the delete whether or not
            # the path was in the inventory.
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass

    def copy_handler(self, filecmd):
        """Copying files is not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Queue a rename in the inventory delta and update the caches."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        ie = self.inventory[file_id]
        # NOTE(review): the entry queued here is the existing one, which
        # still carries its old name and parent - confirm apply_delta()
        # really moves it to new_path.
        self.inv_delta.append((old_path, new_path, file_id, ie))
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Deleting all files is not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            return known_ids[path], False
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            known_ids[path] = file_id
            return file_id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is consulted first; on a miss the inventory
        is read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            note("Hmm - get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: present, inventories where present lists the revision
            ids found in the cache or the repository, and inventories
            holds one inventory per requested id, in order
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                note("Hmm - get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: fall back to the tree the
                    # repository returns for None.
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the inventory kind passed to inventory.make_entry
        :param is_executable: the executable flag, used for files only
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: if the kind is neither file nor symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively and added to the
        inventory.

        :return: basename, parent_ie where basename is the last component
            of path and parent_ie is the inventory entry of its directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie