/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
39
    processor,
40
    revisionloader,
41
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
42
43
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
44
def _single_plural(n, single, plural):
45
    """Return a single or plural form of a noun based on number."""
46
    if n == 1:
47
        return single
48
    else:
49
        return plural
50
51
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory
    * commits are processed
    * tags are stored in the current branch
    * LATER: named branch support
    * checkpoints are ignored
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)
    """

    known_params = ['info']

    def pre_process(self):
        """Set up caches, statistics and progress state before importing."""
        self._start_time = time.time()

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

        # Prepare progress reporting. Without a known total, progress
        # messages simply omit the total and the ETA.
        self.total_commits = None
        if self.info is not None:
            try:
                self.total_commits = int(
                    self.info['Command counts']['commit'])
            except KeyError:
                # count not in file - report progress without a total,
                # mirroring how GenericCacheManager treats missing hints
                pass

    def post_process(self):
        """Dump statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is None:
            # No commit was recorded against this branch, so there is no
            # new tip to set; a plain lookup would raise KeyError here.
            warning("no revisions were imported - branch tip not updated")
        else:
            revno = len(list(
                self.repo.iter_reverse_revision_history(last_rev_id)))
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Report how many revisions, branches and tags were imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob is stored in the cache manager under ':<mark>' when the
        command has a mark, otherwise under the sha-1 of its data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it in one call rather
            # than feeding it to sha_strings one character at a time
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group, which
        is aborted if anything goes wrong and committed otherwise.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch, self.verbose)
        # Only mention the mark in progress output when there is one
        if cmd.mark is not None:
            details = "(:%s)" % cmd.mark
        else:
            details = ''
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            rev_id = handler.revision_id
            self.cache_mgr.revision_ids[cmd.ref] = rev_id
            if cmd.mark is not None:
                self.cache_mgr.revision_ids[":" + cmd.mark] = rev_id
            self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
            self._revision_count += 1
            self.report_progress(details)
        except:
            # Deliberately bare: whatever interrupted the commit, the
            # write group must be aborted before the error propagates.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def report_progress(self, details=''):
        """Emit a timestamped count of the commits loaded so far.

        A message is written for every commit when verbose, otherwise
        only for every tenth commit.

        :param details: extra text appended to the message
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = '[%s] ' % progress.str_tdelta(eta)
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            note("%s %s commits loaded %s%s" % (self._time_of_day(),
                counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # is more useful for determining when things might complete
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under 'refs/tags/' define a tag; any other reset
        is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name, decoded as utf-8 with replacement
        :param from_: an import reference already present in the cache
            manager's revision_ids table (a KeyError is raised otherwise)
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
201
202
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
203
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor.

    It holds blob data, recently used inventories and the lookup tables
    mapping import references, branches and paths to Bazaar identifiers.
    """

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._sticky_blob_ids(info)

    @staticmethod
    def _sticky_blob_ids(info):
        """Work out which blob ids must be kept after their first use.

        :param info: a mapping holding the --info analysis, or None
        :return: a frozenset of blob ids, or None meaning "keep all"
            (returned when there is no info or it has no usage section)
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A lone value may arrive as a plain string rather than a list;
        # wrap it so membership tests compare whole ids, not substrings.
        if not isinstance(multi, (list, tuple)):
            multi = [multi]
        return frozenset(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when every blob is to be kept, when its
        data is empty, or when its id is among those to keep; otherwise
        it is stored so that the first fetch removes it.

        :param id: the dataref (:mark or sha-1) identifying the blob
        :param data: the blob content
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky" % (id,))
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay available; other blobs are removed from the
        cache as they are returned. A KeyError is raised for an id that
        is in neither store.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
273
274
0.64.5 by Ian Clatworthy
first cut at generic processing method
275
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that builds an inventory and loads one revision.

    File commands update an in-memory inventory seeded from the first
    parent; the finished revision is then handed to a RevisionLoader.
    """

    def __init__(self, command, repo, cache_mgr, active_branch, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command being processed
        :param repo: the repository revisions are loaded into
        :param cache_mgr: the cache manager shared across commits
        :param active_branch: key used to find the last revision when the
            command names no parents
        :param verbose: if True, note the inventory delta and the
            resulting inventory
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        self.verbose = verbose
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            lambda revision_ids: self._get_inventories(revision_ids))

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Get the parent inventories
        if self.command.parents:
            self.parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                self.parents = [last_rev]
            else:
                self.parents = []

        # Seed the inventory from the previous one
        if len(self.parents) == 0:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        if self.verbose:
            note("applying inventory delta ...")
            for entry in self.inv_delta:
                note("  %r" % (entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for entry in self.inventory:
                note("  %r" % (entry,))

        # Load the revision into the repository
        rev_props = {}
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # only record an author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id))

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's crap that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a file, taking data from a blob or inline."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Remove a path from the inventory, warning if it is absent."""
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (path,))
        finally:
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass

    def copy_handler(self, filecmd):
        """Copying files is not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Record a rename in the inventory delta and the path cache."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        ie = self.inventory[file_id]
        # NOTE(review): the entry appended here is the existing one, so
        # its name and parent still describe old_path - confirm that
        # apply_delta really produces the rename from this tuple.
        self.inv_delta.append((old_path, new_path, file_id, ie))
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Deleting all files is not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = id
            return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0],committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The cache is consulted first; on a miss the inventory is read
        from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # Reported through the trace module, like the other
            # diagnostics in this class, rather than printed to stdout
            note("Hmm - get_inventory cache miss for %s" % revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: present, inventories where present lists the revision
          ids found in the cache or the repository and inventories holds
          one inventory per requested revision id
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                note("Hmm - get_inventories cache miss for %s" % revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # unknown revision: fall back to the tree for None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item within the tree
        :param kind: inventory kind passed to inventory.make_entry
        :param is_executable: executable flag, used for files only
        :param data: file content, or the target for a symlink
        :raises errors.BzrError: if the kind is neither file nor symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
            # There are no lines stored for a symlink so make sure the
            # cache used by get_lines knows that, as done for directories
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        Missing parent directories are created recursively and added to
        the inventory.

        :return: basename, parent_ie where basename is the last component
          of path and parent_ie is the entry of its containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        #print "adding dir %s" % path
        self.inventory.add(ie)
        return basename, ie