/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
29
    revision,
30
    revisiontree,
31
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
32
from bzrlib.trace import (
33
    note,
34
    warning,
35
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
36
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
37
from bzrlib.plugins.fastimport import (
38
    processor,
39
    revisionloader,
40
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
41
42
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
43
def _single_plural(n, single, plural):
44
    """Return a single or plural form of a noun based on number."""
45
    if n == 1:
46
        return single
47
    else:
48
        return plural
49
50
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
51
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory
    * commits are processed
    * tags are stored in the current branch
    * LATER: named branch support
    * checkpoints are ignored
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)
    """

    known_params = ['info']

    def pre_process(self):
        """Set up the caches, statistics and progress state for an import."""
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

        # Prepare progress reporting: the total is only known when the
        # --info analysis was supplied
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
        else:
            self.total_commits = None

    def post_process(self):
        """Report statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        # A stream without any commit never records a last revision for
        # the branch, so look it up defensively instead of raising KeyError.
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is None:
            note("No revisions were imported - branch left unchanged")
        else:
            revno = len(list(
                self.repo.iter_reverse_revision_history(last_rev_id)))
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # Only the current branch is imported into for now
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log a one-line summary of what was imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob is cached under ':<mark>' when it has a mark, otherwise
        under the sha-1 of its data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group, which
        is aborted if anything goes wrong and committed otherwise.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch, self.verbose)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            rev_id = handler.revision_id
            # Remember the revision under both the ref and the mark so
            # later commands can refer to it either way
            self.cache_mgr.revision_ids[cmd.ref] = rev_id
            if cmd.mark is not None:
                # Same key format as blob_handler uses for marks
                self.cache_mgr.revision_ids[":%s" % (cmd.mark,)] = rev_id
            self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
            self._revision_count += 1
            self.report_progress("(%s)" % cmd.mark)
        except:
            # Deliberately catch everything (the error is re-raised): the
            # write group must not be left open whatever went wrong.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def report_progress(self, details=''):
        """Log a timestamped count of the commits loaded so far.

        :param details: extra text appended to the message
        """
        # TODO: use a progress bar with ETA enabled
        if self.total_commits is not None:
            counts = "%d/%d" % (self._revision_count, self.total_commits)
        else:
            counts = "%d" % (self._revision_count,)
        note("%s %s commits loaded %s" % (self._time_of_day(), counts,
            details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under 'refs/tags/' define a tag; any other reset is
        ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name, decoded as UTF-8 with undecodable
            bytes replaced
        :param from_: the import reference of the tagged revision; it must
            already be known to the cache manager or KeyError is raised
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
195
196
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
197
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, log each blob that is made sticky
        :param inventory_cache_size: the size passed to the LRU cache
            that holds inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._parse_blobs_to_keep(info)

    @staticmethod
    def _parse_blobs_to_keep(info):
        """Extract the ids of blobs that are referenced more than once.

        :param info: the mapping produced by the --info processor, or None
        :return: a set of blob ids, or None when no hints are available
            (in which case every blob is kept)
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A single-valued config entry is read back as a plain string
        # rather than a list; wrap it so membership tests compare whole
        # ids instead of matching substrings.
        if not isinstance(multi, (list, tuple)):
            multi = [multi]
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        Blobs that are empty, that are flagged as used more than once, or
        that arrive when no usage hints are available are made sticky; the
        rest are dropped from the cache the first time they are fetched.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky", id)
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        :raises KeyError: if the blob is unknown or was a non-sticky blob
            that has already been fetched
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            # Non-sticky blobs are only needed once, so free the memory
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
267
268
0.64.5 by Ian Clatworthy
first cut at generic processing method
269
class GenericCommitHandler(processor.CommitHandler):
    """Turn one commit command into a revision loaded into a repository.

    The file commands of the commit are applied to a copy of the first
    parent's inventory (or to a fresh one for a parentless commit) and
    the result is handed to a RevisionLoader.
    """

    def __init__(self, command, repo, cache_mgr, active_branch, verbose=False):
        """Create a handler for one commit.

        :param command: the commit command to process
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the GenericCacheManager shared across commits
        :param active_branch: the branch whose last imported revision is
            used as the parent when the command names none
        :param verbose: if True, log the inventory delta and inventory
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        self.verbose = verbose
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Get the parent inventories
        if self.command.parents:
            self.parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                self.parents = [last_rev]
            else:
                self.parents = []

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache; copy it so
            # the cached parent inventory is not modified by this commit
            self.inventory = self.get_inventory(self.parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

    def post_process_files(self):
        """Save the revision."""
        if self.verbose:
            note("applying inventory delta ...")
            for entry in self.inv_delta:
                note("  %r" % (entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for entry in self.inventory:
                note("  %r" % (entry,))

        # Load the revision into the repository
        rev_props = {}
        # committer (and author) are indexed as
        # (name, email, timestamp, timezone)
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            # only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self._escape_commit_message(self.command.message),
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None, self._get_lines)

    def _escape_commit_message(self, message):
        """Replace xml-incompatible control characters."""
        # It's unfortunate that we need to do this at this level (but we do)
        # Code copied from bzrlib.commit.

        # Python strings can include characters that can't be
        # represented in well-formed XML; escape characters that
        # aren't listed in the XML specification
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
        message, _ = re.subn(
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
            lambda match: match.group(0).encode('unicode_escape'),
            message)
        return message

    def modify_handler(self, filecmd):
        """Add or change a file, taking its data inline or from a blob."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Delete a path, warning if it is not in the inventory."""
        path = filecmd.path
        try:
            del self.inventory[self.bzr_file_id(path)]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (path,))
        finally:
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass

    def copy_handler(self, filecmd):
        """Not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Record a rename in the inventory delta and the path cache."""
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        file_id = self.bzr_file_id(old_path)
        ie = self.inventory[file_id]
        self.inv_delta.append((old_path, new_path, file_id, ie))
        self.cache_mgr._rename_path(old_path, new_path)

    def deleteall_handler(self, filecmd):
        """Not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        A path seen for the first time is given a freshly generated
        file-id, which is remembered in the cache manager.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        try:
            return self.cache_mgr.file_ids[path], False
        except KeyError:
            id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = id
            return id, True

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path."""
        return self.bzr_file_id_and_new(path)[0]

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (committer[0],committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # Reported through the trace log, like the other diagnostics
            # in this module, rather than printed to stdout
            note("Hmm - get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: present, inventories where present lists the revision-ids
            that are cached or found in the repository, and inventories
            holds one inventory per requested revision-id, in order
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                note("Hmm - get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: fall back to the tree for None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if kind is neither a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
        else:
            self.inventory.add(ie)

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parents first.

        :return: basename, parent_ie where basename is the last component
            of path and parent_ie is the inventory entry of its directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            ie = self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.directory_entries[dirname] = ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(ie)
        return basename, ie