/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
23
    errors,
24
    generate_ids,
25
    inventory,
26
    lru_cache,
27
    osutils,
28
    revision,
29
    revisiontree,
30
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
31
from bzrlib.trace import (
32
    note,
33
    warning,
34
    )
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
from bzrlib.plugins.fastimport import (
36
    processor,
37
    revisionloader,
38
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
40
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
41
def _single_plural(n, single, plural):
42
    """Return a single or plural form of a noun based on number."""
43
    if n == 1:
44
        return single
45
    else:
46
        return plural
47
48
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * TODO: commit handling
    * LATER: branch support
    * checkpoints and tags are ignored
    * some basic statistics are dumped on completion.

    NOTE(review): ``self.repo``, ``self.branch`` and ``self.working_tree``
    are read here but never assigned in this class; presumably they are
    provided by ``processor.ImportProcessor`` - confirm.
    """

    def pre_process(self):
        """Create the caches and reset the statistics."""
        self.cache_mgr = GenericCacheManager()
        # Revision id of the most recently loaded commit. It stays None
        # until commit_handler() has processed a commit.
        self.last_revision_id = None
        self.init_stats()

    def post_process(self):
        """Dump the statistics, then update the branch and working tree."""
        self.dump_stats()
        last_rev_id = self.last_revision_id
        if last_rev_id is None:
            # No commit was imported, so there is no new head to record.
            return
        # Update the branch, assuming the last revision is the head
        note("Updating branch information ...")
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
        self.branch.set_last_revision_info(revno, last_rev_id)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # Only a single branch is handled (see reset_handler), so the
        # branch count is fixed at 1.
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Report how many revisions, branches and tags were imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is cached under ":<mark>" when the command has a
        mark, otherwise under the sha-1 of the data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it directly rather than
            # treating it as a sequence of strings.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.blobs[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group, which
        is aborted if anything goes wrong.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            self.last_revision_id = handler.revision_id
            self._revision_count += 1
        except:
            # Deliberately bare: the write group must be aborted whatever
            # was raised (including KeyboardInterrupt) before re-raising.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s", self._time_of_day(), cmd.message)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        # time.strftime() formats the current local time when no time
        # tuple is given.
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        warning("multiple branches are not supported yet"
            " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        warning("tags are not supported yet - ignoring tag '%s'", cmd.id)
139
140
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
141
class GenericCacheManager(object):
    """Holds the caches and lookup tables used by the GenericProcessor."""

    def __init__(self, inventory_cache_size=100):
        """Create the (initially empty) caches.

        :param inventory_cache_size: size given to the LRU cache that
            holds inventories.
        """
        # The two tables below are small, and every entry has to be kept.
        # import-ref -> revision-id
        self.revision_ids = dict()
        # directory-path -> inventory-entry
        self.directory_entries = dict()

        # revision-id -> Inventory. Inventories are large, and as most
        # parents are recent in history only a limited number is kept.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # dataref -> data, where dataref is either :mark or the sha-1.
        # A blob should be removed from here once it has been used.
        self.blobs = dict()
161
162
0.64.5 by Ian Clatworthy
first cut at generic processing method
163
class GenericCommitHandler(processor.CommitHandler):
    """Load the revision described by one commit command into a repository.

    The file commands of the commit are collected into an inventory delta.
    post_process_files() applies that delta to a copy of the first
    parent's inventory (or to a fresh inventory when there is no parent)
    and hands the result to the revision loader.
    """

    def __init__(self, command, repo, cache_mgr):
        """Create a handler for a single commit.

        :param command: the commit command being processed.
        :param repo: the repository the revision is loaded into.
        :param cache_mgr: the GenericCacheManager shared between commits.
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

    def post_process_files(self):
        """Save the revision."""
        # Translate the parents' import refs into the revision ids that
        # were recorded when those commits were processed.
        parents = [self.cache_mgr.revision_ids[ref]
            for ref in self.command.parents]
        # Derive the inventory from the previous one
        if not parents:
            new_inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            # NOTE(review): get_inventory() is not defined in this class;
            # presumably it comes from processor.CommitHandler - confirm.
            new_inventory = self.get_inventory(parents[0]).copy()
        new_inventory.apply_delta(self.inv_delta)
        # revision_ids maps import-ref -> revision-id, so store the id
        # here (not the inventory); later commits naming this ref as a
        # parent rely on it.
        self.cache_mgr.revision_ids[self.command.ref] = self.revision_id

        # debug trace ...
        print("applied inventory delta ...")
        for entry in self.inv_delta:
            print("  %r" % (entry,))
        print("creating inventory ...")
        for entry in new_inventory:
            print("  %r" % (entry,))

        # Load the revision into the repository
        # TODO: Escape the commit message
        # The committer is indexed as (name, email, timestamp, timezone).
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self.escape_commit_message(self.command.message),
           revision_id=self.revision_id)
        rev.parent_ids = parents
        self.loader.load(rev, new_inventory, None, self._get_lines)
        # Cache the inventory only once it has been loaded, because
        # _get_inventories() treats cached inventories as present in the
        # repository.
        self.cache_mgr.inventories[self.revision_id] = new_inventory
        print("loaded revision %r" % (rev,))

    def escape_commit_message(self, msg):
        """Return the commit message to store; currently unchanged."""
        # It's crap that we need to do this at this level (but we do)
        # TODO
        return msg

    def modify_handler(self, filecmd):
        """Record an added or changed file in the inventory delta.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command itself.
        """
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice
            data = self.cache_mgr.blobs.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Record the removal of a path in the inventory delta."""
        path = filecmd.path
        # NOTE(review): bzr_file_id() generates an id on every call (see
        # its TODO), so this id presumably does not match the one the
        # path was added with - confirm.
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Not supported yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Generate a Bazaar file identifier for a path."""
        # TODO: Search the current inventory instead of generating every time
        return generate_ids.gen_file_id(path)

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :param revision_ids: the revision ids to look up.
        :return: a (present, inventories) pair. ``present`` lists the ids
            found in the cache or the repository; ``inventories`` holds
            one inventory per requested id, in the same order.
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: fall back to the tree for None
                    # (presumably the empty tree - confirm).
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item.
        :param kind: kind of inventory entry to create.
        :param is_executable: executable flag, used for files only.
        :param data: the file content, or the target of a symlink.
        :raises errors.BzrError: if the kind is neither a file nor a
            symlink.
        """
        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            # data is a single string, so hash it directly.
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            ie.executable = is_executable
            # Keep the line terminators so that the stored lines join
            # back into exactly the text the sha1 and size describe.
            self.lines_for_commit[file_id] = osutils.split_lines(data)
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an inventory delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, cached in the cache
        manager and added to the inventory delta.

        :return: a (basename, parent_id) pair, where parent_id is the
            file id of the directory containing 'path'.
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, inventory.ROOT_ID
        try:
            ie = self.cache_mgr.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_id)
        ie.revision = self.revision_id
        self.cache_mgr.directory_entries[dirname] = ie
        # The new entry describes the directory, so it is recorded under
        # the directory's own path rather than the path of the item in it.
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie.file_id