/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
23
    errors,
24
    generate_ids,
25
    inventory,
26
    lru_cache,
27
    osutils,
28
    revision,
29
    revisiontree,
30
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
31
from bzrlib.trace import (
32
    note,
33
    warning,
34
    )
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
from bzrlib.plugins.fastimport import (
36
    processor,
37
    revisionloader,
38
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
40
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
41
def _single_plural(n, single, plural):
42
    """Return a single or plural form of a noun based on number."""
43
    if n == 1:
44
        return single
45
    else:
46
        return plural
47
48
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * TODO: commit handling
    * LATER: branch support
    * checkpoints and tags are ignored
    * some basic statistics are dumped on completion.
    """

    def pre_process(self):
        """Set up the caches, the active branch and the statistics."""
        self.cache_mgr = GenericCacheManager()
        self.active_branch = self.branch
        self.init_stats()

    def post_process(self):
        """Report statistics, then update the branch and working tree."""
        self.dump_stats()
        # TODO - loop over the branches created/modified
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is None:
            # No commit was imported into this branch, so there is no new
            # head to record; leave the branch and working tree untouched.
            return
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
        self.branch.set_last_revision_info(revno, last_rev_id)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log a one-line summary of what was imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is cached under ":<mark>" when the command has a
        mark, otherwise under the SHA-1 of the data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it as one string
            # rather than iterating over it as a sequence of strings.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.blobs[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group, which
        is aborted if anything goes wrong and committed otherwise.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            self.cache_mgr.last_revision_ids[self.active_branch] = \
                handler.revision_id
            self._revision_count += 1
        except:
            # Deliberately catch everything (including interrupts) so the
            # write group is never left open; the error is re-raised.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s", self._time_of_day(), cmd.message)

    def _time_of_day(self):
        """Time of day as a string in HH:MM:SS form (local time)."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        warning("multiple branches are not supported yet"
            " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        warning("tags are not supported yet - ignoring tag '%s'", cmd.id)
142
143
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
144
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, inventory_cache_size=100):
        """Create the empty caches.

        :param inventory_cache_size: size passed to the LRU cache that
            holds inventories.
        """
        # dataref -> data. dataref is either :mark or the sha-1.
        # Once a blob is used, it should be deleted from here.
        self.blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # directory-path -> inventory-entry lookup table
        # we need to keep all of these but they are small
        self.directory_entries = {}

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}
167
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
168
0.64.5 by Ian Clatworthy
first cut at generic processing method
169
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that loads one imported commit into a repository.

    File commands are accumulated into an inventory delta; once all of
    them have been seen, the delta is applied to the parent inventory and
    the resulting revision is loaded through a RevisionLoader.
    """

    def __init__(self, command, repo, cache_mgr, active_branch):
        """Create a handler.

        :param command: the commit command being processed.
        :param repo: the repository revisions are loaded into.
        :param cache_mgr: the GenericCacheManager shared across commits.
        :param active_branch: key used to find the last revision imported
            when the command names no parents.
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            lambda revision_ids: self._get_inventories(revision_ids))

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

    def post_process_files(self):
        """Save the revision."""
        if self.command.parents:
            parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                parents = [last_rev]
            else:
                parents = []

        # Derive the inventory from the previous one
        if not parents:
            new_inventory = self.gen_initial_inventory()
        else:
            # use the bzr revision id to look up the inventory cache
            new_inventory = self.get_inventory(parents[0]).copy()
        new_inventory.apply_delta(self.inv_delta)
        # This table maps import refs to revision ids (it is read back as
        # parent ids above), so record the id, not the inventory.
        self.cache_mgr.revision_ids[self.command.ref] = self.revision_id

        # debug trace ...
        print("applied inventory delta ...")
        for entry in self.inv_delta:
            print("  %r" % (entry,))
        print("creating inventory ...")
        for entry in new_inventory:
            print("  %r" % (entry,))

        # Load the revision into the repository
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self.escape_commit_message(self.command.message),
           revision_id=self.revision_id)
        rev.parent_ids = parents
        self.loader.load(rev, new_inventory, None,
            lambda file_id: self._get_lines(file_id))
        # Only cache the inventory once the load has succeeded: a cached
        # inventory is taken to mean the revision is in the repository.
        self.cache_mgr.inventories[self.revision_id] = new_inventory
        print("loaded revision %r" % (rev,))

    def escape_commit_message(self, msg):
        """Return the commit message to store (currently unchanged)."""
        # It's crap that we need to do this at this level (but we do)
        # TODO
        return msg

    def modify_handler(self, filecmd):
        """Record an added or changed file in the inventory delta."""
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice:
            # the blob is removed from the cache as it is fetched.
            data = self.cache_mgr.blobs.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Record a file deletion in the inventory delta."""
        path = filecmd.path
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Copying files is not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Renaming files is not supported yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Deleting all files is not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Generate a Bazaar file identifier for a path."""
        # TODO: Search the current inventory instead of generating every time
        return generate_ids.gen_file_id(path)

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is consulted first; on a miss the inventory
        is read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # TODO: count misses and/or inform the user about the miss?
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: a (present, inventories) tuple. ``present`` lists the
            revision ids found in the cache or the repository;
            ``inventories`` has one inventory per requested revision id.
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: fall back to the tree for None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if ``kind`` yields an entry that is
            neither a file nor a symlink.
        """
        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            ie.executable = is_executable
            # Keep the line terminators so the stored lines still add up
            # to the text that text_sha1 and text_size describe.
            self.lines_for_commit[file_id] = osutils.split_lines(data)
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an inventory delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created (recursively), cached in the
        cache manager and added to the inventory delta.

        :return: a (basename, parent_id) tuple where ``parent_id`` is the
            file-id of the directory containing ``path``.
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, inventory.ROOT_ID
        try:
            ie = self.cache_mgr.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_id)
        ie.revision = self.revision_id
        self.cache_mgr.directory_entries[dirname] = ie
        # The delta entry describes the new directory itself, so it is
        # recorded under the directory's path, not the child's.
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie.file_id