/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
23
    errors,
24
    generate_ids,
25
    inventory,
26
    lru_cache,
27
    osutils,
28
    revision,
29
    revisiontree,
30
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
31
from bzrlib.trace import (
32
    note,
33
    warning,
34
    )
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
from bzrlib.plugins.fastimport import (
36
    processor,
37
    revisionloader,
38
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
40
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
41
def _single_plural(n, single, plural):
42
    """Return a single or plural form of a noun based on number."""
43
    if n == 1:
44
        return single
45
    else:
46
        return plural
47
48
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * each commit is loaded into the repository in its own write group
    * tags are collected and applied to the branch on completion
    * LATER: branch support (resets are currently ignored)
    * checkpoints are ignored
    * some basic statistics are dumped on completion.
    """

    def pre_process(self):
        """Set up the caches, statistics and tag table before importing."""
        self.cache_mgr = GenericCacheManager()
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

    def post_process(self):
        """Report statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        # Use .get(): if the stream contained no commits there is no entry
        # for the branch and there is nothing to update.
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is not None:
            revno = len(list(
                self.repo.iter_reverse_revision_history(last_rev_id)))
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # Only a single branch is supported for now
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log a one line summary of what was imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is cached until a later file command refers to it,
        keyed by ':<mark>' when the blob has a mark and by its sha-1
        otherwise.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it in one call rather
            # than feeding it to sha_strings() a character at a time.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.blobs[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group, which
        is aborted if anything goes wrong. On success the new revision-id
        is recorded against the command's ref and as the last revision of
        the active branch.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            self.cache_mgr.revision_ids[cmd.ref] = handler.revision_id
            self.cache_mgr.last_revision_ids[self.active_branch] = \
                handler.revision_id
            self._revision_count += 1
        except:
            # Deliberately bare: the write group must be aborted whatever
            # went wrong (including interrupts) before re-raising.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s", self._time_of_day(), cmd.message)

    def _time_of_day(self):
        """Time of day as a string in HH:MM:SS form (local time)."""
        # Note: this is a separate method so tests can patch in a fixed value
        # time.strftime() formats the current local time when no time tuple
        # is given; struct_time itself has no strftime() method.
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        warning("multiple branches are not supported yet"
            " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand.

        The tag is remembered in self.tags and applied to the branch by
        post_process(). A KeyError is raised if the revision the tag
        points at has not been imported.
        """
        bzr_tag_name = cmd.id.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[cmd.from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
150
151
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
152
class GenericCacheManager(object):
    """Holds the caches and lookup tables used by the GenericProcessor."""

    def __init__(self, inventory_cache_size=100):
        """Create empty caches.

        :param inventory_cache_size: size passed to the LRU cache that
            holds inventories
        """
        # Blob contents keyed by dataref, where a dataref is either a
        # ':mark' or a sha-1. An entry should be removed as soon as the
        # blob has been used.
        self.blobs = dict()

        # Inventories keyed by revision-id. Inventories are large, so only
        # a limited number are kept; most parents are recent in history
        # so a small cache is probably enough.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # Inventory entries of directories keyed by directory path.
        # All of these need to be kept, but they are small.
        self.directory_entries = dict()

        # Revision-ids keyed by the reference used in the import stream.
        # All of these need to be kept, but they are small.
        self.revision_ids = dict()

        # Last revision-id imported, keyed by branch.
        self.last_revision_ids = dict()
175
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
176
0.64.5 by Ian Clatworthy
first cut at generic processing method
177
class GenericCommitHandler(processor.CommitHandler):
    """Turn one commit command of an import stream into a revision.

    The file commands of the commit are accumulated into an inventory
    delta (self.inv_delta) and a table of file texts
    (self.lines_for_commit). post_process_files() then applies the delta
    to the first parent's inventory and loads the resulting revision into
    the repository.
    """

    def __init__(self, command, repo, cache_mgr, active_branch):
        """Create a handler for a single commit.

        :param command: the commit command to process
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the GenericCacheManager shared between commits
        :param active_branch: key into cache_mgr.last_revision_ids used to
            find the implicit parent when the command names none
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

    def post_process_files(self):
        """Save the revision."""
        parents = self._resolve_parents()

        # Derive the inventory from the previous one
        if parents:
            # use the bzr_revision_id to lookup the inv cache
            new_inventory = self.get_inventory(parents[0]).copy()
        else:
            new_inventory = self.gen_initial_inventory()
        new_inventory.apply_delta(self.inv_delta)
        # The inventory cache is read by revision-id (see get_inventory and
        # _get_inventories), so it must be written by revision-id as well.
        self.cache_mgr.inventories[self.revision_id] = new_inventory

        # debug trace ...
        print("applied inventory delta ...")
        for entry in self.inv_delta:
            print("  %r" % (entry,))
        print("creating inventory ...")
        for entry in new_inventory:
            print("  %r" % (entry,))

        # Load the revision into the repository
        # committer is indexed as (name, email, timestamp, timezone)
        committer = self.command.committer
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=self._format_committer(committer),
           message=self.escape_commit_message(self.command.message),
           revision_id=self.revision_id)
        rev.parent_ids = parents
        self.loader.load(rev, new_inventory, None, self._get_lines)
        print("loaded revision %r" % (rev,))

    def _resolve_parents(self):
        """Return the list of parent revision-ids for this commit."""
        if self.command.parents:
            return [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        # if no parents are given, the last revision on
        # the current branch is assumed according to the spec
        last_rev = self.cache_mgr.last_revision_ids.get(self.active_branch)
        if last_rev:
            return [last_rev]
        return []

    def _format_committer(self, committer):
        """Return 'name <email>' built from the first two committer items."""
        return "%s <%s>" % (committer[0], committer[1])

    def escape_commit_message(self, msg):
        """Return msg in a form suitable for storing in a revision.

        Currently the message is returned unchanged.
        """
        # It's crap that we need to do this at this level (but we do)
        # TODO
        return msg

    def modify_handler(self, filecmd):
        """Process a file modification.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command itself.
        """
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice:
            # the blob is removed from the cache as it is fetched.
            data = self.cache_mgr.blobs.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Process a file deletion by adding a removal to the delta."""
        path = filecmd.path
        # NOTE(review): bzr_file_id() generates an id instead of looking up
        # the existing one (see its TODO) - confirm the removal matches the
        # entry that was added earlier.
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Process a file copy - not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Process a file rename - not supported yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Process a delete-all - not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Generate a Bazaar file identifier for a path."""
        # TODO: Search the current inventory instead of generating every time
        return generate_ids.gen_file_id(path)

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        who = self._format_committer(committer)
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The cache is tried first; on a miss the inventory is read from
        the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # TODO: count misses and/or inform the user about the miss?
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: (present, inventories) where present lists the
            revision-ids found in the cache or the repository and
            inventories holds one inventory per requested revision-id.
            A revision missing from both gets the inventory of
            repo.revision_tree(None).
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _parent_file_id(self, parent):
        """Return the file-id for a parent returned by _ensure_directory.

        _ensure_directory() yields inventory.ROOT_ID itself for items in
        the root and a directory inventory entry otherwise.
        """
        return getattr(parent, 'file_id', parent)

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item
        :param kind: kind of inventory entry to create
        :param is_executable: executable flag, used for files only
        :param data: file content, or the target for a symlink
        :raises errors.BzrError: if the kind is neither a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        # make_entry() takes the parent's file-id, not the parent entry
        ie = inventory.make_entry(kind, basename,
            self._parent_file_id(parent), file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            ie.executable = is_executable
            # Keep the line terminators so that the stored text is exactly
            # the data described by text_sha1 and text_size.
            self.lines_for_commit[file_id] = osutils.split_lines(data)
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an inventory delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, cached in
        cache_mgr.directory_entries and added to the inventory delta.

        :return: (basename, parent) where parent is inventory.ROOT_ID for
            an item in the root and the directory's inventory entry
            otherwise
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, inventory.ROOT_ID
        try:
            ie = self.cache_mgr.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  self._parent_file_id(parent))
        ie.revision = self.revision_id
        self.cache_mgr.directory_entries[dirname] = ie
        # The delta entry describes the directory, so it is recorded under
        # the directory's own path rather than the path of the item in it.
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie