/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
23
    errors,
24
    generate_ids,
25
    inventory,
26
    lru_cache,
27
    osutils,
28
    revision,
29
    revisiontree,
30
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
31
from bzrlib.trace import (
32
    note,
33
    warning,
34
    )
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
from bzrlib.plugins.fastimport import (
36
    processor,
37
    revisionloader,
38
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
40
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
41
def _single_plural(n, single, plural):
42
    """Return a single or plural form of a noun based on number."""
43
    if n == 1:
44
        return single
45
    else:
46
        return plural
47
48
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * each commit is loaded into the repository inside its own write group
    * tags (including tags defined via a reset of refs/tags/...) are
      collected and applied to the branch on completion
    * LATER: branch support - resets of other refs are ignored with a
      warning and every commit goes to the single target branch
    * checkpoints are ignored
    * some basic statistics are dumped on completion.
    """

    def pre_process(self):
        """Set up the caches, statistics and tag table before importing."""
        self.cache_mgr = GenericCacheManager()
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

    def post_process(self):
        """Report statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is not None:
            revno = len(list(
                self.repo.iter_reverse_revision_history(last_rev_id)))
            self.branch.set_last_revision_info(revno, last_rev_id)
        # else: no commit was imported, so there is no new head to record
        # and the branch is left pointing where it was.
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # only the single target branch is supported for now
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log a one line summary of what was imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is cached under ':<mark>' when the command has a
        mark, otherwise under the sha-1 of the data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it in one call rather
            # than feeding it to sha_strings() one character at a time.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.blobs[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        On success the new revision id is recorded against cmd.ref and as
        the last revision of the active branch. On any failure the write
        group is aborted and the exception is re-raised.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            self.cache_mgr.revision_ids[cmd.ref] = handler.revision_id
            self.cache_mgr.last_revision_ids[self.active_branch] = \
                handler.revision_id
            self._revision_count += 1
        except:
            # Deliberately catch everything (including KeyboardInterrupt):
            # the write group must be aborted before the error propagates.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Time of day as a string in HH:MM:SS form (local time)."""
        # Note: this is a separate method so tests can patch in a fixed value
        # time.strftime() formats the current local time when no time tuple
        # is given; struct_time objects have no strftime() method of their own.
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under refs/tags/ defines a tag; a reset of any
        other ref is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            warning("multiple branches are not supported yet"
                " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string; undecodable
            bytes are replaced
        :param from_: the import reference of the tagged commit
        :raises KeyError: if from_ has not been recorded in
            cache_mgr.revision_ids
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
157
158
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
159
class GenericCacheManager(object):
    """Container for the lookup tables and caches used by GenericProcessor.

    Attributes:

    * blobs - dataref -> data, where the dataref is either ':mark' or
      the sha-1 of the data. An entry should be removed once the blob
      has been used.
    * inventories - revision-id -> Inventory, held in an LRU cache
      because inventories are large and most parents are recent in
      history, so only a limited number need to be kept.
    * directory_entries - directory-path -> inventory-entry. All of
      these are kept, but they are small.
    * revision_ids - import-ref -> revision-id. All of these are kept,
      but they are small.
    * last_revision_ids - branch -> last revision-id.
    """

    def __init__(self, inventory_cache_size=100):
        """Create the empty caches.

        :param inventory_cache_size: the size passed to the LRU cache
            holding the inventories
        """
        # unbounded tables, all starting out empty
        self.blobs = {}
        self.directory_entries = {}
        self.revision_ids = {}
        self.last_revision_ids = {}

        # the one bounded cache
        self.inventories = lru_cache.LRUCache(inventory_cache_size)
182
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
183
0.64.5 by Ian Clatworthy
first cut at generic processing method
184
class GenericCommitHandler(processor.CommitHandler):
    """Commit handler that loads one import commit into a repository.

    File commands are accumulated into an inventory delta (self.inv_delta)
    and a per-commit text cache (self.lines_for_commit). Once all file
    commands have been seen, post_process_files() derives the new inventory
    from the first parent's inventory and loads the revision through a
    RevisionLoader.
    """

    def __init__(self, command, repo, cache_mgr, active_branch):
        """Create a handler.

        :param command: the CommitCommand to process
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the GenericCacheManager shared across commits
        :param active_branch: the key used to look up the last revision
            in cache_mgr.last_revision_ids when the commit names no parents
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            lambda revision_ids: self._get_inventories(revision_ids))

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

    def post_process_files(self):
        """Save the revision.

        :raises KeyError: if a parent reference of the command has not
            been recorded in cache_mgr.revision_ids
        """
        if self.command.parents:
            parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                parents = [last_rev]
            else:
                parents = []

        # Derive the inventory from the previous one
        if not parents:
            new_inventory = self.gen_initial_inventory()
        else:
            # use the bzr revision-id to lookup the inv cache; work on a
            # copy so the parent's cached inventory is left untouched
            new_inventory = self.get_inventory(parents[0]).copy()
        new_inventory.apply_delta(self.inv_delta)

        # debug trace ...
        # (single-argument, parenthesised print gives the same output under
        # the Python 2 print statement and the Python 3 print function)
        print("applied inventory delta ...")
        for entry in self.inv_delta:
            print("  %r" % (entry,))
        print("creating inventory ...")
        for entry in new_inventory:
            print("  %r" % (entry,))

        # Load the revision into the repository
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self.escape_commit_message(self.command.message),
           revision_id=self.revision_id)
        rev.parent_ids = parents
        self.loader.load(rev, new_inventory, None,
            lambda file_id: self._get_lines(file_id))
        # The inventory cache is keyed by revision-id (that is how
        # get_inventory() and _get_inventories() look entries up), and
        # _get_inventories() treats a cached entry as proof the revision
        # is in the repository, so only cache once the load has succeeded.
        self.cache_mgr.inventories[self.revision_id] = new_inventory
        print("loaded revision %r" % (rev,))

    def escape_commit_message(self, msg):
        """Return the commit message to store; currently msg unchanged."""
        # It's crap that we need to do this at this level (but we do)
        # TODO: actually escape the message
        return msg

    def modify_handler(self, filecmd):
        """Process a file modification command.

        :raises KeyError: if filecmd.dataref names a blob that is not (or
            is no longer) in the blob cache
        """
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice:
            # fetch the blob and drop it from the cache in one step
            data = self.cache_mgr.blobs.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Process a file deletion command by adding it to the delta."""
        path = filecmd.path
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Process a file copy command - not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Process a file rename command - not supported yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Process a delete-all command - not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Generate a Bazaar file identifier for a path."""
        # TODO: Search the current inventory instead of generating every time
        return generate_ids.gen_file_id(path)

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        inv = inventory.Inventory(revision_id=self.revision_id)
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            inv.root.revision = self.revision_id
        return inv

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first; on a miss the inventory is
        read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # TODO: count misses and/or inform the user about the miss?
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: (present, inventories) where present lists the
            revision-ids found in the cache or the repository, and
            inventories has one inventory per requested revision-id, in
            order. A revision that is in neither gets the inventory of
            repo.revision_tree(None).
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id.

        :raises KeyError: if no text was recorded for file_id in this
            commit; only file entries get one (see _modify_inventory)
        """
        return self.lines_for_commit[file_id]

    def _parent_id(self, parent):
        """Return the file-id for a parent returned by _ensure_directory.

        _ensure_directory hands back the root's file-id itself for top
        level paths but an inventory entry for everything else.
        """
        return getattr(parent, 'file_id', parent)

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: the kind of entry to make; only kinds that produce a
            file or symlink entry are supported
        :param is_executable: the executable flag for a file
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: for any other kind of entry
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        # make_entry wants the parent's file-id, not the parent entry
        ie = inventory.make_entry(kind, basename,
            self._parent_id(parent_ie), file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            ie.executable = is_executable
            # Keep the line terminators: joining the stored lines must
            # give back exactly the data that was hashed above
            self.lines_for_commit[file_id] = osutils.split_lines(data)
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an inventory delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'

        Missing directories are created (recursively), remembered in
        cache_mgr.directory_entries and added to the inventory delta.

        :return: (basename, parent) where parent is inventory.ROOT_ID when
            path is at the top level, otherwise the inventory entry of the
            containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, inventory.ROOT_ID
        try:
            ie = self.cache_mgr.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, ie

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  self._parent_id(parent_ie))
        ie.revision = self.revision_id
        self.cache_mgr.directory_entries[dirname] = ie
        # the delta entry describes the new directory, so it must carry
        # the directory's own path rather than the path of the child
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie