/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
23
    errors,
24
    generate_ids,
25
    inventory,
26
    lru_cache,
27
    osutils,
28
    revision,
29
    revisiontree,
30
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
31
from bzrlib.trace import (
32
    note,
33
    warning,
34
    )
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
from bzrlib.plugins.fastimport import (
36
    processor,
37
    revisionloader,
38
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
40
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
41
def _single_plural(n, single, plural):
42
    """Return a single or plural form of a noun based on number."""
43
    if n == 1:
44
        return single
45
    else:
46
        return plural
47
48
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * commits are loaded into the repository, one write group per commit
    * lightweight tags (tag commands and resets of refs/tags/...)
    * LATER: branch support (resets of other refs are ignored)
    * checkpoints are ignored
    * some basic statistics are dumped on completion.

    NOTE(review): self.repo, self.branch and self.working_tree are not set
    in this class; presumably the base ImportProcessor provides them.
    """

    def pre_process(self):
        """Create the caches, statistics and tag table used by the import."""
        self.cache_mgr = GenericCacheManager()
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

    def post_process(self):
        """Report statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        # A stream without any commit never records a last revision for
        # the branch, so look it up tolerantly instead of raising KeyError.
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is not None:
            history = self.repo.iter_reverse_revision_history(last_rev_id)
            revno = len(list(history))
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # Only a single branch is handled for now
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Emit a one-line summary of what was imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is cached under ':<mark>' when the command has a
        mark, otherwise under the sha-1 of the data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string: hash it in one go rather than
            # feeding it to sha_strings() one character at a time.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.blobs[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group, which
        is aborted if anything goes wrong and committed otherwise.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            rev_id = handler.revision_id
            self.cache_mgr.revision_ids[cmd.ref] = rev_id
            self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
            self._revision_count += 1
        except:
            # Deliberately bare: the write group must be aborted whatever
            # interrupted the commit, and the error is always re-raised.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s", self._time_of_day(), cmd.message)

    def _time_of_day(self):
        """Return the local time of day as an HH:MM:SS string."""
        # Note: this is a separate method so tests can patch in a fixed value
        # time.strftime() formats the current local time when no time tuple
        # is given; struct_time itself has no strftime() method.
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under refs/tags/ define a tag; any other ref is
        ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if cmd.ref.startswith(tag_prefix):
            self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
        else:
            warning("multiple branches are not supported yet"
                " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string
        :param from_: the import reference of the tagged commit
        :raises KeyError: if from_ has not been recorded by a commit
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
157
158
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
159
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, inventory_cache_size=100):
        """Create the empty caches.

        :param inventory_cache_size: the size passed to the LRU cache
            that holds inventories
        """
        # dataref -> data. dataref is either :mark or the sha-1.
        # Once a blob is used, it should be deleted from here.
        self.blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # directory-path -> inventory-entry lookup table
        # we need to keep all of these but they are small
        self.directory_entries = {}

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # branch -> last revision-id lookup table
        self.last_revision_ids = {}
182
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
183
0.64.5 by Ian Clatworthy
first cut at generic processing method
184
class GenericCommitHandler(processor.CommitHandler):
    """Turn a single CommitCommand into a revision in the repository.

    File commands are accumulated as an inventory delta plus the lines of
    each changed file; post_process_files() then builds the new inventory
    and loads the revision through a RevisionLoader.
    """

    def __init__(self, command, repo, cache_mgr, active_branch):
        """Create the handler.

        :param command: the CommitCommand to process
        :param repo: the repository to load the revision into
        :param cache_mgr: the GenericCacheManager shared across commits
        :param active_branch: the key used to look up the last revision
            when the command names no parents
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

    def post_process_files(self):
        """Build the new inventory and save the revision."""
        # bzrlib.trace is already a dependency of this module; mutter()
        # sends debug output to the bzr log instead of stdout.
        from bzrlib.trace import mutter

        parents = self._resolve_parents()

        # Derive the inventory from the previous one
        if not parents:
            new_inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            new_inventory = self.get_inventory(parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            new_inventory.root.revision = self.revision_id
        new_inventory.apply_delta(self.inv_delta)
        # The inventory cache is keyed by revision-id (that is how
        # get_inventory() and _get_inventories() look entries up), so it
        # must not be stored under the import ref.
        self.cache_mgr.inventories[self.revision_id] = new_inventory

        # debug trace ...
        mutter("applied inventory delta ...")
        for entry in self.inv_delta:
            mutter("  %r", entry)
        mutter("creating inventory ...")
        for entry in new_inventory:
            mutter("  %r", entry)

        # Load the revision into the repository
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=self.escape_commit_message(self.command.message),
           revision_id=self.revision_id)
        rev.parent_ids = parents
        self.loader.load(rev, new_inventory, None, self._get_lines)
        mutter("loaded revision %r", rev)

    def _resolve_parents(self):
        """Return the list of parent revision-ids for this commit."""
        if self.command.parents:
            return [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        # if no parents are given, the last revision on
        # the current branch is assumed according to the spec
        last_rev = self.cache_mgr.last_revision_ids.get(self.active_branch)
        if last_rev:
            return [last_rev]
        return []

    def escape_commit_message(self, msg):
        """Return msg in a form safe to store; currently a no-op."""
        # It's crap that we need to do this at this level (but we do)
        # TODO
        return msg

    def modify_handler(self, filecmd):
        """Record the addition or modification of a file."""
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice:
            # the blob is removed from the cache as it is fetched.
            data = self.cache_mgr.blobs.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Record the deletion of a path in the inventory delta."""
        path = filecmd.path
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Copies are not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Renames are not supported yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Deleting everything is not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Generate a Bazaar file identifier for a path."""
        # TODO: Search the current inventory instead of generating every time
        # NOTE(review): presumably gen_file_id() returns a different id on
        # each call, in which case deletes and changes of existing paths
        # will not match the id already in the inventory - confirm.
        return generate_ids.gen_file_id(path)

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        timestamp = committer[2]
        return generate_ids.gen_revision_id(who, timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The cache is tried first; on a miss the inventory is read from
        the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # TODO: count misses and/or inform the user about the miss?
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: (present, inventories) where present lists the
            revision-ids found in the cache or the repository and
            inventories holds one inventory per requested revision-id
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    # Unknown revision: fall back to the tree for None
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :raises errors.BzrError: if kind is neither a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an inventory delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, added to the
        inventory delta and remembered in the directory cache.

        :return: (basename, parent_id) where basename is the last
            component of path and parent_id is the file-id of the
            directory containing it
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, inventory.ROOT_ID
        try:
            ie = self.cache_mgr.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            # Always hand back a file-id so that callers (including the
            # recursive call below) get the same type as in the root case.
            return basename, ie.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_id)
        ie.revision = self.revision_id
        self.cache_mgr.directory_entries[dirname] = ie
        # The delta entry describes the directory, so its new path is
        # dirname rather than the path of the item inside it.
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie.file_id