/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
20
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
21
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
23
    errors,
24
    generate_ids,
25
    inventory,
26
    lru_cache,
27
    osutils,
28
    revision,
29
    revisiontree,
30
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
31
from bzrlib.trace import (
32
    note,
33
    warning,
34
    )
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
from bzrlib.plugins.fastimport import (
36
    processor,
37
    revisionloader,
38
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
39
40
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
41
def _single_plural(n, single, plural):
42
    """Return a single or plural form of a noun based on number."""
43
    if n == 1:
44
        return single
45
    else:
46
        return plural
47
48
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
49
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * timestamped progress reporting
    * blobs are cached in memory until used
    * commits are loaded into the repository, one write group per commit
    * tags, including tags created via ``reset refs/tags/<name>``
    * checkpoints are ignored
    * LATER: branch support (only the initial branch is updated for now)
    * some basic statistics are dumped on completion.

    This class reads ``self.repo``, ``self.branch``, ``self.working_tree``
    and ``self.verbose`` without assigning them; they are presumably
    provided by ``processor.ImportProcessor`` (not visible in this file).
    """

    def pre_process(self):
        """Create the caches, statistics counters and tag table."""
        self.cache_mgr = GenericCacheManager()
        self.active_branch = self.branch
        self.init_stats()
        # mapping of tag name to revision_id
        self.tags = {}

    def post_process(self):
        """Dump statistics, then update the branch, tags and working tree."""
        self.dump_stats()
        # Update the branches, assuming the last revision is the head
        note("Updating branch information ...")
        # TODO - loop over the branches created/modified
        # Use .get(): if the stream contained no commits there is no entry
        # for the branch and the branch tip must be left untouched.
        last_rev_id = self.cache_mgr.last_revision_ids.get(self.branch)
        if last_rev_id is not None:
            history = self.repo.iter_reverse_revision_history(last_rev_id)
            revno = len(list(history))
            self.branch.set_last_revision_info(revno, last_rev_id)
        if self.tags:
            self.branch.tags._set_tag_dict(self.tags)
        # Update the working tree, if any
        if self.working_tree:
            self.working_tree.update(delta._ChangeReporter())

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # Only a single branch is handled at the moment
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Log a one-line summary of what was imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob data is cached under ``:<mark>`` when the command has a
        mark, otherwise under the SHA-1 of the data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is a single string, so hash it directly rather than
            # feeding it character by character through sha_strings().
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.blobs[dataref] = cmd.data

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        warning("ignoring checkpoint")

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        The commit is loaded inside its own repository write group. On
        success the new revision id is recorded against both the commit's
        ref and the active branch; on any failure the write group is
        aborted and the exception re-raised.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.active_branch, self.verbose)
        # For now, put a write group around every commit. In the future,
        # we might only start/commit one every N to speed things up
        self.repo.start_write_group()
        try:
            handler.process()
            self.cache_mgr.revision_ids[cmd.ref] = handler.revision_id
            self.cache_mgr.last_revision_ids[self.active_branch] = \
                handler.revision_id
            self._revision_count += 1
        except:
            # Deliberately bare: the write group must be aborted whatever
            # interrupted the commit, and the exception is re-raised.
            self.repo.abort_write_group()
            raise
        else:
            self.repo.commit_write_group()

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        note("%s progress %s" % (self._time_of_day(), cmd.message))

    def _time_of_day(self):
        """Return the current local time of day as an HH:MM:SS string."""
        # Note: this is a separate method so tests can patch in a fixed value
        # time.strftime() formats the current local time when no time tuple
        # is given; struct_time objects have no strftime() method.
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ``refs/tags/`` ref defines a tag; any other ref is
        ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        if cmd.ref.startswith(tag_prefix):
            self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
        else:
            warning("multiple branches are not supported yet"
                " - ignoring branch '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a UTF-8 encoded string
        :param from_: the import reference the tag points at, or None
        :raises KeyError: if ``from_`` is given but is not a reference
            recorded by an earlier commit
        """
        if from_ is None:
            # Without a 'from' there is no revision to point the tag at
            warning("ignoring tag '%s' - no 'from' reference given", name)
            return
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
157
158
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
159
class GenericCacheManager(object):
    """Container for the lookup tables and caches used by GenericProcessor.

    :param inventory_cache_size: maximum number of inventories kept in
        the LRU inventory cache
    """

    def __init__(self, inventory_cache_size=100):
        # --- tables kept for the whole import (entries are small) ---

        # import-ref -> revision-id
        self.revision_ids = {}

        # branch -> revision-id of the last commit made on it
        self.last_revision_ids = {}

        # path -> file-id
        self.file_ids = {}

        # directory-path -> inventory-entry
        self.directory_entries = {}

        # --- transient data ---

        # dataref -> blob data, where dataref is either ':mark' or the
        # sha-1 of the data. An entry should be removed once it is used.
        self.blobs = {}

        # revision-id -> Inventory. Inventories are large, and most
        # parents are recent in history, so only a bounded number is kept.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
185
0.64.5 by Ian Clatworthy
first cut at generic processing method
186
class GenericCommitHandler(processor.CommitHandler):
    """Build a revision from one CommitCommand and load it into a repository.

    File commands are accumulated into an inventory delta which is applied
    to a copy of the first parent's inventory (or to a fresh inventory for
    a parentless commit). The resulting inventory, revision and file texts
    are then handed to a RevisionLoader.

    The handlers rely on state created by pre_process_files(), so that
    method is presumably invoked by processor.CommitHandler.process()
    before any file command is dispatched (not visible in this file).
    """

    def __init__(self, command, repo, cache_mgr, active_branch, verbose=False):
        """Create a handler.

        :param command: the CommitCommand to process
        :param repo: the repository revisions are loaded into
        :param cache_mgr: the GenericCacheManager shared across commits
        :param active_branch: key used to find the previous revision when
            the command names no parents
        :param verbose: if True, print the inventory delta, the resulting
            inventory and the loaded revision
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.active_branch = active_branch
        self.verbose = verbose
        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo,
            self._get_inventories)

    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parents and seeds
        ``self.inventory`` from the first parent (or from scratch).
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Get the parent revision ids
        if self.command.parents:
            self.parents = [self.cache_mgr.revision_ids[ref]
                for ref in self.command.parents]
        else:
            # if no parents are given, the last revision on
            # the current branch is assumed according to the spec
            last_rev = self.cache_mgr.last_revision_ids.get(
                    self.active_branch)
            if last_rev:
                self.parents = [last_rev]
            else:
                self.parents = []

        # Seed the inventory from the previous one
        if not self.parents:
            self.inventory = self.gen_initial_inventory()
        else:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

    def post_process_files(self):
        """Apply the accumulated delta and save the revision."""
        # Note: print is called with a single parenthesised argument so the
        # statements behave the same as the Python 2 print statement.
        if self.verbose:
            print("applied inventory delta ...")
            for entry in self.inv_delta:
                print("  %r" % (entry,))
        self.inventory.apply_delta(self.inv_delta)
        # The inventory cache is looked up by revision-id (see
        # get_inventory and _get_inventories), so store it under that key.
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            print("creating inventory ...")
            for entry in self.inventory:
                print("  %r" % (entry,))

        # Load the revision into the repository
        committer = self.command.committer
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=self._committer_string(),
           message=self.escape_commit_message(self.command.message),
           revision_id=self.revision_id)
        rev.parent_ids = self.parents
        self.loader.load(rev, self.inventory, None, self._get_lines)
        if self.verbose:
            print("loaded revision %r" % (rev,))

    def _committer_string(self):
        """Return the command's committer formatted as 'name <email>'."""
        committer = self.command.committer
        return "%s <%s>" % (committer[0], committer[1])

    def escape_commit_message(self, msg):
        """Return the commit message to store (currently unchanged)."""
        # It's crap that we need to do this at this level (but we do)
        # TODO
        return msg

    def modify_handler(self, filecmd):
        """Add or change a file, taking its data from the blob cache if
        the command refers to a blob, otherwise from the command itself.
        """
        if filecmd.dataref is not None:
            # Conserve memory, assuming blobs aren't referenced twice:
            # pop() fetches the data and drops it from the cache.
            data = self.cache_mgr.blobs.pop(filecmd.dataref)
        else:
            data = filecmd.data
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, data)

    def delete_handler(self, filecmd):
        """Record the removal of a path in the inventory delta."""
        path = filecmd.path
        self.inv_delta.append((path, None, self.bzr_file_id(path), None))

    def copy_handler(self, filecmd):
        """Not supported yet."""
        raise NotImplementedError(self.copy_handler)

    def rename_handler(self, filecmd):
        """Not supported yet."""
        # TODO: add a suitable entry to the inventory delta
        raise NotImplementedError(self.rename_handler)

    def deleteall_handler(self, filecmd):
        """Not supported yet."""
        raise NotImplementedError(self.deleteall_handler)

    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        A new id is generated and remembered the first time a path is seen.
        """
        try:
            return self.cache_mgr.file_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            self.cache_mgr.file_ids[path] = file_id
            return file_id

    def gen_initial_inventory(self):
        """Generate an inventory for a parentless revision."""
        return inventory.Inventory(revision_id=self.revision_id)

    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.
        """
        timestamp = self.command.committer[2]
        return generate_ids.gen_revision_id(self._committer_string(),
            timestamp)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The cache is tried first; on a miss the inventory is read from the
        repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # TODO: count misses and/or inform the user about the miss?
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: (present, inventories) where present lists the revision
            ids found in the cache or the repository, and inventories has
            one inventory per requested revision id, in order. A revision
            missing from both gets the inventory of revision_tree(None).
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # TODO: count misses and/or inform the user about the miss?
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories

    def _get_lines(self, file_id):
        """Get the lines for a file-id changed in this commit."""
        return self.lines_for_commit[file_id]

    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item
        :param kind: kind of the item, passed to inventory.make_entry
        :param is_executable: executable flag, used for files only
        :param data: file content, or the target for a symlink
        :raises errors.BzrError: if the entry made for ``kind`` is neither
            a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        # make_entry is given the parent's file id, mirroring how
        # _ensure_directory builds directory entries.
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry. As the import stream doesn't
        # repeat all files every time, we build an inventory delta.
        # HACK: We also assume that inventory.apply_delta handles the
        # 'add' case cleanly when asked to change a non-existent entry.
        # This saves time vs explicitly detecting add vs change.
        old_path = path
        self.inv_delta.append((old_path, path, file_id, ie))

    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, recorded in the
        directory cache and added to the inventory delta.

        :return: (basename, parent_ie) where parent_ie is the inventory
            entry of the directory containing ``path``
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated; hand back its entry so
            # callers can uniformly use parent_ie.file_id
            return basename, self.inventory.root
        try:
            return basename, self.cache_mgr.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        ie = inventory.entry_factory['directory'](dir_file_id,
                                                  dir_basename,
                                                  parent_ie.file_id)
        ie.revision = self.revision_id
        self.cache_mgr.directory_entries[dirname] = ie
        # The delta entry describes the new directory, so it carries the
        # directory's own path rather than the path of the file inside it.
        self.inv_delta.append((None, dirname, dir_file_id, ie))
        return basename, ie