/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
39
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
40
    processor,
41
    revisionloader,
42
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
43
44
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
45
# How many commits before automatically checkpointing
46
_DEFAULT_AUTO_CHECKPOINT = 10000
47
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
48
def _single_plural(n, single, plural):
49
    """Return a single or plural form of a noun based on number."""
50
    if n == 1:
51
        return single
52
    else:
53
        return plural
54
55
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
56
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 disables automatic
      checkpointing.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'checkpoint', 'count']

    def pre_process(self):
        """Set up timing, parameters, caches and the first write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # Head tracking: last ref & map of commit mark to ref
        self.last_ref = None
        self.heads = {}

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Read the 'info', 'checkpoint' and 'count' parameters.

        Sets self.info, self.checkpoint_every, self.max_commits and
        self.total_commits.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base processing loop, aborting the write group on error."""
        # if anything goes wrong, abort the write group if any.
        # The bare except is deliberate: interrupts must also trigger the
        # abort, and the exception is always re-raised.
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit pending data, then update branches and the working tree."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = BranchUpdater(self.branch, self.cache_mgr,
            helpers.invert_dict(self.heads), self.last_ref)
        updater.update()

        # Update the working tree, if any
        if self.working_tree:
            self.note("Updating the working tree ...")
            self.working_tree.update(delta._ChangeReporter())
        self.dump_stats()

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Report how many revisions, branches and tags were imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, _single_plural(rc, "revision", "revisions"),
            bc, _single_plural(bc, "branch", "branches"),
            tc, _single_plural(tc, "tag", "tags"))

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob is cached under its mark (":<mark>") when it has one,
        otherwise under the SHA-1 of its data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # cmd.data is one string; sha_strings() would walk it a
            # character at a time to arrive at the same digest.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        cmd is not inspected; commit_handler() passes None when it
        triggers an automatic checkpoint.
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()
        mark = ":" + cmd.mark
        self.cache_mgr.revision_ids[mark] = handler.revision_id

        # Track the heads: a commit's parents stop being heads
        for parent in cmd.parents:
            try:
                del self.heads[parent]
            except KeyError:
                warning("didn't find parent %s while tracking heads", parent)
        self.heads[mark] = cmd.ref
        self.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % mark)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # checkpoint_every == 0 means "never"; without the guard the
            # modulo above would raise ZeroDivisionError.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Emit a timestamped progress line.

        Reports on every commit in verbose mode, otherwise on every
        tenth one. An ETA is included when the total is known.
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = '[%s] ' % progress.str_tdelta(eta)
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # are more useful for determining when things might complete
        self.note("progress %s" % (cmd.message,))

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs/tags/* define a tag; any other ref is ignored
        with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :raises KeyError: if from_ has no recorded revision id
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
263
264
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
265
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._find_blobs_to_keep(info)

    @staticmethod
    def _find_blobs_to_keep(info):
        """Return the set of datarefs to keep, or None to keep them all.

        :param info: mapping with an optional
            ['Blob usage tracking']['multi'] entry, or None
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A single-item config value can come back as one string rather
        # than a list; wrap it so membership is an exact match instead
        # of a substring test.
        if isinstance(multi, (str, type(u''))):
            multi = [multi]
        # store_blob() tests membership once per blob, so use a set.
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no hints are available, when it is
        empty, or when the hints list it; otherwise it is stored for a
        single fetch.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky", id)
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Non-sticky blobs are removed from the cache as they are returned.

        :raises KeyError: if the blob is in neither cache
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
332
333
0.64.5 by Ian Clatworthy
first cut at generic processing method
334
class GenericCommitHandler(processor.CommitHandler):
335
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
336
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed to the base handler
        :param repo: repository the revision is loaded into
        :param cache_mgr: cache manager shared across commits
        :param verbose: if True, report inventory details while saving
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.cache_mgr = cache_mgr
        self.repo = repo

        def inventories_for(revision_ids):
            # Resolved at call time so the loader always goes through
            # this handler's cache-aware lookup.
            return self._get_inventories(revision_ids)

        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo, inventories_for)
344
345
    def pre_process_files(self):
        """Set up the revision id, parents and inventory for this commit."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # texts belonging to this commit, keyed by file-id
        self.lines_for_commit = {}

        # Translate the parent import references into revision ids
        known_revisions = self.cache_mgr.revision_ids
        parent_refs = self.command.parents or ()
        self.parents = [known_revisions[ref] for ref in parent_refs]

        # Start from a copy of the first parent's inventory, or from a
        # fresh inventory when there is no parent
        if self.parents:
            # the cache is keyed by bzr revision id
            basis = self.get_inventory(self.parents[0])
            self.inventory = basis.copy()
        else:
            self.inventory = self.gen_initial_inventory()

        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
373
0.64.14 by Ian Clatworthy
commit of modified files working
374
    def post_process_files(self):
        """Apply the pending inventory delta and load the revision."""

        def identity(person):
            # "Name <email>" from the first two fields of a person tuple
            return "%s <%s>" % (person[0], person[1])

        if self.verbose:
            note("applying inventory delta ...")
            for delta_item in self.inv_delta:
                note("  %r" % (delta_item,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for inv_item in self.inventory:
                note("  %r" % (inv_item,))

        # Only record an author when it differs from the committer
        committer = self.command.committer
        who = identity(committer)
        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = identity(author)
            if author_id != who:
                rev_props['author'] = author_id

        # Load the revision into the repository
        message = self._escape_commit_message(self.command.message)
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id))
406
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
407
    def _escape_commit_message(self, message):
408
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
409
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
410
        # Code copied from bzrlib.commit.
411
        
412
        # Python strings can include characters that can't be
413
        # represented in well-formed XML; escape characters that
414
        # aren't listed in the XML specification
415
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
416
        message, _ = re.subn(
417
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
418
            lambda match: match.group(0).encode('unicode_escape'),
419
            message)
420
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
421
422
    def modify_handler(self, filecmd):
        """Add or change a file in the inventory.

        The content comes from the blob cache when the command carries
        a dataref, otherwise from the command itself.
        """
        dataref = filecmd.dataref
        if dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
429
430
    def delete_handler(self, filecmd):
        """Remove a path from the inventory.

        A path that is not in the inventory is ignored with a warning.
        The cache manager is told about the deletion either way.
        """
        doomed = filecmd.path
        try:
            try:
                del self.inventory[self.bzr_file_id(doomed)]
            except errors.NoSuchId:
                warning("ignoring delete of %s - not in inventory" % (doomed,))
        finally:
            # A path unknown to the cache is not an error.
            try:
                self.cache_mgr._delete_path(doomed)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
441
442
    def copy_handler(self, filecmd):
        """Process a file copy command - not implemented.

        :raises NotImplementedError: always
        """
        raise NotImplementedError(self.copy_handler)
444
445
    def rename_handler(self, filecmd):
        """Queue a rename in the inventory delta and update the caches."""
        source, target = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        self.inv_delta.append((source, target, file_id, entry))
        # Both paths keep mapping to the same file-id afterwards.
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
452
453
    def deleteall_handler(self, filecmd):
        """Process a delete-all command - not implemented.

        :raises NotImplementedError: always
        """
        raise NotImplementedError(self.deleteall_handler)
455
0.64.16 by Ian Clatworthy
safe processing tweaks
456
    def bzr_file_id_and_new(self, path):
        """Look up the Bazaar file-id for a path, creating one if needed.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        if path in known_ids:
            return known_ids[path], False
        # First sighting of this path: generate an id and remember it
        file_id = generate_ids.gen_file_id(path)
        known_ids[path] = file_id
        return file_id, True
468
0.64.5 by Ian Clatworthy
first cut at generic processing method
469
    def bzr_file_id(self, path):
        """Return the Bazaar file-id for a path, creating one if needed."""
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
472
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
473
    def gen_initial_inventory(self):
        """Build the starting inventory for a revision with no parents."""
        return inventory.Inventory(revision_id=self.revision_id)
477
0.64.5 by Ian Clatworthy
first cut at generic processing method
478
    def gen_revision_id(self):
        """Generate a revision id from the committer and commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        person = self.command.committer
        # Perhaps using the person running the import as 'who' is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = "%s <%s>" % (person[0], person[1])
        return generate_ids.gen_revision_id(who, person[2])
489
0.64.7 by Ian Clatworthy
start of multiple commit handling
490
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is tried first. On a miss the inventory is
        read from the repository and added to the cache.
        """
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            # Report through bzrlib's trace rather than stdout
            note("Hmm - get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from repository
            inv = self.repo.revision_tree(revision_id).inventory
            self.cache_mgr.inventories[revision_id] = inv
        return inv
500
0.64.5 by Ian Clatworthy
first cut at generic processing method
501
    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RevisionLoader to
        speed up inventory reconstruction.

        :return: present, inventories where present lists the revision
          ids found in the cache or the repository, and inventories
          holds one inventory per requested id, in request order. An id
          found in neither gets the inventory of revision_tree(None).
        """
        present = []
        inventories = []
        # If an inventory is in the cache, we assume it was
        # successfully loaded into the repository
        for revision_id in revision_ids:
            try:
                inv = self.cache_mgr.inventories[revision_id]
                present.append(revision_id)
            except KeyError:
                # Report through bzrlib's trace rather than stdout
                note("Hmm - get_inventories cache miss for %s", revision_id)
                # Not cached so reconstruct from repository
                if self.repo.has_revision(revision_id):
                    rev_tree = self.repo.revision_tree(revision_id)
                    present.append(revision_id)
                else:
                    rev_tree = self.repo.revision_tree(None)
                inv = rev_tree.inventory
                self.cache_mgr.inventories[revision_id] = inv
            inventories.append(inv)
        return present, inventories
526
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
527
    def _get_lines(self, file_id):
528
        """Get the lines for a file-id."""
529
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
530
531
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item. The entry for its containing
            directory is obtained (and created if necessary) through
            ``_ensure_directory``.
        :param kind: entry kind handed to ``inventory.make_entry``.
        :param is_executable: executable flag; only used for file entries.
        :param data: the content for a file entry, or the link target for
            a symlink entry.
        :raises errors.BzrError: if the entry created for ``kind`` is
            neither a file nor a symlink.
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
        ie.revision = self.revision_id
        if isinstance(ie, inventory.InventoryFile):
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            # Remembered so that _get_lines can serve them back later
            self.lines_for_commit[file_id] = lines
        elif isinstance(ie, inventory.InventoryLnk):
            ie.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id in self.inventory:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = ie
            # The parent directory keeps its own name -> entry map. It must
            # be updated too, otherwise anything that walks the tree via
            # 'children' keeps seeing the entry that was just replaced.
            self.inventory[parent_ie.file_id].children[ie.name] = ie
        else:
            self.inventory.add(ie)
0.64.5 by Ian Clatworthy
first cut at generic processing method
556
557
    def _ensure_directory(self, path):
        """Make sure the directory containing 'path' has an inventory entry.

        Known directories are served from ``self.directory_entries``.
        Unknown ones are created (after their own parent, recursively),
        added to the inventory and remembered in that mapping.

        :param path: the path whose containing directory is required.
        :return: ``(basename, ie)`` where basename is the final component
            of ``path`` and ie is the inventory entry of the containing
            directory (the inventory root when ``path`` has no directory
            part).
        """
        parent_path, basename = osutils.split(path)
        if parent_path == '':
            # the root node doesn't get updated
            return basename, self.inventory.root

        try:
            return basename, self.directory_entries[parent_path]
        except KeyError:
            # Not known yet: fall through and create the entry
            pass

        # Make sure the directory's own parent exists before creating it
        dir_basename, grandparent_ie = self._ensure_directory(parent_path)
        dir_file_id = self.bzr_file_id(parent_path)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename,
                                grandparent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[parent_path] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
586
587
588
class BranchUpdater(object):
    """Moves Bazaar branch tips to the heads recorded during an import."""

    def __init__(self, branch, cache_mgr, heads_by_ref, last_ref):
        """Create an object responsible for updating branches.

        :param branch: the default branch to update; its ``repository``
          attribute is kept as ``self.repo``.
        :param cache_mgr: object whose ``revision_ids`` mapping
          translates commit marks into revision ids.
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commits marks.
        :param last_ref: the key of heads_by_ref whose head is used as
          the tip of the default branch.
        """
        self.branch = branch
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref
        self.repo = branch.repository

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        The default branch is updated first, followed by every
        (branch, tip) pair reported by ``_get_matching_branches``. That
        helper currently reports no extra branches, so only the default
        branch is touched. The intent is to create branches as required
        when the repository is shared and otherwise to warn about the
        loss of information.
        """
        default_tip, branch_tips = self._get_matching_branches()
        targets = [(self.branch, default_tip)]
        targets.extend(branch_tips)
        for br, tip in targets:
            self._update_branch(br, tip)

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: default_tip, branch_tips where
          default_tip = the last commit mark for the default branch
          branch_tips = a list of (branch,tip) tuples for other branches.
        """
        # simple for now: the head of the last ref seen is the default
        # tip and no other branches are reported.
        # TODO: prefer 'refs/heads/master' (else the first ref in sorted
        # order) as the default head, get/create branches for the other
        # heads when the repository is shared, and warn about the heads
        # that are dropped when it is not.
        default_tip = self.heads_by_ref[self.last_ref][0]
        return default_tip, []

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update.
        :param last_mark: the commit mark of the new tip; it is mapped
          to a revision id via ``self.cache_mgr.revision_ids``.
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revno is the number of revisions reached when walking the
        # history backwards from the new tip.
        history = self.repo.iter_reverse_revision_history(last_rev_id)
        revno = sum(1 for rev in history)
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("branch %s has %d revisions", br.nick, revno)
661