/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
39
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
40
    processor,
41
    revisionloader,
42
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
43
44
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
45
# How many commits before automatically checkpointing
46
# This is the fallback used when no 'checkpoint' parameter is supplied.
_DEFAULT_AUTO_CHECKPOINT = 10000
47
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
48
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor (this is important for knowing
      what to intelligently cache)

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 (or less) disables automatic
      checkpointing.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'checkpoint', 'count']

    def pre_process(self):
        """Set up timing, parameters, caches, statistics and a write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # Head tracking: last ref & map of commit mark to ref
        self.last_ref = None
        self.heads = {}

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Load the optional info file and interpret the known parameters.

        Sets self.info, self.checkpoint_every, self.max_commits and
        self.total_commits.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the import, aborting any open write group on failure."""
        # The bare except is deliberate: whatever goes wrong (including an
        # interrupt), the write group must not be left open. The exception
        # is always re-raised.
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit pending data, then update branches and the working tree."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = BranchUpdater(self.branch, self.cache_mgr,
            helpers.invert_dict(self.heads), self.last_ref)
        updater.update()

        # Update the working tree, if any
        if self.working_tree:
            self.note("Updating the working tree ...")
            self.working_tree.update(delta._ChangeReporter())
        self.dump_stats()

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        self._branch_count = 1
        self._tag_count = 0

    def dump_stats(self):
        """Output a summary of the revisions, branches and tags imported."""
        rc = self._revision_count
        bc = self._branch_count
        tc = self._tag_count
        note("Imported %d %s into %d %s with %d %s.",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            tc, helpers.single_plural(tc, "tag", "tags"))

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            # sha_string hashes the data in one go; sha_strings would walk
            # the string one character at a time to reach the same digest.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: ignored; None is passed for automatic checkpoints
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()
        # NOTE(review): this assumes every commit carries a mark - confirm
        mark = ":" + cmd.mark
        self.cache_mgr.revision_ids[mark] = handler.revision_id

        # Track the heads
        for parent in cmd.parents:
            try:
                del self.heads[parent]
            except KeyError:
                warning("didn't find parent %s while tracking heads", parent)
        self.heads[mark] = cmd.ref
        self.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % mark)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every > 0 and
            self._revision_count % self.checkpoint_every == 0):
            # The > 0 guard stops checkpoint=0 from raising
            # ZeroDivisionError; it means "no automatic checkpoints".
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Output a timestamped progress line.

        Reported for every commit when verbose, otherwise every 10th.

        :param details: extra text appended to the progress line
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = '[%s] ' % progress.str_tdelta(eta)
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s", counts, eta_str, details)

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here but timestamped messages
        # is more useful for determining when things might complete
        self.note("progress %s", cmd.message)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Only resets of tags are acted on; other refs are ignored with a
        warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            tag_name = cmd.ref[len('refs/tags/'):]
            if cmd.from_ is None:
                # A reset may omit its from clause, in which case there is
                # no revision to point the tag at.
                warning("ignoring reset of tag '%s' - no from clause",
                    tag_name)
            else:
                self._set_tag(tag_name, cmd.from_)
        else:
            warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name as a utf-8 encoded string
        :param from_: the import reference of the revision being tagged
        :raises KeyError: if from_ is not a known import reference
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
        self._tag_count += 1
0.64.5 by Ian Clatworthy
first cut at generic processing method
255
256
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
257
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._blobs_to_keep_from_info(info)

    @staticmethod
    def _blobs_to_keep_from_info(info):
        """Extract the set of blob ids that must be kept after first use.

        :param info: the mapping given to the constructor, or None
        :return: a set of blob ids, or None meaning every blob is kept
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A single-valued entry can arrive as a bare string rather than a
        # list; wrap it so membership tests compare whole ids instead of
        # matching substrings.
        if isinstance(multi, (str, type(u''))):
            multi = [multi]
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        Empty blobs, blobs named in the info hints, and every blob when
        no hints are available are kept until the end; others are dropped
        the first time they are fetched.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky", id)
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        :raises KeyError: if the blob is unknown or was already consumed
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
324
325
0.64.5 by Ian Clatworthy
first cut at generic processing method
326
class GenericCommitHandler(processor.CommitHandler):
327
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
328
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for a single commit command.

        :param command: the commit command to process
        :param repo: the repository the revision is loaded into
        :param cache_mgr: the manager of caches shared between commits
        :param verbose: if True, extra details are reported
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.verbose = verbose

        def cached_inventories(revision_ids):
            # lets the loader fetch parent inventories through our caches
            return self._get_inventories(revision_ids)

        self.loader = revisionloader.RevisionLoader(repo, cached_inventories)
336
337
    def pre_process_files(self):
        """Set up the revision id, parents and inventory for this commit."""
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # texts belonging to this commit, keyed by file-id
        self.lines_for_commit = {}

        # Translate the parent import references into revision-ids
        parent_refs = self.command.parents
        if parent_refs:
            self.parents = [self.cache_mgr.revision_ids[ref]
                for ref in parent_refs]
        else:
            self.parents = []

        # Start from a copy of the first parent's inventory, or from
        # scratch when there is no parent
        if self.parents:
            # the inventory cache is keyed by bzr revision-id
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for the current inventory
        self.directory_entries = dict(self.inventory.directories())
365
0.64.14 by Ian Clatworthy
commit of modified files working
366
    def post_process_files(self):
        """Apply the inventory delta and load the revision."""
        pending_delta = self.inv_delta
        if self.verbose:
            note("applying inventory delta ...")
            for delta_entry in pending_delta:
                note("  %r" % (delta_entry,))
        self.inventory.apply_delta(pending_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for inv_entry in self.inventory:
                note("  %r" % (inv_entry,))

        # Work out the committer and, when different, the author
        cmd = self.command
        committer = cmd.committer
        who = "%s <%s>" % (committer[0],committer[1])
        rev_props = {}
        author = cmd.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            if author_id != who:
                # only record the author when it adds information
                rev_props['author'] = author_id

        # Build the revision and load it into the repository
        safe_message = self._escape_commit_message(cmd.message)
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=safe_message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)

        def lines_for(file_id):
            return self._get_lines(file_id)

        self.loader.load(rev, self.inventory, None, lines_for)
398
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
399
    def _escape_commit_message(self, message):
400
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
401
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
402
        # Code copied from bzrlib.commit.
403
        
404
        # Python strings can include characters that can't be
405
        # represented in well-formed XML; escape characters that
406
        # aren't listed in the XML specification
407
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
408
        message, _ = re.subn(
409
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
410
            lambda match: match.group(0).encode('unicode_escape'),
411
            message)
412
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
413
414
    def modify_handler(self, filecmd):
        """Apply a file modification to the inventory.

        The content comes from the blob cache when the command names a
        dataref, otherwise from the data carried by the command itself.
        """
        dataref = filecmd.dataref
        if dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
421
422
    def delete_handler(self, filecmd):
        """Delete a path from the inventory.

        A path that is not in the inventory is ignored with a warning.
        The path is always passed on to the cache manager as well.
        """
        doomed = filecmd.path
        try:
            try:
                del self.inventory[self.bzr_file_id(doomed)]
            except errors.NoSuchId:
                warning("ignoring delete of %s - not in inventory" % (doomed,))
        finally:
            # runs whether or not the inventory delete succeeded
            try:
                self.cache_mgr._delete_path(doomed)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
433
434
    def copy_handler(self, filecmd):
435
        raise NotImplementedError(self.copy_handler)
436
437
    def rename_handler(self, filecmd):
        """Record a rename in the inventory delta and the path cache."""
        source, target = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source)
        # NOTE(review): the entry appended is the existing inventory entry,
        # unchanged - confirm that apply_delta accepts it for a rename
        self.inv_delta.append(
            (source, target, file_id, self.inventory[file_id]))
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
444
445
    def deleteall_handler(self, filecmd):
446
        raise NotImplementedError(self.deleteall_handler)
447
0.64.16 by Ian Clatworthy
safe processing tweaks
448
    def bzr_file_id_and_new(self, path):
449
        """Get a Bazaar file identifier and new flag for a path.
450
        
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
451
        :return: file_id, is_new where
452
          is_new = True if the file_id is newly created
0.64.16 by Ian Clatworthy
safe processing tweaks
453
        """
454
        try:
455
            return self.cache_mgr.file_ids[path], False
456
        except KeyError:
457
            id = generate_ids.gen_file_id(path)
458
            self.cache_mgr.file_ids[path] = id
459
            return id, True
460
0.64.5 by Ian Clatworthy
first cut at generic processing method
461
    def bzr_file_id(self, path):
0.64.14 by Ian Clatworthy
commit of modified files working
462
        """Get a Bazaar file identifier for a path."""
0.64.16 by Ian Clatworthy
safe processing tweaks
463
        return self.bzr_file_id_and_new(path)[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
464
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
465
    def gen_initial_inventory(self):
        """Create the starting inventory for a revision without parents."""
        return inventory.Inventory(revision_id=self.revision_id)
469
0.64.5 by Ian Clatworthy
first cut at generic processing method
470
    def gen_revision_id(self):
        """Generate a revision id from the committer and commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        committer = self.command.committer
        return generate_ids.gen_revision_id(
            "%s <%s>" % (committer[0],committer[1]), committer[2])
481
0.64.7 by Ian Clatworthy
start of multiple commit handling
482
    def get_inventory(self, revision_id):
483
        """Get the inventory for a revision id."""
484
        try:
485
            inv = self.cache_mgr.inventories[revision_id]
486
        except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
487
            print "Hmm - get_inventory cache miss for %s" % revision_id
0.64.7 by Ian Clatworthy
start of multiple commit handling
488
            # Not cached so reconstruct from repository
489
            inv = self.repo.revision_tree(revision_id).inventory
490
            self.cache_mgr.inventories[revision_id] = inv
491
        return inv
492
0.64.5 by Ian Clatworthy
first cut at generic processing method
493
    def _get_inventories(self, revision_ids):
494
        """Get the inventories for revision-ids.
495
        
496
        This is a callback used by the RepositoryLoader to
497
        speed up inventory reconstruction."""
498
        present = []
499
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
500
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
501
        # successfully loaded into the repsoitory
502
        for revision_id in revision_ids:
503
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
504
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
505
                present.append(revision_id)
506
            except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
507
                print "Hmm - get_inventories cache miss for %s" % revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
508
                # Not cached so reconstruct from repository
509
                if self.repo.has_revision(revision_id):
510
                    rev_tree = self.repo.revision_tree(revision_id)
511
                    present.append(revision_id)
512
                else:
513
                    rev_tree = self.repo.revision_tree(None)
514
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
515
                self.cache_mgr.inventories[revision_id] = inv
516
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
517
        return present, inventories
518
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
519
    def _get_lines(self, file_id):
520
        """Get the lines for a file-id."""
521
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
522
523
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item; its containing directory is
          created first if needed
        :param kind: inventory kind passed to ``inventory.make_entry``
        :param is_executable: executable flag, used for file entries only
        :param data: the file content for files, or the link target for
          symlinks
        :raises errors.BzrError: if the entry built for ``kind`` is neither
          a file nor a symlink
        """
        # Build the new InventoryEntry under its (possibly new) parent
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id

        if isinstance(entry, inventory.InventoryFile):
            text_lines = osutils.split_lines(data)
            entry.executable = is_executable
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum(len(line) for line in text_lines)
            # Keep the text around so _get_lines can return it
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
548
549
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing ancestor directories are created recursively, added to the
        inventory and remembered in ``self.directory_entries``.

        :return: (basename, parent_ie) where basename is the last component
          of path and parent_ie is the inventory entry of its directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root

        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename,
            parent_ie.file_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(new_dir)
        return basename, new_dir
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
578
579
580
class BranchUpdater(object):
    """Updates Bazaar branch tips from the heads found during an import."""

    def __init__(self, branch, cache_mgr, heads_by_ref, last_ref):
        """Create an object responsible for updating branches.

        :param branch: the default branch to update; its repository is
          used for revision history lookups
        :param cache_mgr: object whose ``revision_ids`` maps commit marks
          to revision ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commit marks.
        :param last_ref: the key in heads_by_ref whose head becomes the
          tip of the default branch
        """
        self.branch = branch
        self.repo = branch.repository
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        The default branch is updated first, followed by every
        (branch, tip) pair reported by _get_matching_branches.
        """
        default_tip, other_tips = self._get_matching_branches()
        self._update_branch(self.branch, default_tip)
        for other_branch, other_tip in other_tips:
            self._update_branch(other_branch, other_tip)

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: default_tip, branch_tips where
          default_tip = the last commit mark for the default branch
          branch_tips = a list of (branch,tip) tuples for other branches.
        """
        # Simple for now: only the head of the last ref is used and no
        # other branches are reported.
        #
        # TODO: handle multiple heads. The sketched plan was to pick
        # refs/heads/master (else the first ref in sorted order) as the
        # default head, then get/create a branch for each remaining head
        # when the repository is shared, or otherwise warn with
        # "unshared repository so not creating these branches:" followed
        # by each head.
        default_tip = self.heads_by_ref[self.last_ref][0]
        branch_tips = []
        return default_tip, branch_tips

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the branch's new tip
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revno is the number of revisions in the tip's history
        history = self.repo.iter_reverse_revision_history(last_rev_id)
        revno = sum(1 for _rev in history)
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("branch %s has %d revisions", br.nick, revno)
653