/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
36
from bzrlib.trace import (
37
    note,
38
    warning,
0.64.37 by Ian Clatworthy
create branches as required
39
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
40
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
41
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
43
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
44
    processor,
45
    revisionloader,
46
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
47
48
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
49
# How many commits before automatically checkpointing
50
_DEFAULT_AUTO_CHECKPOINT = 10000
51
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * commits of files and symlinks are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working tree before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards. In the future, this parameter
      might be more flexible, e.g. take a pattern of trees to update.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000.

    * count - only import this many commits then exit. If not set,
      or if negative, all commits are imported.
    """

    # Names of the parameters described in the class docstring above.
    known_params = ['info', 'trees', 'checkpoint', 'count']
87
88
    def note(self, msg, *args):
        """Emit a note prefixed with the current time of day.

        :param msg: the message text
        :param args: extra positional arguments, forwarded unchanged to
            the module-level note() function
        """
        stamped = "%s %s" % (self._time_of_day(), msg)
        note(stamped, *args)
92
93
    def warning(self, msg, *args):
        """Emit a warning prefixed with the current time of day.

        :param msg: the message text
        :param args: extra positional arguments, forwarded unchanged to
            the module-level warning() function
        """
        stamped = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(stamped, *args)
97
98
    def _time_of_day(self):
99
        """Time of day as a string."""
100
        # Note: this is a separate method so tests can patch in a fixed value
101
        return time.strftime("%H:%M:%S")
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
102
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
103
    def pre_process(self):
        """Prepare for an import.

        Records the start time, loads the parameters, creates the cache
        manager, resets the statistics and opens the first write group.
        """
        self._start_time = time.time()
        self._load_info_and_params()

        # tag name -> revision_id
        self.tags = {}

        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # The import runs inside a write group which is committed at the
        # end. A checkpoint commits the group in progress and opens a new one.
        self.repo.start_write_group()
115
116
    def _load_info_and_params(self):
        """Load the hints file, if any, and decode the numeric parameters.

        Sets the ``info``, ``checkpoint_every``, ``max_commits`` and
        ``total_commits`` attributes.
        """
        params = self.params

        # Hints from the info processor, when a file was named
        hints_path = params.get('info')
        if hints_path is None:
            self.info = None
        else:
            self.info = configobj.ConfigObj(hints_path)

        # Frequency of automatic checkpoints
        self.checkpoint_every = int(
            params.get('checkpoint', _DEFAULT_AUTO_CHECKPOINT))

        # Maximum number of commits to import - None (or a negative count)
        # means all of them. The limit at which we stop is kept apart from
        # the total used for progress tracking, just in case the hints
        # file holds an outdated count of commits.
        try:
            limit = int(params['count'])
        except KeyError:
            limit = None
        else:
            if limit < 0:
                limit = None
        self.max_commits = limit

        if self.info is None:
            self.total_commits = limit
        else:
            total = int(self.info['Command counts']['commit'])
            if limit is not None and total > limit:
                total = limit
            self.total_commits = total
0.64.25 by Ian Clatworthy
slightly better progress reporting
146
0.64.27 by Ian Clatworthy
1st cut at performance tuning
147
148
    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on failure.

        :param command_iter: passed straight to ImportProcessor._process
        """
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Whatever went wrong, don't leave a write group open, then let
            # the original exception continue on its way.
            repo = self.repo
            if repo is not None and repo.is_in_write_group():
                repo.abort_write_group()
            raise
156
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
157
    def post_process(self):
        """Finish the import.

        Commits the current write group, updates the branches, warns about
        any heads for which no branch was created, optionally updates the
        working tree (when the 'trees' parameter is set) and dumps the
        statistics.
        """
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                # Deliberately not timestamped: these lines continue the
                # warning above.
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees', False):
            if self.working_tree is None:
                self.warning("No working tree available to update")
            else:
                # The reporter must be bound under the same name in both
                # branches; it is handed to update() below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                self.note("Updating the working tree ...")
                self.working_tree.update(reporter)
                self._tree_count = 1
                remind_about_update = False
        self.dump_stats()
        if remind_about_update:
            self.note("NOTE: To refresh working trees, use 'bzr update'")
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
198
199
    def init_stats(self):
        """Reset the count of imported revisions to zero."""
        self._revision_count = 0
0.64.5 by Ian Clatworthy
first cut at generic processing method
201
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
202
    def dump_stats(self):
        """Report the number of revisions, branches and trees and the time taken."""
        elapsed = progress.str_tdelta(time.time() - self._start_time)
        revisions = self._revision_count
        branches = self._branch_count
        trees = self._tree_count
        revision_word = helpers.single_plural(revisions, "revision",
            "revisions")
        branch_word = helpers.single_plural(branches, "branch", "branches")
        tree_word = helpers.single_plural(trees, "tree", "trees")
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            revisions, revision_word,
            branches, branch_word,
            trees, tree_word,
            elapsed)
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
212
0.64.5 by Ian Clatworthy
first cut at generic processing method
213
    def blob_handler(self, cmd):
        """Process a BlobCommand by caching its data.

        A marked blob is stored under ``cmd.id``. An unmarked blob is
        stored under the SHA-1 of its data.

        :param cmd: the blob command; ``mark``, ``id`` and ``data`` are read
        """
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            # cmd.data is one string, so hash it in a single call instead
            # of letting sha_strings() iterate over it character by
            # character. The digest is the same.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)
0.64.5 by Ian Clatworthy
first cut at generic processing method
220
221
    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: not used by this method
        """
        repo = self.repo
        # Close off the write group in progress, then open its successor
        repo.commit_write_group()
        repo.start_write_group()
0.64.5 by Ian Clatworthy
first cut at generic processing method
226
227
    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Loads the revision, updates the head-tracking caches, reports
        progress and then either stops (requested count reached) or
        triggers an automatic checkpoint when one is due.
        """
        # 'Commit' the revision
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()

        # Update caches
        caches = self.cache_mgr
        caches.revision_ids[cmd.id] = handler.revision_id
        caches.last_ids[cmd.ref] = cmd.id
        caches.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up ...
        limit = self.max_commits
        if limit is not None and self._revision_count >= limit:
            self.note("stopping after reaching requested count of commits")
            self.finished = True
            return

        # ... or automatically checkpoint
        if self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
252
0.64.25 by Ian Clatworthy
slightly better progress reporting
253
    def report_progress(self, details=''):
        """Note how many commits have been processed so far.

        Unless running verbosely, only every 10th commit is reported.

        :param details: text appended to the end of the message
        """
        # TODO: use a progress bar with ETA enabled
        done = self._revision_count
        if not self.verbose and done % 10 != 0:
            return

        total = self.total_commits
        if total is None:
            counts = "%d" % (done,)
            eta_str = ''
        else:
            counts = "%d/%d" % (done, total)
            eta = progress.get_eta(self._start_time, done, total)
            eta_str = progress.str_tdelta(eta)
            if eta_str.endswith('--'):
                eta_str = ''
            else:
                eta_str = '[%s] ' % eta_str
        self.note("%s commits processed %s%s" % (counts, eta_str, details))
0.64.25 by Ian Clatworthy
slightly better progress reporting
269
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
270
    def progress_handler(self, cmd):
        """Process a ProgressCommand by noting its message."""
        # We could use a progress bar here instead
        text = "progress %s" % (cmd.message,)
        self.note(text)
0.64.5 by Ian Clatworthy
first cut at generic processing method
274
275
    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under refs/tags/ defines a tag. A reset of any
        other ref is ignored with a warning.
        """
        tag_prefix = 'refs/tags/'
        ref = cmd.ref
        if not ref.startswith(tag_prefix):
            self.warning("named branches are not supported yet"
                " - ignoring reset of '%s'", ref)
            return
        self._set_tag(ref[len(tag_prefix):], cmd.from_)
0.64.5 by Ian Clatworthy
first cut at generic processing method
282
283
    def tag_handler(self, cmd):
        """Process a TagCommand by defining the tag cmd.id on cmd.from_."""
        tag_name, target = cmd.id, cmd.from_
        self._set_tag(tag_name, target)
286
287
    def _set_tag(self, name, from_):
288
        """Define a tag given a name an import 'from' reference."""
289
        bzr_tag_name = name.decode('utf-8', 'replace')
290
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
0.64.11 by Ian Clatworthy
tag support
291
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
292
293
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
294
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: the size given to the LRU cache
            of inventories
        """
        self.verbose = verbose

        # dataref -> data, where dataref is either :mark or the sha-1.
        # Ordinary blobs are removed once fetched; sticky blobs are kept.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit mark to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        keep = None
        if info is not None:
            try:
                keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                keep = None
        self._blobs_to_keep = keep

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no hints are available, when it is
        empty or when the hints list it; otherwise it is kept only until
        it is first fetched.
        """
        keep = self._blobs_to_keep
        sticky = keep is None or data == '' or id in keep
        if not sticky:
            self._blobs[id] = data
            return
        self._sticky_blobs[id] = data
        if self.verbose:
            print("making blob %s sticky" % (id,))

    def fetch_blob(self, id):
        """Fetch a blob of data.

        A sticky blob stays cached; any other blob is removed from the
        cache as it is returned. Raises KeyError for an unknown id.
        """
        if id in self._sticky_blobs:
            return self._sticky_blobs[id]
        return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # Intentionally a no-op: we want to remember which file-id a path
        # was given even after the file has been deleted.
        return None

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # The old path keeps its entry: we want to remember the file-id
        # given to it, so afterwards both paths map to the same file-id.
        known_id = self.file_ids[old_path]
        self.file_ids[new_path] = known_id
366
367
0.64.5 by Ian Clatworthy
first cut at generic processing method
368
class GenericCommitHandler(processor.CommitHandler):
369
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
370
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed to the base class
        :param repo: the repository given to the revision loader
        :param cache_mgr: the cache manager shared with the processor
        :param verbose: if True, extra notes are emitted while saving
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.cache_mgr = cache_mgr
        self.repo = repo

        # smart loader that fetches inventories through this handler
        def inventory_source(revision_ids):
            return self._get_inventories(revision_ids)
        self.loader = revisionloader.RevisionLoader(repo, inventory_source)
378
379
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parents, updates the
        head tracking and seeds the inventory for the new revision.
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        # Work out the true set of parents: an unmarked commit follows on
        # from the last commit seen on the same ref, if there was one
        cmd = self.command
        caches = self.cache_mgr
        if cmd.mark is not None:
            parents = cmd.parents
        else:
            previous = caches.last_ids.get(cmd.ref)
            if previous is None:
                parents = []
            else:
                parents = [previous]

        # Track the heads: parents stop being heads, this commit becomes one
        heads = caches.heads
        for parent in parents:
            if parent in heads:
                del heads[parent]
            else:
                warning("didn't find parent %s while tracking heads",
                    parent)
        heads[cmd.id] = cmd.ref

        # Translate the parents into revision-ids
        self.parents = [caches.revision_ids[p] for p in parents]

        # Seed the inventory from the first parent's, if there is one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
427
0.64.14 by Ian Clatworthy
commit of modified files working
428
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory and loads the revision into the repository.
        """
        verbose = self.verbose
        if verbose:
            note("applying inventory delta ...")
            for change in self.inv_delta:
                note("  %r" % (change,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if verbose:
            note("created inventory ...")
            for item in self.inventory:
                note("  %r" % (item,))

        # Work out the committer and, only when different, the author
        command = self.command
        committer = command.committer
        who = "%s <%s>" % (committer[0], committer[1])
        rev_props = {}
        author = command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id

        # Load the revision into the repository
        message = self._escape_commit_message(command.message)
        rev = revision.Revision(
           timestamp=committer[2],
           timezone=committer[3],
           committer=who,
           message=message,
           revision_id=self.revision_id,
           properties=rev_props,
           parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None,
            lambda file_id: self._get_lines(file_id))
460
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
461
    def _escape_commit_message(self, message):
462
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
463
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
464
        # Code copied from bzrlib.commit.
465
        
466
        # Python strings can include characters that can't be
467
        # represented in well-formed XML; escape characters that
468
        # aren't listed in the XML specification
469
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
470
        message, _ = re.subn(
471
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
472
            lambda match: match.group(0).encode('unicode_escape'),
473
            message)
474
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
475
476
    def modify_handler(self, filecmd):
        """Handle a file modification.

        The content comes from the blob cache when the command names a
        dataref, otherwise from the command itself.
        """
        ref = filecmd.dataref
        if ref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(ref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
483
484
    def delete_handler(self, filecmd):
        """Handle a file deletion.

        Deleting a path that is not in the inventory only produces a
        warning. The cache manager is told about the deletion either way.
        """
        doomed = filecmd.path
        try:
            file_id = self.bzr_file_id(doomed)
            del self.inventory[file_id]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (doomed,))
        finally:
            try:
                self.cache_mgr._delete_path(doomed)
            except KeyError:
                # the caches had nothing recorded for this path
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
495
496
    def copy_handler(self, filecmd):
        """Handle a file-copy command (not supported yet).

        :raises NotImplementedError: always
        """
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
498
499
    def rename_handler(self, filecmd):
        """Record a file-rename command.

        The rename is appended to the inventory delta as an
        (old_path, new_path, file_id, entry) tuple and the cache manager
        is told about the new path.
        """
        source, target = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        self.inv_delta.append((source, target, file_id, entry))
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
506
507
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command (not supported yet).

        :raises NotImplementedError: always
        """
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
509
0.64.16 by Ian Clatworthy
safe processing tweaks
510
    def bzr_file_id_and_new(self, path):
        """Look up, or allocate, the Bazaar file identifier for a path.

        :param path: the path to find an identifier for
        :return: file_id, is_new where
          is_new = False if the identifier was already cached for the
          path, True if it was just generated and added to the cache
        """
        known_ids = self.cache_mgr.file_ids
        try:
            file_id = known_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            known_ids[path] = file_id
            return file_id, True
        return file_id, False
522
0.64.5 by Ian Clatworthy
first cut at generic processing method
523
    def bzr_file_id(self, path):
        """Return the Bazaar file identifier for a path.

        This is bzr_file_id_and_new() without the is_new flag.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
526
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
527
    def gen_initial_inventory(self):
        """Build the starting inventory for a revision without parents.

        :return: a new inventory.Inventory created with this handler's
          revision id
        """
        return inventory.Inventory(revision_id=self.revision_id)
531
0.64.5 by Ian Clatworthy
first cut at generic processing method
532
    def gen_revision_id(self):
        """Generate a revision id from the commit's committer details.

        Subclasses may override this, e.g. to produce deterministic ids.

        :return: the id generated from "name <email>" and the timestamp,
          i.e. the first three items of the command's committer
        """
        committer = self.command.committer
        # Open question from the original author: using the person
        # running the import as 'who' might be acceptable, slightly
        # quicker and compress slightly better.
        who = "%s <%s>" % (committer[0],committer[1])
        return generate_ids.gen_revision_id(who, committer[2])
543
0.64.7 by Ian Clatworthy
start of multiple commit handling
544
    def get_inventory(self, revision_id):
        """Return the inventory for a revision id, using the cache first.

        On a cache miss a diagnostic line is printed, the inventory is
        taken from the repository's revision tree and the cache is
        updated with it.
        """
        cache = self.cache_mgr.inventories
        try:
            return cache[revision_id]
        except KeyError:
            print("Hmm - get_inventory cache miss for %s" % revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        cache[revision_id] = inv
        return inv
554
0.64.5 by Ian Clatworthy
first cut at generic processing method
555
    def _get_inventories(self, revision_ids):
556
        """Get the inventories for revision-ids.
557
        
558
        This is a callback used by the RepositoryLoader to
559
        speed up inventory reconstruction."""
560
        present = []
561
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
562
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
563
        # successfully loaded into the repsoitory
564
        for revision_id in revision_ids:
565
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
566
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
567
                present.append(revision_id)
568
            except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
569
                print "Hmm - get_inventories cache miss for %s" % revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
570
                # Not cached so reconstruct from repository
571
                if self.repo.has_revision(revision_id):
572
                    rev_tree = self.repo.revision_tree(revision_id)
573
                    present.append(revision_id)
574
                else:
575
                    rev_tree = self.repo.revision_tree(None)
576
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
577
                self.cache_mgr.inventories[revision_id] = inv
578
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
579
        return present, inventories
580
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
581
    def _get_lines(self, file_id):
582
        """Get the lines for a file-id."""
583
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
584
585
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add an item to the inventory, or replace the existing one.

        :param path: path of the item; its containing directory is
          ensured to exist first
        :param kind: the kind handed to inventory.make_entry
        :param is_executable: executable flag, used for file entries
        :param data: the content for a file, the target for a symlink
        :raises errors.BzrError: if the entry made for 'kind' is neither
          a file nor a symlink
        """
        # Build the replacement entry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(
            kind, basename, parent_ie.file_id, file_id)
        entry.revision = self.revision_id

        if isinstance(entry, inventory.InventoryFile):
            text_lines = osutils.split_lines(data)
            entry.executable = is_executable
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum([len(line) for line in text_lines])
            # Keep the text available for _get_lines
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError(
                "Cannot import items of kind '%s' yet" % (kind,))

        # Store the entry in the inventory
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
610
611
    def _ensure_directory(self, path):
        """Make sure the directory containing 'path' is in the inventory.

        Directories that are not yet known are created recursively,
        parents first.

        :return: basename, parent_ie where
          basename = the final component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # Unknown directory: fall through and create it
            pass

        # Make sure the parent exists before adding the directory itself
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(new_dir)
        return basename, new_dir
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
640
641
0.64.34 by Ian Clatworthy
report lost branches
642
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
643
0.64.37 by Ian Clatworthy
create branches as required
644
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
645
        """Create an object responsible for updating branches.
646
647
        :param heads_by_ref: a dictionary where
648
          names are git-style references like refs/heads/master;
649
          values are one item lists of commits marks.
650
        """
0.64.37 by Ian Clatworthy
create branches as required
651
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
652
        self.branch = branch
653
        self.cache_mgr = cache_mgr
654
        self.heads_by_ref = heads_by_ref
655
        self.last_ref = last_ref
656
657
    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        Every (branch, tip) pair found by _get_matching_branches() is
        brought up to date. Heads that ended up without a branch are
        handed back to the caller so the loss of information can be
        reported.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        branch_tips, lost_heads = self._get_matching_branches()
        updated = []
        for target, tip in branch_tips:
            self._update_branch(target, tip)
            updated.append(target)
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
675
676
    def _get_matching_branches(self):
677
        """Get the Bazaar branches.
678
0.64.34 by Ian Clatworthy
report lost branches
679
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
680
          default_tip = the last commit mark for the default branch
681
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
682
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
683
            would have been created had the repository been shared and
684
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
685
        """
0.64.37 by Ian Clatworthy
create branches as required
686
        branch_tips = []
687
        lost_heads = []
688
        ref_names = self.heads_by_ref.keys()
689
        if self.branch is not None:
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
690
            trunk = self.select_trunk(ref_names)
691
            default_tip = self.heads_by_ref[trunk][0]
0.64.37 by Ian Clatworthy
create branches as required
692
            branch_tips.append((self.branch, default_tip))
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
693
            ref_names.remove(trunk)
0.64.34 by Ian Clatworthy
report lost branches
694
695
        # Convert the reference names into Bazaar speak
696
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
697
0.64.37 by Ian Clatworthy
create branches as required
698
        # Policy for locating branches
699
        def dir_under_current(name, ref_name):
700
            # Using the Bazaar name, get a directory under the current one
701
            return name
702
        def dir_sister_branch(name, ref_name):
703
            # Using the Bazaar name, get a sister directory to the branch
704
            return osutils.pathjoin(self.branch.base, "..", name)
705
        if self.branch is not None:
706
            dir_policy = dir_sister_branch
707
        else:
708
            dir_policy = dir_under_current
709
0.64.34 by Ian Clatworthy
report lost branches
710
        # Create/track missing branches
711
        shared_repo = self.repo.is_shared()
712
        for name in sorted(bzr_names.keys()):
713
            ref_name = bzr_names[name]
714
            tip = self.heads_by_ref[ref_name][0]
715
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
716
                location = dir_policy(name, ref_name)
717
                try:
718
                    br = self.make_branch(location)
719
                    branch_tips.append((br,tip))
720
                    continue
721
                except errors.BzrError, ex:
722
                    error("ERROR: failed to create branch %s: %s",
723
                        location, ex)
724
            lost_head = self.cache_mgr.revision_ids[tip]
725
            lost_info = (name, lost_head)
726
            lost_heads.append(lost_info)
727
        return branch_tips, lost_heads
728
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
729
    def select_trunk(self, ref_names):
        """Given a set of ref names, choose one as the trunk.

        refs/heads/master wins whenever it is present; otherwise the
        last reference in the import stream is used.
        """
        preferred = 'refs/heads/master'
        if preferred in ref_names:
            return preferred
        return self.last_ref
736
0.64.37 by Ian Clatworthy
create branches as required
737
    def make_branch(self, location):
        """Create a branch at the given location.

        :return: whatever BzrDir.create_branch_convenience returns for
          the location
        """
        create = bzrdir.BzrDir.create_branch_convenience
        return create(location)
0.64.34 by Ian Clatworthy
report lost branches
740
741
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
742
        """Generate Bazaar branch names from import ref names.
743
        
744
        :return: a dictionary with Bazaar names as keys and
745
          the original reference names as values.
746
        """
0.64.34 by Ian Clatworthy
report lost branches
747
        bazaar_names = {}
748
        for ref_name in sorted(ref_names):
749
            parts = ref_name.split('/')
750
            if parts[0] == 'refs':
751
                parts.pop(0)
752
            full_name = "--".join(parts)
753
            bazaar_name = parts[-1]
754
            if bazaar_name in bazaar_names:
755
                bazaar_name = full_name
756
            bazaar_names[bazaar_name] = ref_name
757
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
758
759
    def _update_branch(self, br, last_mark):
        """Point a branch at the revision imported for a commit mark.

        :param br: the branch to update
        :param last_mark: the mark of the commit that becomes the tip
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revno is the number of revisions yielded when walking the
        # history backwards from the new tip.
        history = self.repo.iter_reverse_revision_history(last_rev_id)
        revno = len(list(history))
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)