/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
36
from bzrlib.trace import (
37
    note,
38
    warning,
0.64.37 by Ian Clatworthy
create branches as required
39
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
40
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
41
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
43
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
44
    processor,
45
    revisionloader,
46
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
47
48
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
49
# How many commits before automatically checkpointing
50
# Used as the fallback when no 'checkpoint' parameter is supplied.
_DEFAULT_AUTO_CHECKPOINT = 10000
51
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
52
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working tree before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards. In the future, this parameter
      might be more flexible, e.g. take a pattern of trees to update.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of zero or less disables
      automatic checkpointing.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'trees', 'checkpoint', 'count']

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Initialise timing, parameters, caches and the first write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Read the 'info', 'checkpoint' and 'count' parameters.

        Sets self.info, self.checkpoint_every, self.max_commits and
        self.total_commits.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error."""
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately bare: whatever went wrong (including an
            # interrupt), abort the open write group and then re-raise.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit the write group, update branches/trees and dump stats."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees'):
            if self.working_tree is None:
                self.warning("No working tree available to update")
            else:
                # Both branches must bind the same name: it is passed to
                # working_tree.update() below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                self.note("Updating the working tree ...")
                self.working_tree.update(reporter)
                self._tree_count = 1
                remind_about_update = False
        self.dump_stats()
        if remind_about_update:
            self.note("NOTE: To refresh working trees, use 'bzr update'")

    def init_stats(self):
        """Reset the count of imported revisions."""
        self._revision_count = 0

    def dump_stats(self):
        """Report the revision, branch and tree counts and the elapsed time."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            # Unmarked blobs are keyed by the sha-1 of their content.
            # sha_string hashes the data in a single call; sha_strings
            # would iterate over it one character at a time.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        # 'Commit' the revision
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()

        # Update caches
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self.cache_mgr.last_ids[cmd.ref] = cmd.id
        self.cache_mgr.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every > 0 and
              self._revision_count % self.checkpoint_every == 0):
            # The > 0 guard avoids a ZeroDivisionError when the user
            # passes checkpoint=0; that now means "never auto-checkpoint".
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Note how many commits have been processed so far.

        Reports every commit in verbose mode, otherwise every 10th one.
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        tag_prefix = 'refs/tags/'
        if cmd.ref.startswith(tag_prefix):
            self._set_tag(cmd.ref[len(tag_prefix):], cmd.from_)
        else:
            self.warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
290
291
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
292
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. datref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit mark to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._find_blobs_to_keep(info)

    @staticmethod
    def _find_blobs_to_keep(info):
        """Return the set of blob ids to keep sticky, or None for all.

        :param info: mapping with the --info analysis, or None
        :return: a frozenset of blob ids, or None when info is None or
            has no 'Blob usage tracking'/'multi' entry
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A single-valued entry comes back as a bare string rather than a
        # list; wrap it so membership tests compare whole ids instead of
        # doing a substring search.
        if not isinstance(multi, (list, tuple)):
            multi = [multi]
        return frozenset(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        Blobs are made sticky when no hints are available, when the data
        is empty, or when the id is listed as used multiple times.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                print("making blob %s sticky" % (id,))
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Non-sticky blobs are removed from the cache once fetched.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
364
365
0.64.5 by Ian Clatworthy
first cut at generic processing method
366
class GenericCommitHandler(processor.CommitHandler):
367
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
368
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Remember the repository and caches and build a revision loader."""
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.cache_mgr = cache_mgr
        self.repo = repo

        # The loader fetches inventories through this handler so that
        # the caches are used.
        def _inventories_for(revision_ids):
            return self._get_inventories(revision_ids)

        self.loader = revisionloader.RevisionLoader(repo, _inventories_for)
376
377
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parents, updates the
        head tracking and seeds the inventory for this commit.
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        command = self.command
        caches = self.cache_mgr

        # Work out the true set of parents: an unmarked commit follows
        # the last commit seen on the same ref, if any.
        if command.mark is not None:
            parents = command.parents
        else:
            previous = caches.last_ids.get(command.ref)
            parents = []
            if previous is not None:
                parents.append(previous)

        # Track the heads
        for parent in parents:
            try:
                del caches.heads[parent]
            except KeyError:
                warning("didn't find parent %s while tracking heads",
                    parent)
        caches.heads[command.id] = command.ref

        # Map the import references of the parents to revision-ids
        if not parents:
            self.parents = []
        else:
            self.parents = [caches.revision_ids[ref] for ref in parents]

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
425
0.64.14 by Ian Clatworthy
commit of modified files working
426
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory and hands the new revision to the loader.
        """
        if self.verbose:
            note("applying inventory delta ...")
            for delta_entry in self.inv_delta:
                note("  %r" % (delta_entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for inv_entry in self.inventory:
                note("  %r" % (inv_entry,))

        # Work out the committer and, only when different, the author
        command = self.command
        committer = command.committer
        committer_id = "%s <%s>" % (committer[0], committer[1])
        properties = {}
        author = command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0], author[1])
            if author_id != committer_id:
                properties['author'] = author_id

        # Load the revision into the repository
        new_revision = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=committer_id,
            message=self._escape_commit_message(command.message),
            revision_id=self.revision_id,
            properties=properties,
            parent_ids=self.parents)

        def _lines_for(file_id):
            return self._get_lines(file_id)

        self.loader.load(new_revision, self.inventory, None, _lines_for)
458
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
459
    def _escape_commit_message(self, message):
460
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
461
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
462
        # Code copied from bzrlib.commit.
463
        
464
        # Python strings can include characters that can't be
465
        # represented in well-formed XML; escape characters that
466
        # aren't listed in the XML specification
467
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
468
        message, _ = re.subn(
469
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
470
            lambda match: match.group(0).encode('unicode_escape'),
471
            message)
472
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
473
474
    def modify_handler(self, filecmd):
        """Record a file modification in the inventory.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command itself.
        """
        dataref = filecmd.dataref
        if dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
481
482
    def delete_handler(self, filecmd):
        """Remove a path from the inventory, warning if it is not there."""
        target = filecmd.path
        try:
            try:
                del self.inventory[self.bzr_file_id(target)]
            except errors.NoSuchId:
                warning("ignoring delete of %s - not in inventory" % (target,))
        finally:
            # Always give the cache manager a chance to forget the path,
            # whether or not the inventory knew about it.
            try:
                self.cache_mgr._delete_path(target)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
493
494
    def copy_handler(self, filecmd):
        """Handle a file-copy command (not supported yet).

        :raises NotImplementedError: always.
        """
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
496
497
    def rename_handler(self, filecmd):
        """Record a rename in the inventory delta and in the path cache.

        The entry currently stored for the old path's file id is appended
        to self.inv_delta as (old_path, new_path, file_id, entry), then
        the cache manager is told about the rename.
        """
        source_path, target_path = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source_path)
        entry = self.inventory[file_id]
        self.inv_delta.append((source_path, target_path, file_id, entry))
        self.cache_mgr._rename_path(source_path, target_path)
0.64.5 by Ian Clatworthy
first cut at generic processing method
504
505
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command (not supported yet).

        :raises NotImplementedError: always.
        """
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
507
0.64.16 by Ian Clatworthy
safe processing tweaks
508
    def bzr_file_id_and_new(self, path):
        """Look up, or create, the Bazaar file identifier for a path.

        A path already known to the cache manager keeps its identifier.
        Otherwise a fresh identifier is generated and remembered.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            file_id = known_ids[path]
        except KeyError:
            file_id = generate_ids.gen_file_id(path)
            known_ids[path] = file_id
            return file_id, True
        return file_id, False
520
0.64.5 by Ian Clatworthy
first cut at generic processing method
521
    def bzr_file_id(self, path):
        """Return only the Bazaar file identifier for a path.

        The identifier is created if the path has not been seen before.
        """
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
524
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
525
    def gen_initial_inventory(self):
        """Build a new inventory for a revision that has no parents.

        The inventory is created with the current self.revision_id.
        """
        return inventory.Inventory(revision_id=self.revision_id)
529
0.64.5 by Ian Clatworthy
first cut at generic processing method
530
    def gen_revision_id(self):
        """Generate a revision id from the current command's committer.

        Subclasses may override this, e.g. to produce deterministic ids.
        """
        committer = self.command.committer
        # The first two committer fields are formatted as "name <email>"
        # and the third is used as the timestamp.
        # Open question: would using the person running the import as
        # 'who' be acceptable? It might be a bit quicker and compress
        # slightly better.
        name, email, timestamp = committer[0], committer[1], committer[2]
        who = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(who, timestamp)
541
0.64.7 by Ian Clatworthy
start of multiple commit handling
542
    def get_inventory(self, revision_id):
543
        """Get the inventory for a revision id."""
544
        try:
545
            inv = self.cache_mgr.inventories[revision_id]
546
        except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
547
            print "Hmm - get_inventory cache miss for %s" % revision_id
0.64.7 by Ian Clatworthy
start of multiple commit handling
548
            # Not cached so reconstruct from repository
549
            inv = self.repo.revision_tree(revision_id).inventory
550
            self.cache_mgr.inventories[revision_id] = inv
551
        return inv
552
0.64.5 by Ian Clatworthy
first cut at generic processing method
553
    def _get_inventories(self, revision_ids):
554
        """Get the inventories for revision-ids.
555
        
556
        This is a callback used by the RepositoryLoader to
557
        speed up inventory reconstruction."""
558
        present = []
559
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
560
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
561
        # successfully loaded into the repsoitory
562
        for revision_id in revision_ids:
563
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
564
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
565
                present.append(revision_id)
566
            except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
567
                print "Hmm - get_inventories cache miss for %s" % revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
568
                # Not cached so reconstruct from repository
569
                if self.repo.has_revision(revision_id):
570
                    rev_tree = self.repo.revision_tree(revision_id)
571
                    present.append(revision_id)
572
                else:
573
                    rev_tree = self.repo.revision_tree(None)
574
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
575
                self.cache_mgr.inventories[revision_id] = inv
576
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
577
        return present, inventories
578
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
579
    def _get_lines(self, file_id):
580
        """Get the lines for a file-id."""
581
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
582
583
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item; missing parent directories are
          created first
        :param kind: inventory kind; only file and symlink entries are
          supported
        :param is_executable: executable flag, used for files only
        :param data: the file content, or the symlink target
        :raises errors.BzrError: if kind yields any other entry type
        """
        # Build the replacement InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(
            kind, basename, parent_ie.file_id, file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(lines)
            entry.text_size = sum(len(line) for line in lines)
            # Keep the text so _get_lines can return it for the commit
            self.lines_for_commit[file_id] = lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
608
609
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, parents first, and
        added to the inventory.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # Not seen yet, so fall through and create it
            pass

        # Make sure the parent exists before creating this directory
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
638
639
0.64.34 by Ian Clatworthy
report lost branches
640
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
641
0.64.37 by Ian Clatworthy
create branches as required
642
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
643
        """Create an object responsible for updating branches.
644
645
        :param heads_by_ref: a dictionary where
646
          names are git-style references like refs/heads/master;
647
          values are one item lists of commits marks.
648
        """
0.64.37 by Ian Clatworthy
create branches as required
649
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
650
        self.branch = branch
651
        self.cache_mgr = cache_mgr
652
        self.heads_by_ref = heads_by_ref
653
        self.last_ref = last_ref
654
655
    def update(self):
656
        """Update the Bazaar branches and tips matching the heads.
657
658
        If the repository is shared, this routine creates branches
659
        as required. If it isn't, warnings are produced about the
660
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
661
0.64.34 by Ian Clatworthy
report lost branches
662
        :return: updated, lost_heads where
663
          updated = the list of branches updated
664
          lost_heads = a list of (bazaar-name,revision) for branches that
665
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
666
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
667
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
668
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
669
        for br, tip in branch_tips:
670
            self._update_branch(br, tip)
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
671
            updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
672
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
673
674
    def _get_matching_branches(self):
675
        """Get the Bazaar branches.
676
0.64.34 by Ian Clatworthy
report lost branches
677
        :return: default_tip, branch_tips, lost_tips where
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
678
          default_tip = the last commit mark for the default branch
679
          branch_tips = a list of (branch,tip) tuples for other branches.
0.64.34 by Ian Clatworthy
report lost branches
680
          lost_heads = a list of (bazaar-name,revision) for branches that
0.64.37 by Ian Clatworthy
create branches as required
681
            would have been created had the repository been shared and
682
            everything succeeded
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
683
        """
0.64.37 by Ian Clatworthy
create branches as required
684
        branch_tips = []
685
        lost_heads = []
686
        ref_names = self.heads_by_ref.keys()
687
        if self.branch is not None:
688
            # Until there's a good reason to be more selective,
689
            # use the last imported revision as the tip of the default branch
690
            default_tip = self.heads_by_ref[self.last_ref][0]
691
            branch_tips.append((self.branch, default_tip))
692
            ref_names.remove(self.last_ref)
0.64.34 by Ian Clatworthy
report lost branches
693
694
        # Convert the reference names into Bazaar speak
695
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)
696
0.64.37 by Ian Clatworthy
create branches as required
697
        # Policy for locating branches
698
        def dir_under_current(name, ref_name):
699
            # Using the Bazaar name, get a directory under the current one
700
            return name
701
        def dir_sister_branch(name, ref_name):
702
            # Using the Bazaar name, get a sister directory to the branch
703
            return osutils.pathjoin(self.branch.base, "..", name)
704
        if self.branch is not None:
705
            dir_policy = dir_sister_branch
706
        else:
707
            dir_policy = dir_under_current
708
0.64.34 by Ian Clatworthy
report lost branches
709
        # Create/track missing branches
710
        shared_repo = self.repo.is_shared()
711
        for name in sorted(bzr_names.keys()):
712
            ref_name = bzr_names[name]
713
            tip = self.heads_by_ref[ref_name][0]
714
            if shared_repo:
0.64.37 by Ian Clatworthy
create branches as required
715
                location = dir_policy(name, ref_name)
716
                try:
717
                    br = self.make_branch(location)
718
                    branch_tips.append((br,tip))
719
                    continue
720
                except errors.BzrError, ex:
721
                    error("ERROR: failed to create branch %s: %s",
722
                        location, ex)
723
            lost_head = self.cache_mgr.revision_ids[tip]
724
            lost_info = (name, lost_head)
725
            lost_heads.append(lost_info)
726
        return branch_tips, lost_heads
727
728
    def make_branch(self, location):
        """Create a branch at location and return it.

        The work is delegated to BzrDir.create_branch_convenience.
        """
        create = bzrdir.BzrDir.create_branch_convenience
        return create(location)
0.64.34 by Ian Clatworthy
report lost branches
731
732
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
733
        """Generate Bazaar branch names from import ref names.
734
        
735
        :return: a dictionary with Bazaar names as keys and
736
          the original reference names as values.
737
        """
0.64.34 by Ian Clatworthy
report lost branches
738
        bazaar_names = {}
739
        for ref_name in sorted(ref_names):
740
            parts = ref_name.split('/')
741
            if parts[0] == 'refs':
742
                parts.pop(0)
743
            full_name = "--".join(parts)
744
            bazaar_name = parts[-1]
745
            if bazaar_name in bazaar_names:
746
                bazaar_name = full_name
747
            bazaar_names[bazaar_name] = ref_name
748
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
749
750
    def _update_branch(self, br, last_mark):
        """Update a branch with last revision information.

        :param br: the branch to update
        :param last_mark: the commit mark of the branch's new tip
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revno is the number of revisions in the tip's history
        history = self.repo.iter_reverse_revision_history(last_rev_id)
        revno = sum(1 for _rev in history)
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)