/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
39
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
40
    processor,
41
    revisionloader,
42
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
43
44
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
45
# How many commits before automatically checkpointing
46
_DEFAULT_AUTO_CHECKPOINT = 10000
47
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
48
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working tree before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards. In the future, this parameter
      might be more flexible, e.g. take a pattern of trees to update.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of 0 turns automatic
      checkpointing off.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'trees', 'checkpoint', 'count']

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Record the start time, load parameters and open a write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Set info, checkpoint_every, max_commits and total_commits.

        The values come from self.params and, when the 'info' parameter
        is given, from the config file it names.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error."""
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately a bare except: every failure, including an
            # interrupt, must abort the write group. It is always re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit the write group, update branches and trees, dump stats."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            self.warning("Unshared repository - not creating branches for "
                "these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees'):
            if self.working_tree is None:
                self.warning("No working tree available to update")
            else:
                # The reporter must be bound on both branches: it is
                # passed to update() below whether or not we are verbose.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                self.note("Updating the working tree ...")
                self.working_tree.update(reporter)
                self._tree_count = 1
                remind_about_update = False
        self.dump_stats()
        if remind_about_update:
            self.note("NOTE: To refresh working trees, use 'bzr update'")

    def init_stats(self):
        """Reset the counters reported by dump_stats()."""
        self._revision_count = 0
        # post_process() sets the real values; starting from 0 keeps
        # dump_stats() usable before then.
        self._branch_count = 0
        self._tree_count = 0

    def dump_stats(self):
        """Output a summary of what was imported and how long it took."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            # The data is a single string, so hash it in one call rather
            # than feeding it to the hash one character at a time.
            dataref = osutils.sha_string(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        # 'Commit' the revision
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()

        # Update caches
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self.cache_mgr.last_ids[cmd.ref] = cmd.id
        self.cache_mgr.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every and
            self._revision_count % self.checkpoint_every == 0):
            # A checkpoint frequency of 0 means "never" - testing it first
            # also avoids a ZeroDivisionError in the modulo.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Output a progress line, throttled to every 10th commit.

        In verbose mode a line is output for every commit.

        :param details: extra text appended to the progress line
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
284
285
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
286
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit mark to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._find_blobs_to_keep(info)

    @staticmethod
    def _find_blobs_to_keep(info):
        """Return the set of blob ids to make sticky, or None for all.

        :param info: a mapping holding the output from the --info
            processor, or None if no hints are available
        :return: None if info is None or has no blob usage data,
            otherwise a set of blob ids
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A single-valued entry comes back as one string, not a list.
        # Wrap it so membership tests compare whole ids instead of
        # doing a substring search (':1' in ':12').
        if isinstance(multi, (type(''), type(u''))):
            multi = [multi]
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky if no usage hints are available, if it
        is empty, or if the hints list it; otherwise it is stored until
        it is first fetched.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                note("making blob %s sticky", id)
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed from the
        cache by this call. Raises KeyError if the blob is unknown.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
358
359
0.64.5 by Ian Clatworthy
first cut at generic processing method
360
class GenericCommitHandler(processor.CommitHandler):
361
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
362
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed to the base class
        :param repo: the repository revisions are loaded into
        :param cache_mgr: the cache manager shared across commits
        :param verbose: if True, extra detail is reported
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.cache_mgr = cache_mgr
        self.repo = repo
        # smart loader that uses these caches
        inventories_getter = (
            lambda revision_ids: self._get_inventories(revision_ids))
        self.loader = revisionloader.RevisionLoader(repo, inventories_getter)
370
371
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, works out the parents, updates the
        head tracking in the cache manager and seeds the inventory
        that the file commands will modify.
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # cache of texts for this commit, indexed by file-id
        self.lines_for_commit = {}

        command = self.command
        cache = self.cache_mgr

        # Work out the true set of parents: a commit without a mark
        # follows the last commit seen on the same ref, if there is one
        if command.mark is not None:
            parents = command.parents
        else:
            previous = cache.last_ids.get(command.ref)
            if previous is None:
                parents = []
            else:
                parents = [previous]

        # Track the heads: a parent stops being a head, this commit
        # becomes one
        heads = cache.heads
        for parent in parents:
            if parent in heads:
                del heads[parent]
            else:
                warning("didn't find parent %s while tracking heads",
                    parent)
        heads[command.id] = command.ref

        # Translate the parent import references into revision-ids
        if parents:
            self.parents = [cache.revision_ids[ref] for ref in parents]
        else:
            self.parents = []

        # Seed the inventory from the previous one
        if self.parents:
            # use the bzr_revision_id to lookup the inv cache
            self.inventory = self.get_inventory(self.parents[0]).copy()
        else:
            self.inventory = self.gen_initial_inventory()
        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
419
0.64.14 by Ian Clatworthy
commit of modified files working
420
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory and loads the new revision into the repository.
        """
        verbose = self.verbose
        if verbose:
            note("applying inventory delta ...")
            for delta_entry in self.inv_delta:
                note("  %r" % (delta_entry,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if verbose:
            note("created inventory ...")
            for inv_entry in self.inventory:
                note("  %r" % (inv_entry,))

        # Load the revision into the repository
        committer = self.command.committer
        who = "%s <%s>" % (committer[0],committer[1])

        # Only record the author when it differs from the committer
        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = "%s <%s>" % (author[0],author[1])
            if author_id != who:
                rev_props['author'] = author_id

        message = self._escape_commit_message(self.command.message)
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
        text_provider = lambda file_id: self._get_lines(file_id)
        self.loader.load(rev, self.inventory, None, text_provider)
452
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
453
    def _escape_commit_message(self, message):
454
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
455
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
456
        # Code copied from bzrlib.commit.
457
        
458
        # Python strings can include characters that can't be
459
        # represented in well-formed XML; escape characters that
460
        # aren't listed in the XML specification
461
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
462
        message, _ = re.subn(
463
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
464
            lambda match: match.group(0).encode('unicode_escape'),
465
            message)
466
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
467
468
    def modify_handler(self, filecmd):
        """Apply a file modification to the inventory.

        The content comes from the blob cache when the command carries
        a dataref, otherwise from the command itself.
        """
        if filecmd.dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(filecmd.dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
475
476
    def delete_handler(self, filecmd):
        """Remove a path from the inventory.

        A path that is not in the inventory is reported with a warning
        and otherwise ignored. The cache manager is told about the
        deletion in either case.
        """
        path = filecmd.path
        try:
            try:
                file_id = self.bzr_file_id(path)
                del self.inventory[file_id]
            except errors.NoSuchId:
                warning("ignoring delete of %s - not in inventory" % (path,))
        finally:
            try:
                self.cache_mgr._delete_path(path)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
487
488
    def copy_handler(self, filecmd):
489
        raise NotImplementedError(self.copy_handler)
490
491
    def rename_handler(self, filecmd):
        """Queue a rename in the inventory delta and update the path cache."""
        source, target = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        self.inv_delta.append((source, target, file_id, entry))
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
498
499
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command (not supported yet).

        :raises NotImplementedError: always, carrying this handler
        """
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
501
0.64.16 by Ian Clatworthy
safe processing tweaks
502
    def bzr_file_id_and_new(self, path):
        """Look up, or create, the Bazaar file identifier for a path.

        Identifiers are remembered in the cache manager's file_ids map,
        so a path gets the same id until that map entry is removed.

        :param path: the path to look up
        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            existing = known_ids[path]
        except KeyError:
            # First time this path is seen: mint an id and remember it
            fresh = generate_ids.gen_file_id(path)
            known_ids[path] = fresh
            return fresh, True
        return existing, False
514
0.64.5 by Ian Clatworthy
first cut at generic processing method
515
    def bzr_file_id(self, path):
        """Return the Bazaar file identifier for a path.

        This is the first item of bzr_file_id_and_new(path), so an
        identifier is created if the path has none yet.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
518
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
519
    def gen_initial_inventory(self):
        """Create the inventory used for a revision without parents.

        :return: a new inventory.Inventory created with
          revision_id=self.revision_id
        """
        return inventory.Inventory(revision_id=self.revision_id)
523
0.64.5 by Ian Clatworthy
first cut at generic processing method
524
    def gen_revision_id(self):
        """Generate a revision id.

        Subclasses may override this to produce deterministic ids say.

        :return: the id produced by generate_ids.gen_revision_id from
          the first three items of the current command's committer
        """
        committer = self.command.committer
        name = committer[0]
        email = committer[1]
        when = committer[2]
        # Perhaps using the person running the import as 'who' is ok? If
        # so, it might be a bit quicker and give slightly better compression?
        author = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(author, when)
535
0.64.7 by Ian Clatworthy
start of multiple commit handling
536
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is consulted first. On a miss, the inventory
        is taken from the repository's revision tree and stored in the
        cache for next time.

        :param revision_id: the revision whose inventory is wanted
        :return: the inventory for revision_id
        """
        inventories = self.cache_mgr.inventories
        try:
            return inventories[revision_id]
        except KeyError:
            pass
        # Report the miss through bzrlib.trace instead of printing to
        # stdout, which would pollute the output of the importing command.
        note("Hmm - get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        inventories[revision_id] = inv
        return inv
546
0.64.5 by Ian Clatworthy
first cut at generic processing method
547
    def _get_inventories(self, revision_ids):
548
        """Get the inventories for revision-ids.
549
        
550
        This is a callback used by the RepositoryLoader to
551
        speed up inventory reconstruction."""
552
        present = []
553
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
554
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
555
        # successfully loaded into the repsoitory
556
        for revision_id in revision_ids:
557
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
558
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
559
                present.append(revision_id)
560
            except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
561
                print "Hmm - get_inventories cache miss for %s" % revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
562
                # Not cached so reconstruct from repository
563
                if self.repo.has_revision(revision_id):
564
                    rev_tree = self.repo.revision_tree(revision_id)
565
                    present.append(revision_id)
566
                else:
567
                    rev_tree = self.repo.revision_tree(None)
568
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
569
                self.cache_mgr.inventories[revision_id] = inv
570
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
571
        return present, inventories
572
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
573
    def _get_lines(self, file_id):
574
        """Get the lines for a file-id."""
575
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
576
577
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add to or change an item in the inventory.

        :param path: path of the item
        :param kind: entry kind, as accepted by inventory.make_entry
        :param is_executable: executable flag; only used for file entries
        :param data: the content for a file, the target for a symlink
        :raises errors.BzrError: if the entry made for kind is neither a
          file nor a symlink
        """
        # Build the replacement entry, creating parent directories on demand
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id

        supported = (inventory.InventoryFile, inventory.InventoryLnk)
        if not isinstance(entry, supported):
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(lines)
            entry.text_size = sum([len(line) for line in lines])
            # Keep the text around so _get_lines can serve it later
            self.lines_for_commit[file_id] = lines
        else:
            entry.symlink_target = data

        # Store the entry in the inventory
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            # NOTE(review): only the id map is swapped here - confirm that
            # no other inventory structure still refers to the old entry.
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
602
603
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Directories not yet known in directory_entries are created
        recursively (parents first), stamped with the current revision id,
        remembered in directory_entries and added to the inventory.

        :return: basename, dir_ie where
          basename = the last component of path
          dir_ie = the inventory entry of the directory containing path
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # Unknown directory: fall through and create its entry
            pass

        # Make sure the parent exists before creating the directory itself
        own_name, container_ie = self._ensure_directory(dirname)
        new_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(new_id, own_name, container_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[new_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
632
633
0.64.34 by Ian Clatworthy
report lost branches
634
class GenericBranchUpdater(object):
    """Update Bazaar branch tips from the heads collected during an import."""

    def __init__(self, branch, cache_mgr, heads_by_ref, last_ref):
        """Create an object responsible for updating branches.

        :param branch: the default branch; its repository is used for
          revision history lookups
        :param cache_mgr: an object whose revision_ids maps commit marks
          to revision ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commits marks.
        :param last_ref: the key in heads_by_ref whose head becomes the
          tip of the default branch
        """
        self.branch = branch
        self.repo = branch.repository
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, warnings are produced about the
        loss of information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        updated = []
        default_tip, branch_tips, lost_heads = self._get_matching_branches()
        self._update_branch(self.branch, default_tip)
        updated.append(self.branch)
        for br, tip in branch_tips:
            self._update_branch(br, tip)
            updated.append(br)
        return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: default_tip, branch_tips, lost_heads where
          default_tip = the last commit mark for the default branch
          branch_tips = a list of (branch,tip) tuples for other branches.
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        # Until there's a good reason to be more selective,
        # use the last imported revision as the tip of the default branch
        default_tip = self.heads_by_ref[self.last_ref][0]

        # Convert the reference names into Bazaar speak. A new list is
        # built rather than mutating the result of keys(), which is only
        # possible when keys() happens to return a list.
        ref_names = [ref for ref in self.heads_by_ref
                     if ref != self.last_ref]
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Create/track missing branches
        branch_tips = []
        lost_heads = []
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                # TODO: create the branch
                pass
            else:
                lost_head = self.cache_mgr.revision_ids[tip]
                lost_heads.append((name, lost_head))
        return default_tip, branch_tips, lost_heads

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Map reference names to Bazaar branch names.

        The Bazaar name is the last path component of the reference. If
        that name is already taken, the whole reference (without a leading
        'refs') joined by '--' is used instead.

        :param ref_names: an iterable of git-style reference names
        :return: a dictionary mapping Bazaar names to reference names
        """
        bazaar_names = {}
        # Sorted so that name clashes are resolved deterministically
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            full_name = "--".join(parts)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                bazaar_name = full_name
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information.

        :param br: the branch to update
        :param last_mark: the commit mark of the new tip, looked up in
          the cache manager's revision_ids
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # The revno is the length of the history leading to the tip
        revno = len(list(self.repo.iter_reverse_revision_history(last_rev_id)))
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)