/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.37 by Ian Clatworthy
create branches as required
23
    builtins,
24
    bzrdir,
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
25
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
26
    errors,
27
    generate_ids,
28
    inventory,
29
    lru_cache,
30
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
31
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
32
    revision,
33
    revisiontree,
0.64.37 by Ian Clatworthy
create branches as required
34
    transport,
0.64.5 by Ian Clatworthy
first cut at generic processing method
35
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
36
from bzrlib.trace import (
37
    note,
38
    warning,
0.64.37 by Ian Clatworthy
create branches as required
39
    error,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
40
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
41
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
42
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
43
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
44
    processor,
45
    revisionloader,
46
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
47
48
0.64.41 by Ian Clatworthy
update multiple working trees if requested
49
# How many commits before automatically reporting progress
50
_DEFAULT_AUTO_PROGRESS = 1000
51
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
52
# How many commits before automatically checkpointing
53
_DEFAULT_AUTO_CHECKPOINT = 10000
54
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
55
# How many inventories to cache
56
_DEFAULT_INV_CACHE_SIZE = 10
57
0.64.41 by Ian Clatworthy
update multiple working trees if requested
58
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
59
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: reset support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a hints file holding the analysis generated
      by running the fast-import-info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working trees before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000. A value of zero or less disables
      automatic checkpoints.

    * count - only import this many commits then exit. If not set
      or negative, all commits are imported.

    * inv-cache - number of inventories to cache.
      If not set, the default is 10.
    """

    known_params = ['info', 'trees', 'checkpoint', 'count', 'inv-cache']

    def note(self, msg, *args):
        """Output a note but timestamp it."""
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it."""
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Load settings, build the caches and open the first write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, self.verbose,
            self.inventory_cache_size)
        self.init_stats()

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

        # Turn on caching for the inventory versioned file
        inv_vf = self.repo.get_inventory_weave()
        inv_vf.enable_cache()

    def _load_info_and_params(self):
        """Read the hints file (if any) and derive the tuning settings.

        Sets ``info``, ``progress_every``, ``checkpoint_every``,
        ``inventory_cache_size``, ``max_commits`` and ``total_commits``.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            # Floor division so the interval stays an integer regardless
            # of which division semantics the interpreter uses.
            self.progress_every = self.progress_every // 10

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how big to make the inventory cache
        self.inventory_cache_size = int(self.params.get('inv-cache',
            _DEFAULT_INV_CACHE_SIZE))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base processing loop, aborting the write group on failure."""
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately bare: the write group must be aborted whatever
            # interrupted the import, and the exception is always re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit pending data, update branches and trees, report stats."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
            helpers.invert_dict(self.cache_mgr.heads),
            self.cache_mgr.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "an unshared repository")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self.note("Updating the working trees ...")
                # The reporter must be bound on both paths; it is passed
                # to every wt.update() call below.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                for wt in trees:
                    wt.update(reporter)
                    self._tree_count += 1
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        self.dump_stats()
        if remind_about_update:
            self.note("To refresh the working tree for a branch, "
                "use 'bzr update'")

    def _get_working_trees(self, branches):
        """Get the working trees for branches in the repository."""
        result = []
        wt_expected = self.repo.make_working_trees()
        for br in branches:
            if br == self.branch and br is not None:
                wt = self.working_tree
            elif wt_expected:
                try:
                    wt = br.bzrdir.open_workingtree()
                except errors.NoWorkingTree:
                    self.warning("No working tree for branch %s", br)
                    continue
            else:
                continue
            result.append(wt)
        return result

    def init_stats(self):
        """Reset the counter of imported revisions."""
        self._revision_count = 0

    def dump_stats(self):
        """Report how many revisions, branches and trees were handled."""
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        # Marked blobs are keyed by their id; unmarked ones by a hash
        # of their content.
        if cmd.mark is not None:
            dataref = cmd.id
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        # 'Commit' the revision
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()

        # Update caches
        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
        self.cache_mgr.last_ids[cmd.ref] = cmd.id
        self.cache_mgr.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % cmd.id)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif (self.checkpoint_every > 0 and
            self._revision_count % self.checkpoint_every == 0):
            # A non-positive interval disables automatic checkpoints
            # rather than dividing by zero.
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Emit a progress note every ``progress_every`` commits.

        :param details: extra text appended to the progress message
        """
        # TODO: use a progress bar with ETA enabled
        if self._revision_count % self.progress_every == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("resets are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference."""
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
337
338
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
339
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor."""

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the manager as ``verbose``
        :param inventory_cache_size: capacity used for both the inventory
            cache and the serialised-inventory cache
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit mark to ref
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Cache of recent serialised inventories
        self.inv_parent_texts = lru_cache.LRUCache(inventory_cache_size)

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._blobs_to_keep_from_info(info)

    @staticmethod
    def _blobs_to_keep_from_info(info):
        """Return the set of blob ids to keep, or None to keep them all.

        :param info: a mapping holding the --info output, or None
        :return: None when no usable hint is present, otherwise a
            frozenset of the ids listed under
            ``info['Blob usage tracking']['multi']``
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A lone value arrives as a plain string rather than a list; using
        # it directly would turn the membership test in store_blob() into
        # a substring match. Wrap it so lookups are always by exact id.
        if isinstance(multi, (type(''), type(u''))):
            if multi:
                return frozenset([multi])
            return frozenset()
        # A set also keeps the per-blob lookup cheap for long lists.
        return frozenset(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is kept permanently when no hints are available, when it
        is empty, or when its id is in the keep set; otherwise it is held
        only until the first fetch.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Non-sticky blobs are removed from the cache as they are returned.
        Raises KeyError if the id is in neither cache.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
412
413
0.64.5 by Ian Clatworthy
first cut at generic processing method
414
class GenericCommitHandler(processor.CommitHandler):
415
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
416
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Create a handler for one commit command.

        :param command: the commit command, passed on to
          processor.CommitHandler
        :param repo: the repository revisions are loaded into
        :param cache_mgr: the cache manager shared across commits
        :param verbose: if True, cache misses are reported
        """
        processor.CommitHandler.__init__(self, command)
        self.repo = repo
        self.cache_mgr = cache_mgr
        self.verbose = verbose

        def inventories_for(revision_ids):
            # looked up on each call so the loader always goes through
            # this handler's _get_inventories
            return self._get_inventories(revision_ids)

        # smart loader that uses these caches; the plain
        # revisionloader.RevisionLoader takes the same callback but no
        # parent-text cache
        self.loader = revisionloader.ImportRevisionLoader(
            repo, inventories_for, cache_mgr.inv_parent_texts)
0.64.5 by Ian Clatworthy
first cut at generic processing method
427
0.64.43 by Ian Clatworthy
verbose mode cleanup
428
    def note(self, msg, *args):
        """Emit a note with the id of the current command appended.

        ``msg`` is a %-style format string and ``args`` are its values;
        both are passed on to the module-level ``note``.
        """
        note("%s (%s)" % (msg, self.command.id), *args)
432
433
    def warning(self, msg, *args):
        """Emit a warning with the id of the current command appended.

        ``msg`` is a %-style format string and ``args`` are its values;
        both are passed on to the module-level ``warning``.
        """
        warning("WARNING: %s (%s)" % (msg, self.command.id), *args)
437
0.64.5 by Ian Clatworthy
first cut at generic processing method
438
    def pre_process_files(self):
439
        """Prepare for committing."""
440
        self.revision_id = self.gen_revision_id()
441
        self.inv_delta = []
442
        # cache of texts for this commit, indexed by file-id
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
443
        self.lines_for_commit = {}
0.64.5 by Ian Clatworthy
first cut at generic processing method
444
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
445
        # Work out the true set of parents
446
        cmd = self.command
447
        if cmd.mark is None:
448
            last_id = self.cache_mgr.last_ids.get(cmd.ref)
449
            if last_id is not None:
450
                parents = [last_id]
451
            else:
452
                parents = []
453
        else:
454
            parents = cmd.parents
455
456
        # Track the heads
457
        for parent in parents:
458
            try:
459
                del self.cache_mgr.heads[parent]
460
            except KeyError:
0.64.42 by Ian Clatworthy
removed parent not found warnings as not a problem
461
                # it's ok if the parent isn't there - another
462
                # commit may have already removed it
463
                pass
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
464
        self.cache_mgr.heads[cmd.id] = cmd.ref
465
0.64.14 by Ian Clatworthy
commit of modified files working
466
        # Get the parent inventories
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
467
        if parents:
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
468
            self.parents = [self.cache_mgr.revision_ids[p]
0.64.36 by Ian Clatworthy
fix head tracking when unmarked commits used
469
                for p in parents]
0.64.7 by Ian Clatworthy
start of multiple commit handling
470
        else:
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
471
            self.parents = []
0.64.7 by Ian Clatworthy
start of multiple commit handling
472
0.64.14 by Ian Clatworthy
commit of modified files working
473
        # Seed the inventory from the previous one
474
        if len(self.parents) == 0:
475
            self.inventory = self.gen_initial_inventory()
0.64.5 by Ian Clatworthy
first cut at generic processing method
476
        else:
477
            # use the bzr_revision_id to lookup the inv cache
0.64.14 by Ian Clatworthy
commit of modified files working
478
            self.inventory = self.get_inventory(self.parents[0]).copy()
0.64.13 by Ian Clatworthy
commit of new files working
479
        if not self.repo.supports_rich_root():
480
            # In this repository, root entries have no knit or weave. When
481
            # serializing out to disk and back in, root.revision is always
482
            # the new revision_id.
0.64.14 by Ian Clatworthy
commit of modified files working
483
            self.inventory.root.revision = self.revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
484
0.64.22 by Ian Clatworthy
fix more inventory lookup bugs
485
        # directory-path -> inventory-entry for current inventory
486
        self.directory_entries = dict(self.inventory.directories())
487
0.64.14 by Ian Clatworthy
commit of modified files working
488
    def post_process_files(self):
        """Apply the pending changes and load the revision.

        The inventory delta is applied, the resulting inventory is cached
        under the new revision id, and a Revision built from the command's
        committer, author and message is handed to the loader.
        """
        def identity(person):
            # "Name <email>" from the first two fields of a person tuple
            return "%s <%s>" % (person[0], person[1])

        def lines_for(file_id):
            return self._get_lines(file_id)

        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory

        # Load the revision into the repository
        command = self.command
        committer = command.committer
        who = identity(committer)
        rev_props = {}
        author = command.author
        if author is not None:
            author_id = identity(author)
            # the author is only recorded when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=self._escape_commit_message(command.message),
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None, lines_for)
512
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
513
    def _escape_commit_message(self, message):
514
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
515
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
516
        # Code copied from bzrlib.commit.
517
        
518
        # Python strings can include characters that can't be
519
        # represented in well-formed XML; escape characters that
520
        # aren't listed in the XML specification
521
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
522
        message, _ = re.subn(
523
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
524
            lambda match: match.group(0).encode('unicode_escape'),
525
            message)
526
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
527
528
    def modify_handler(self, filecmd):
529
        if filecmd.dataref is not None:
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
530
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
0.64.5 by Ian Clatworthy
first cut at generic processing method
531
        else:
532
            data = filecmd.data
533
        self._modify_inventory(filecmd.path, filecmd.kind,
534
            filecmd.is_executable, data)
535
536
    def delete_handler(self, filecmd):
537
        path = filecmd.path
0.64.21 by Ian Clatworthy
fix one inventory lookup bug
538
        try:
539
            del self.inventory[self.bzr_file_id(path)]
540
        except errors.NoSuchId:
0.64.43 by Ian Clatworthy
verbose mode cleanup
541
            self.warning("ignoring delete of %s as not in inventory", path)
0.64.21 by Ian Clatworthy
fix one inventory lookup bug
542
        finally:
543
            try:
544
                self.cache_mgr._delete_path(path)
545
            except KeyError:
546
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
547
548
    def copy_handler(self, filecmd):
549
        raise NotImplementedError(self.copy_handler)
550
551
    def rename_handler(self, filecmd):
0.64.16 by Ian Clatworthy
safe processing tweaks
552
        old_path = filecmd.old_path
553
        new_path = filecmd.new_path
554
        file_id = self.bzr_file_id(old_path)
555
        ie = self.inventory[file_id]
556
        self.inv_delta.append((old_path, new_path, file_id, ie))
557
        self.cache_mgr._rename_path(old_path, new_path)
0.64.5 by Ian Clatworthy
first cut at generic processing method
558
559
    def deleteall_handler(self, filecmd):
560
        raise NotImplementedError(self.deleteall_handler)
561
0.64.16 by Ian Clatworthy
safe processing tweaks
562
    def bzr_file_id_and_new(self, path):
563
        """Get a Bazaar file identifier and new flag for a path.
564
        
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
565
        :return: file_id, is_new where
566
          is_new = True if the file_id is newly created
0.64.16 by Ian Clatworthy
safe processing tweaks
567
        """
568
        try:
569
            return self.cache_mgr.file_ids[path], False
570
        except KeyError:
571
            id = generate_ids.gen_file_id(path)
572
            self.cache_mgr.file_ids[path] = id
573
            return id, True
574
0.64.5 by Ian Clatworthy
first cut at generic processing method
575
    def bzr_file_id(self, path):
0.64.14 by Ian Clatworthy
commit of modified files working
576
        """Get a Bazaar file identifier for a path."""
0.64.16 by Ian Clatworthy
safe processing tweaks
577
        return self.bzr_file_id_and_new(path)[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
578
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
579
    def gen_initial_inventory(self):
        """Return a new Inventory for a revision that has no parents.

        The inventory is created with this commit's revision id.
        """
        return inventory.Inventory(revision_id=self.revision_id)
583
0.64.5 by Ian Clatworthy
first cut at generic processing method
584
    def gen_revision_id(self):
        """Generate a revision id from the committer and commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        name, email, timestamp = self.command.committer[:3]
        # Perhaps using the person running the import as 'who' would be
        # ok? If so, it might be a bit quicker and give slightly better
        # compression.
        who = "%s <%s>" % (name, email)
        return generate_ids.gen_revision_id(who, timestamp)
595
0.64.7 by Ian Clatworthy
start of multiple commit handling
596
    def get_inventory(self, revision_id):
597
        """Get the inventory for a revision id."""
598
        try:
599
            inv = self.cache_mgr.inventories[revision_id]
600
        except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
601
            if self.verbose:
602
                self.note("get_inventory cache miss for %s", revision_id)
0.64.7 by Ian Clatworthy
start of multiple commit handling
603
            # Not cached so reconstruct from repository
604
            inv = self.repo.revision_tree(revision_id).inventory
605
            self.cache_mgr.inventories[revision_id] = inv
606
        return inv
607
0.64.5 by Ian Clatworthy
first cut at generic processing method
608
    def _get_inventories(self, revision_ids):
609
        """Get the inventories for revision-ids.
610
        
611
        This is a callback used by the RepositoryLoader to
612
        speed up inventory reconstruction."""
613
        present = []
614
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
615
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
616
        # successfully loaded into the repsoitory
617
        for revision_id in revision_ids:
618
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
619
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
620
                present.append(revision_id)
621
            except KeyError:
0.64.43 by Ian Clatworthy
verbose mode cleanup
622
                if self.verbose:
623
                    self.note("get_inventories cache miss for %s", revision_id)
0.64.5 by Ian Clatworthy
first cut at generic processing method
624
                # Not cached so reconstruct from repository
625
                if self.repo.has_revision(revision_id):
626
                    rev_tree = self.repo.revision_tree(revision_id)
627
                    present.append(revision_id)
628
                else:
629
                    rev_tree = self.repo.revision_tree(None)
630
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
631
                self.cache_mgr.inventories[revision_id] = inv
632
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
633
        return present, inventories
634
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
635
    def _get_lines(self, file_id):
636
        """Get the lines for a file-id."""
637
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
638
639
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add an item to the inventory, or replace the existing one.

        :param path: path of the item; missing parent directories are
          created via _ensure_directory
        :param kind: the inventory kind, as understood by
          inventory.make_entry
        :param is_executable: executable flag, used for files only
        :param data: the file content, or the target for a symlink
        :raises errors.BzrError: if the entry made for ``kind`` is
          neither a file nor a symlink
        """
        # Create the new InventoryEntry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
                                     file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum(len(line) for line in text_lines)
            # keep the text so _get_lines can serve it to the loader
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record this new inventory entry
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
664
665
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created recursively, added to the
        inventory and remembered in ``directory_entries``.

        :return: basename, parent_ie where
          basename = the last component of path
          parent_ie = the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        if dirname in self.directory_entries:
            return basename, self.directory_entries[dirname]

        # No directory existed, so create one - after making sure
        # that its own parent exists
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(new_dir)
        return basename, new_dir
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
694
695
0.64.34 by Ian Clatworthy
report lost branches
696
class GenericBranchUpdater(object):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
697
0.64.37 by Ian Clatworthy
create branches as required
698
    def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref):
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
699
        """Create an object responsible for updating branches.
700
701
        :param heads_by_ref: a dictionary where
702
          names are git-style references like refs/heads/master;
703
          values are one item lists of commits marks.
704
        """
0.64.37 by Ian Clatworthy
create branches as required
705
        self.repo = repo
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
706
        self.branch = branch
707
        self.cache_mgr = cache_mgr
708
        self.heads_by_ref = heads_by_ref
709
        self.last_ref = last_ref
710
711
    def update(self):
712
        """Update the Bazaar branches and tips matching the heads.
713
714
        If the repository is shared, this routine creates branches
715
        as required. If it isn't, warnings are produced about the
716
        lost of information.
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
717
0.64.34 by Ian Clatworthy
report lost branches
718
        :return: updated, lost_heads where
719
          updated = the list of branches updated
720
          lost_heads = a list of (bazaar-name,revision) for branches that
721
            would have been created had the repository been shared
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
722
        """
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
723
        updated = []
0.64.37 by Ian Clatworthy
create branches as required
724
        branch_tips, lost_heads = self._get_matching_branches()
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
725
        for br, tip in branch_tips:
726
            self._update_branch(br, tip)
0.64.33 by Ian Clatworthy
make tree updating optional and minor UI improvements
727
            updated.append(br)
0.64.34 by Ian Clatworthy
report lost branches
728
        return updated, lost_heads
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
729
730
    def _get_matching_branches(self):
        """Work out which Bazaar branch should get which head.

        When a default branch is set, it is paired with the tip of the
        trunk reference chosen by select_trunk. Every other reference is
        given a Bazaar name; in a shared repository a branch is created
        for it, otherwise (or if creation fails) it is reported as lost.

        :return: branch_tips, lost_heads where
          branch_tips = a list of (branch,tip) tuples, starting with the
            default branch when there is one; tip is a commit mark
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared and
            everything succeeded
        """
        branch_tips = []
        lost_heads = []
        ref_names = self.heads_by_ref.keys()
        if self.branch is not None:
            trunk = self.select_trunk(ref_names)
            default_tip = self.heads_by_ref[trunk][0]
            branch_tips.append((self.branch, default_tip))
            # the trunk is handled, so it must not get a branch of its own
            ref_names.remove(trunk)

        # Convert the reference names into Bazaar speak
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Policy for locating branches
        def dir_under_current(name, ref_name):
            # Using the Bazaar name, get a directory under the current one
            return name
        def dir_sister_branch(name, ref_name):
            # Using the Bazaar name, get a sister directory to the branch
            return osutils.pathjoin(self.branch.base, "..", name)
        if self.branch is not None:
            dir_policy = dir_sister_branch
        else:
            dir_policy = dir_under_current

        # Create/track missing branches
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names.keys()):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                location = dir_policy(name, ref_name)
                try:
                    br = self.make_branch(location)
                    branch_tips.append((br,tip))
                    # branch created - skip the lost-head bookkeeping below
                    continue
                except errors.BzrError, ex:
                    # report the failure, then fall through so that the
                    # head is recorded as lost
                    error("ERROR: failed to create branch %s: %s",
                        location, ex)
            # Reached when the repository is not shared or the branch
            # could not be created: remember the revision that was lost
            lost_head = self.cache_mgr.revision_ids[tip]
            lost_info = (name, lost_head)
            lost_heads.append(lost_info)
        return branch_tips, lost_heads
782
0.64.40 by Ian Clatworthy
always use heads/master as the trunk if it is present
783
    def select_trunk(self, ref_names):
784
        """Given a set of ref names, choose one as the trunk."""
785
        for candidate in ['refs/heads/master']:
786
            if candidate in ref_names:
787
                return candidate
788
        # Use the last reference in the import stream
789
        return self.last_ref
790
0.64.37 by Ian Clatworthy
create branches as required
791
    def make_branch(self, location):
        """Create a branch at ``location`` and return it.

        The work is done by bzrdir.BzrDir.create_branch_convenience.
        """
        new_branch = bzrdir.BzrDir.create_branch_convenience(location)
        return new_branch
0.64.34 by Ian Clatworthy
report lost branches
794
795
    def _get_bzr_names_from_ref_names(self, ref_names):
0.64.37 by Ian Clatworthy
create branches as required
796
        """Generate Bazaar branch names from import ref names.
797
        
798
        :return: a dictionary with Bazaar names as keys and
799
          the original reference names as values.
800
        """
0.64.34 by Ian Clatworthy
report lost branches
801
        bazaar_names = {}
802
        for ref_name in sorted(ref_names):
803
            parts = ref_name.split('/')
804
            if parts[0] == 'refs':
805
                parts.pop(0)
806
            full_name = "--".join(parts)
807
            bazaar_name = parts[-1]
808
            if bazaar_name in bazaar_names:
809
                bazaar_name = full_name
810
            bazaar_names[bazaar_name] = ref_name
811
        return bazaar_names
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
812
813
    def _update_branch(self, br, last_mark):
        """Point a branch at the revision imported for ``last_mark``.

        The revision number is the length of the reverse revision
        history of that revision in the repository.
        """
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        history = list(self.repo.iter_reverse_revision_history(last_rev_id))
        revno = len(history)
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch, along the lines of
        #   if self.tags:
        #       br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)