/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that supports all Bazaar repository formats."""
18
19
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
20
import re
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
21
import time
0.64.5 by Ian Clatworthy
first cut at generic processing method
22
from bzrlib import (
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
23
    delta,
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
    errors,
25
    generate_ids,
26
    inventory,
27
    lru_cache,
28
    osutils,
0.64.26 by Ian Clatworthy
more progress reporting tweaks
29
    progress,
0.64.5 by Ian Clatworthy
first cut at generic processing method
30
    revision,
31
    revisiontree,
32
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
33
from bzrlib.trace import (
34
    note,
35
    warning,
36
    )
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
37
import bzrlib.util.configobj.configobj as configobj
0.64.5 by Ian Clatworthy
first cut at generic processing method
38
from bzrlib.plugins.fastimport import (
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
39
    helpers,
0.64.5 by Ian Clatworthy
first cut at generic processing method
40
    processor,
41
    revisionloader,
42
    )
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
43
44
0.64.28 by Ian Clatworthy
checkpoint and count params to generic processor
45
# How many commits before automatically checkpointing
46
_DEFAULT_AUTO_CHECKPOINT = 10000
47
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
48
class GenericProcessor(processor.ImportProcessor):
    """An import processor that handles basic imports.

    Current features supported:

    * blobs are cached in memory
    * files and symlinks commits are supported
    * checkpoints automatically happen at a configurable frequency
      over and above the stream requested checkpoints
    * timestamped progress reporting, both automatic and stream requested
    * LATER: named branch support, tags for each branch
    * some basic statistics are dumped on completion.

    Here are the supported parameters:

    * info - name of a config file holding the analysis generated
      by running the --info processor in verbose mode. When
      importing large repositories, this parameter is needed so
      that the importer knows what blobs to intelligently cache.

    * trees - update the working tree before completing.
      By default, the importer updates the repository
      and branches and the user needs to run 'bzr update' for the
      branches of interest afterwards. In the future, this parameter
      might be more flexible, e.g. take a pattern of trees to update.

    * checkpoint - automatically checkpoint every n commits over and
      above any checkpoints contained in the import stream.
      The default is 10000.

    * count - only import this many commits then exit. If not set,
      all commits are imported.
    """

    known_params = ['info', 'trees', 'checkpoint', 'count']

    def note(self, msg, *args):
        """Output a note but timestamp it.

        :param msg: the message, optionally a %-format string
        :param args: values passed through to the module-level note()
        """
        msg = "%s %s" % (self._time_of_day(), msg)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but timestamp it.

        :param msg: the message, optionally a %-format string
        :param args: values passed through to the module-level warning()
        """
        msg = "%s WARNING: %s" % (self._time_of_day(), msg)
        warning(msg, *args)

    def _time_of_day(self):
        """Time of day as a string."""
        # Note: this is a separate method so tests can patch in a fixed value
        return time.strftime("%H:%M:%S")

    def pre_process(self):
        """Initialise timers, parameters, caches and the first write group."""
        self._start_time = time.time()
        self._load_info_and_params()
        self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
        self.init_stats()

        # Head tracking: last ref & map of commit mark to ref
        self.last_ref = None
        self.heads = {}

        # mapping of tag name to revision_id
        self.tags = {}

        # Create a write group. This is committed at the end of the import.
        # Checkpointing closes the current one and starts a new one.
        self.repo.start_write_group()

    def _load_info_and_params(self):
        """Read the 'info', 'checkpoint' and 'count' parameters.

        Sets self.info, self.checkpoint_every, self.max_commits and
        self.total_commits.
        """
        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide how often to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _process(self, command_iter):
        """Run the base class processing, aborting the write group on error.

        :param command_iter: passed unchanged to
            processor.ImportProcessor._process
        """
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            # Deliberately bare: whatever interrupted the import, the open
            # write group is aborted and the original error is re-raised.
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        """Commit pending data, update branches and trees, report stats."""
        # Commit the current write group.
        self.repo.commit_write_group()

        # Update the branches
        self.note("Updating branch information ...")
        updater = GenericBranchUpdater(self.branch, self.cache_mgr,
            helpers.invert_dict(self.heads), self.last_ref)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            self.warning("Unshared repository - not creating branches for "
                "these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested and dump stats
        self._tree_count = 0
        remind_about_update = True
        if self.params.get('trees'):
            if self.working_tree is None:
                self.warning("No working tree available to update")
            else:
                # The name must be bound on both branches: it is passed to
                # update() below regardless of verbosity.
                if self.verbose:
                    reporter = delta._ChangeReporter()
                else:
                    reporter = None
                self.note("Updating the working tree ...")
                self.working_tree.update(reporter)
                self._tree_count = 1
                remind_about_update = False
        self.dump_stats()
        if remind_about_update:
            self.note("NOTE: To refresh working trees, use 'bzr update'")

    def init_stats(self):
        """Reset the count of imported revisions."""
        self._revision_count = 0

    def dump_stats(self):
        """Report revision, branch and tree counts and the elapsed time.

        Reads self._branch_count and self._tree_count, which are set by
        post_process().
        """
        time_required = progress.str_tdelta(time.time() - self._start_time)
        rc = self._revision_count
        bc = self._branch_count
        wtc = self._tree_count
        self.note("Imported %d %s, updating %d %s and %d %s in %s",
            rc, helpers.single_plural(rc, "revision", "revisions"),
            bc, helpers.single_plural(bc, "branch", "branches"),
            wtc, helpers.single_plural(wtc, "tree", "trees"),
            time_required)

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob is cached under ':<mark>' when it has a mark, otherwise
        under the sha of its data.
        """
        if cmd.mark is not None:
            dataref = ":%s" % (cmd.mark,)
        else:
            dataref = osutils.sha_strings(cmd.data)
        self.cache_mgr.store_blob(dataref, cmd.data)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand.

        :param cmd: unused; commit_handler() passes None for automatic
            checkpoints
        """
        # Commit the current write group and start a new one
        self.repo.commit_write_group()
        self.repo.start_write_group()

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Loads the commit, records its revision id against its mark,
        updates head tracking, reports progress and then either stops
        the import (requested count reached) or checkpoints if due.
        """
        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
            self.verbose)
        handler.process()
        mark = ":" + cmd.mark
        self.cache_mgr.revision_ids[mark] = handler.revision_id

        # Track the heads
        for parent in cmd.parents:
            try:
                del self.heads[parent]
            except KeyError:
                self.warning("didn't find parent %s while tracking heads",
                    parent)
        self.heads[mark] = cmd.ref
        self.last_ref = cmd.ref

        # Report progress
        self._revision_count += 1
        self.report_progress("(%s)" % mark)

        # Check if we should finish up or automatically checkpoint
        if (self.max_commits is not None and
            self._revision_count >= self.max_commits):
            self.note("stopping after reaching requested count of commits")
            self.finished = True
        elif self._revision_count % self.checkpoint_every == 0:
            self.note("%d commits - automatic checkpoint triggered",
                self._revision_count)
            self.checkpoint_handler(None)

    def report_progress(self, details=''):
        """Log the number of commits processed so far.

        Output is produced for every commit in verbose mode, otherwise
        for every 10th commit.

        :param details: extra text appended to the progress line
        """
        # TODO: use a progress bar with ETA enabled
        if self.verbose or self._revision_count % 10 == 0:
            if self.total_commits is not None:
                counts = "%d/%d" % (self._revision_count, self.total_commits)
                eta = progress.get_eta(self._start_time, self._revision_count,
                    self.total_commits)
                eta_str = progress.str_tdelta(eta)
                # A string ending in '--' is not shown as an ETA
                if eta_str.endswith('--'):
                    eta_str = ''
                else:
                    eta_str = '[%s] ' % eta_str
            else:
                counts = "%d" % (self._revision_count,)
                eta_str = ''
            self.note("%s commits processed %s%s" % (counts, eta_str, details))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        # We could use a progress bar here instead
        self.note("progress %s" % (cmd.message,))

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Resets of refs under 'refs/tags/' define a tag; any other reset
        is ignored with a warning.
        """
        if cmd.ref.startswith('refs/tags/'):
            self._set_tag(cmd.ref[len('refs/tags/'):], cmd.from_)
        else:
            self.warning("named branches are not supported yet"
                " - ignoring reset of '%s'", cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self._set_tag(cmd.id, cmd.from_)

    def _set_tag(self, name, from_):
        """Define a tag given a name and an import 'from' reference.

        :param name: the tag name; decoded as utf-8 with replacement
        :param from_: the import reference of the tagged commit, looked
            up in the cache manager's revision_ids table
        """
        bzr_tag_name = name.decode('utf-8', 'replace')
        bzr_rev_id = self.cache_mgr.revision_ids[from_]
        self.tags[bzr_tag_name] = bzr_rev_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
293
294
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
295
class GenericCacheManager(object):
    """A manager of caches for the GenericProcessor.

    Holds the blob caches, the inventory cache and the lookup tables
    (import reference -> revision id, path -> file id) used during an
    import.
    """

    def __init__(self, info, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: if True, report each blob that is made sticky
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import-ref -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # path -> file-ids - as generated
        self.file_ids = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = self._sticky_refs_from_info(info)

    @staticmethod
    def _sticky_refs_from_info(info):
        """Return the set of blob references to keep, or None for all.

        :param info: mapping as passed to __init__, or None
        :return: None when there is no info or it lacks the blob usage
            section; otherwise a set of blob references
        """
        if info is None:
            return None
        try:
            multi = info['Blob usage tracking']['multi']
        except KeyError:
            # info not in file - possible when no blobs used
            return None
        # A single-valued config entry is a plain string rather than a
        # list; wrap it so membership tests compare whole references
        # instead of doing a substring search.
        if not isinstance(multi, (list, tuple, set, frozenset)):
            multi = [multi]
        return set(multi)

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when everything is sticky (no hints
        available), when its data is empty, or when its id is listed in
        the blobs to keep; otherwise it is held until first fetched.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
            if self.verbose:
                print("making blob %s sticky" % (id,))
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; other blobs are removed on fetch.

        :raises KeyError: if the blob is in neither cache
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def _delete_path(self, path):
        """Remove a path from caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct
        pass

    def _rename_path(self, old_path, new_path):
        """Rename a path in the caches."""
        # we actually want to remember what file-id we gave a path,
        # even when that file is renamed, so both paths should have
        # the same value and we don't delete any information
        self.file_ids[new_path] = self.file_ids[old_path]
362
363
0.64.5 by Ian Clatworthy
first cut at generic processing method
364
class GenericCommitHandler(processor.CommitHandler):
365
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
366
    def __init__(self, command, repo, cache_mgr, verbose=False):
        """Set up a handler for a single commit command.

        :param command: the commit command, passed on to
            processor.CommitHandler
        :param repo: the repository handed to the revision loader
        :param cache_mgr: the cache manager used for lookups
        :param verbose: if True, extra detail is logged while processing
        """
        processor.CommitHandler.__init__(self, command)
        self.verbose = verbose
        self.cache_mgr = cache_mgr
        self.repo = repo

        def fetch_inventories(revision_ids):
            # Looked up at call time; _get_inventories is not defined in
            # the visible part of this class.
            return self._get_inventories(revision_ids)

        # smart loader that uses these caches
        self.loader = revisionloader.RevisionLoader(repo, fetch_inventories)
374
375
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves the parents to revision ids
        and seeds the working inventory from the first parent (or from a
        fresh inventory when there are no parents).
        """
        self.revision_id = self.gen_revision_id()
        self.inv_delta = []
        # texts for this commit, keyed by file-id
        self.lines_for_commit = {}

        # Resolve the parent import references to revision ids
        parent_refs = self.command.parents
        if not parent_refs:
            self.parents = []
        else:
            rev_id_for = self.cache_mgr.revision_ids
            self.parents = [rev_id_for[ref] for ref in parent_refs]

        # Seed the inventory from the previous one
        if self.parents:
            # the inventory cache is keyed by bzr revision id
            basis = self.get_inventory(self.parents[0])
            self.inventory = basis.copy()
        else:
            self.inventory = self.gen_initial_inventory()

        if not self.repo.supports_rich_root():
            # In this repository, root entries have no knit or weave. When
            # serializing out to disk and back in, root.revision is always
            # the new revision_id.
            self.inventory.root.revision = self.revision_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())
403
0.64.14 by Ian Clatworthy
commit of modified files working
404
    def post_process_files(self):
        """Save the revision.

        Applies the accumulated inventory delta, caches the resulting
        inventory and loads a new Revision into the repository.
        """
        def as_identity(person):
            # person is indexed as (name, email, ...)
            return "%s <%s>" % (person[0], person[1])

        def lines_for(file_id):
            return self._get_lines(file_id)

        if self.verbose:
            note("applying inventory delta ...")
            for change in self.inv_delta:
                note("  %r" % (change,))
        self.inventory.apply_delta(self.inv_delta)
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        if self.verbose:
            note("created inventory ...")
            for item in self.inventory:
                note("  %r" % (item,))

        # Load the revision into the repository
        committer = self.command.committer
        who = as_identity(committer)
        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = as_identity(author)
            # only record the author when it differs from the committer
            if author_id != who:
                rev_props['author'] = author_id
        message = self._escape_commit_message(self.command.message)
        rev = revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
        self.loader.load(rev, self.inventory, None, lines_for)
436
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
437
    def _escape_commit_message(self, message):
438
        """Replace xml-incompatible control characters."""
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
439
        # It's crap that we need to do this at this level (but we do)
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
440
        # Code copied from bzrlib.commit.
441
        
442
        # Python strings can include characters that can't be
443
        # represented in well-formed XML; escape characters that
444
        # aren't listed in the XML specification
445
        # (http://www.w3.org/TR/REC-xml/#NT-Char).
446
        message, _ = re.subn(
447
            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
448
            lambda match: match.group(0).encode('unicode_escape'),
449
            message)
450
        return message
0.64.5 by Ian Clatworthy
first cut at generic processing method
451
452
    def modify_handler(self, filecmd):
        """Handle a file modification.

        The content comes from the blob cache when the command carries a
        dataref, otherwise from the command's inline data.
        """
        dataref = filecmd.dataref
        if dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(dataref)
        self._modify_inventory(filecmd.path, filecmd.kind,
            filecmd.is_executable, content)
459
460
    def delete_handler(self, filecmd):
        """Handle a file deletion.

        Removes the entry for the path from the inventory, warning if it
        is not there, and always notifies the cache manager afterwards.
        """
        target = filecmd.path
        try:
            file_id = self.bzr_file_id(target)
            del self.inventory[file_id]
        except errors.NoSuchId:
            warning("ignoring delete of %s - not in inventory" % (target,))
        finally:
            try:
                self.cache_mgr._delete_path(target)
            except KeyError:
                pass
0.64.5 by Ian Clatworthy
first cut at generic processing method
471
472
    def copy_handler(self, filecmd):
        """Handle a file copy - not implemented.

        :raises NotImplementedError: always
        """
        unsupported = self.copy_handler
        raise NotImplementedError(unsupported)
474
475
    def rename_handler(self, filecmd):
        """Handle a file rename.

        Appends an (old_path, new_path, file_id, entry) item to the
        inventory delta and records the rename in the cache manager.
        """
        source, target = filecmd.old_path, filecmd.new_path
        file_id = self.bzr_file_id(source)
        entry = self.inventory[file_id]
        self.inv_delta.append((source, target, file_id, entry))
        self.cache_mgr._rename_path(source, target)
0.64.5 by Ian Clatworthy
first cut at generic processing method
482
483
    def deleteall_handler(self, filecmd):
        """Handle a delete-all - not implemented.

        :raises NotImplementedError: always
        """
        unsupported = self.deleteall_handler
        raise NotImplementedError(unsupported)
485
0.64.16 by Ian Clatworthy
safe processing tweaks
486
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        known_ids = self.cache_mgr.file_ids
        try:
            return known_ids[path], False
        except KeyError:
            # first time this path is seen: generate an id and remember it
            new_id = generate_ids.gen_file_id(path)
            known_ids[path] = new_id
            return new_id, True
498
0.64.5 by Ian Clatworthy
first cut at generic processing method
499
    def bzr_file_id(self, path):
        """Return just the Bazaar file identifier for a path.

        This is bzr_file_id_and_new() without the is_new flag.
        """
        id_and_flag = self.bzr_file_id_and_new(path)
        return id_and_flag[0]
0.64.5 by Ian Clatworthy
first cut at generic processing method
502
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
503
    def gen_initial_inventory(self):
        """Build a new inventory for a revision that has no parents.

        The inventory is created with the current revision id.
        """
        return inventory.Inventory(revision_id=self.revision_id)
507
0.64.5 by Ian Clatworthy
first cut at generic processing method
508
    def gen_revision_id(self):
        """Build a revision id from the committer of the current command.

        Subclasses may override this, e.g. to produce deterministic ids.
        """
        committer = self.command.committer
        # Perhaps using the person running the import as the identity is
        # ok? If so, it might be a bit quicker and give slightly better
        # compression?
        identity = "%s <%s>" % (committer[0], committer[1])
        when = committer[2]
        return generate_ids.gen_revision_id(identity, when)
519
0.64.7 by Ian Clatworthy
start of multiple commit handling
520
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The cache manager's inventories are consulted first. On a miss
        the inventory is taken from the repository's revision tree and
        stored in the cache before being returned.

        :param revision_id: the revision whose inventory is wanted
        :return: the inventory for that revision
        """
        try:
            return self.cache_mgr.inventories[revision_id]
        except KeyError:
            pass
        # Report the miss through bzrlib.trace rather than printing to
        # stdout, so the diagnostic does not pollute command output.
        note("Hmm - get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from repository
        inv = self.repo.revision_tree(revision_id).inventory
        self.cache_mgr.inventories[revision_id] = inv
        return inv
530
0.64.5 by Ian Clatworthy
first cut at generic processing method
531
    def _get_inventories(self, revision_ids):
532
        """Get the inventories for revision-ids.
533
        
534
        This is a callback used by the RepositoryLoader to
535
        speed up inventory reconstruction."""
536
        present = []
537
        inventories = []
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
538
        # If an inventory is in the cache, we assume it was
0.64.5 by Ian Clatworthy
first cut at generic processing method
539
        # successfully loaded into the repsoitory
540
        for revision_id in revision_ids:
541
            try:
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
542
                inv = self.cache_mgr.inventories[revision_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
543
                present.append(revision_id)
544
            except KeyError:
0.64.17 by Ian Clatworthy
escape commit messages, diff author to committer and cache fixes
545
                print "Hmm - get_inventories cache miss for %s" % revision_id
0.64.5 by Ian Clatworthy
first cut at generic processing method
546
                # Not cached so reconstruct from repository
547
                if self.repo.has_revision(revision_id):
548
                    rev_tree = self.repo.revision_tree(revision_id)
549
                    present.append(revision_id)
550
                else:
551
                    rev_tree = self.repo.revision_tree(None)
552
                inv = rev_tree.inventory
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
553
                self.cache_mgr.inventories[revision_id] = inv
554
            inventories.append(inv)
0.64.5 by Ian Clatworthy
first cut at generic processing method
555
        return present, inventories
556
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
557
    def _get_lines(self, file_id):
558
        """Get the lines for a file-id."""
559
        return self.lines_for_commit[file_id]
0.64.5 by Ian Clatworthy
first cut at generic processing method
560
561
    def _modify_inventory(self, path, kind, is_executable, data):
        """Add an item to the inventory, or replace the one already there.

        :param path: path of the item; its containing directory is
          ensured first
        :param kind: inventory kind passed to inventory.make_entry
        :param is_executable: executable flag, used for file entries
        :param data: file content for files, link target for symlinks
        :raises errors.BzrError: if the entry is neither a file nor a
          symlink
        """
        # Build the replacement entry
        basename, parent_ie = self._ensure_directory(path)
        file_id = self.bzr_file_id(path)
        entry = inventory.make_entry(kind, basename, parent_ie.file_id,
            file_id)
        entry.revision = self.revision_id
        if isinstance(entry, inventory.InventoryFile):
            entry.executable = is_executable
            text_lines = osutils.split_lines(data)
            entry.text_sha1 = osutils.sha_strings(text_lines)
            entry.text_size = sum(len(line) for line in text_lines)
            # Keep the content so _get_lines can hand it back at commit time
            self.lines_for_commit[file_id] = text_lines
        elif isinstance(entry, inventory.InventoryLnk):
            entry.symlink_target = data
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Store the entry in the inventory
        if file_id not in self.inventory:
            self.inventory.add(entry)
        else:
            # HACK: no API for this (del+add does more than it needs to)
            # NOTE(review): only the id index is swapped here; confirm no
            # other Inventory bookkeeping needs the new entry as well.
            self.inventory._byid[file_id] = entry
0.64.5 by Ian Clatworthy
first cut at generic processing method
586
587
    def _ensure_directory(self, path):
        """Ensure that the containing directory exists for 'path'.

        A directory not yet recorded in directory_entries is created,
        after recursively ensuring its own parent, and added to the
        inventory.

        :return: basename, ie where basename is the last component of
          path and ie is the inventory entry of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory.root
        try:
            return basename, self.directory_entries[dirname]
        except KeyError:
            # Not recorded yet - fall through and create it
            pass

        # The parent has to exist before this directory can be added
        dir_basename, parent_ie = self._ensure_directory(dirname)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        dir_ie = make_directory(dir_file_id, dir_basename, parent_ie.file_id)
        dir_ie.revision = self.revision_id
        self.directory_entries[dirname] = dir_ie
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []
        self.inventory.add(dir_ie)
        return basename, dir_ie
0.64.31 by Ian Clatworthy
fix branch updating for the single branch case
616
617
0.64.34 by Ian Clatworthy
report lost branches
618
class GenericBranchUpdater(object):
    """Update Bazaar branches to match the heads found by an import."""

    def __init__(self, branch, cache_mgr, heads_by_ref, last_ref):
        """Create an object responsible for updating branches.

        :param branch: the default branch; its repository is the one used
        :param cache_mgr: object whose revision_ids mapping translates
          commit marks into revision ids
        :param heads_by_ref: a dictionary where
          names are git-style references like refs/heads/master;
          values are one item lists of commit marks.
        :param last_ref: the name in heads_by_ref whose head becomes the
          tip of the default branch
        """
        self.branch = branch
        self.repo = branch.repository
        self.cache_mgr = cache_mgr
        self.heads_by_ref = heads_by_ref
        self.last_ref = last_ref

    def update(self):
        """Update the Bazaar branches and tips matching the heads.

        If the repository is shared, this routine creates branches
        as required. If it isn't, warnings are produced about the
        loss of information.

        :return: updated, lost_heads where
          updated = the list of branches updated
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        default_tip, branch_tips, lost_heads = self._get_matching_branches()
        self._update_branch(self.branch, default_tip)
        updated = [self.branch]
        for br, tip in branch_tips:
            self._update_branch(br, tip)
            updated.append(br)
        return updated, lost_heads

    def _get_matching_branches(self):
        """Get the Bazaar branches.

        :return: default_tip, branch_tips, lost_heads where
          default_tip = the last commit mark for the default branch
          branch_tips = a list of (branch,tip) tuples for other branches.
          lost_heads = a list of (bazaar-name,revision) for branches that
            would have been created had the repository been shared
        """
        # Until there's a good reason to be more selective,
        # use the last imported revision as the tip of the default branch
        default_tip = self.heads_by_ref[self.last_ref][0]

        # Convert the reference names into Bazaar speak. The list is built
        # explicitly so this does not depend on keys() returning a
        # mutable list.
        ref_names = [ref for ref in self.heads_by_ref
                     if ref != self.last_ref]
        bzr_names = self._get_bzr_names_from_ref_names(ref_names)

        # Create/track missing branches
        branch_tips = []
        lost_heads = []
        shared_repo = self.repo.is_shared()
        for name in sorted(bzr_names):
            ref_name = bzr_names[name]
            tip = self.heads_by_ref[ref_name][0]
            if shared_repo:
                # TODO: create the branch
                pass
            else:
                lost_head = self.cache_mgr.revision_ids[tip]
                lost_heads.append((name, lost_head))
        return default_tip, branch_tips, lost_heads

    def _get_bzr_names_from_ref_names(self, ref_names):
        """Map reference names to Bazaar branch names.

        The Bazaar name is the last component of the reference. If that
        name is already taken, the components (minus a leading 'refs')
        joined with '--' are used instead.

        :return: a dictionary mapping Bazaar names to reference names
        """
        bazaar_names = {}
        for ref_name in sorted(ref_names):
            parts = ref_name.split('/')
            if parts[0] == 'refs':
                parts.pop(0)
            bazaar_name = parts[-1]
            if bazaar_name in bazaar_names:
                # Short name already used - fall back to the longer form
                bazaar_name = "--".join(parts)
            bazaar_names[bazaar_name] = ref_name
        return bazaar_names

    def _update_branch(self, br, last_mark):
        """Update a branch with last revision and tag information."""
        last_rev_id = self.cache_mgr.revision_ids[last_mark]
        # Count the ancestry lazily instead of building a list of it
        history = self.repo.iter_reverse_revision_history(last_rev_id)
        revno = sum(1 for _ in history)
        br.set_last_revision_info(revno, last_rev_id)
        # TODO: apply tags known in this branch
        #if self.tags:
        #    br.tags._set_tag_dict(self.tags)
        note("\t branch %s has %d revisions", br.nick, revno)