/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/plugins/fastimport/exporter.py

  • Committer: Breezy landing bot
  • Author(s): Jelmer Vernooij
  • Date: 2020-01-26 13:46:16 UTC
  • mfrom: (7463.3.8 git-remote-brz-refspec)
  • Revision ID: breezy.the.bot@gmail.com-20200126134616-pmrqyrpwdcqn24ud
Attempt to batch operations when fastexporting a remote bzr repository.

Merged from https://code.launchpad.net/~jelmer/brz/fastexport-perf/+merge/378079

Show diffs side-by-side

added

removed

Lines of Context:
59
59
import breezy.revision
60
60
from ... import (
61
61
    builtins,
62
 
    errors as bazErrors,
 
62
    errors,
63
63
    lazy_import,
 
64
    lru_cache,
64
65
    osutils,
65
66
    progress,
66
67
    trace,
81
82
from fastimport import commands
82
83
""")
83
84
 
 
85
REVISIONS_CHUNK_SIZE = 1000
 
86
 
84
87
 
85
88
def _get_output_stream(destination):
86
89
    if destination is None or destination == '-':
188
191
        self.rewrite_tags = rewrite_tags
189
192
        self.no_tags = no_tags
190
193
        self.baseline = baseline
 
194
        self.tree_cache = lru_cache.LRUCache(max_cache=20)
191
195
        self._multi_author_api_available = hasattr(breezy.revision.Revision,
192
196
                                                   'get_apparent_authors')
193
197
        self.properties_to_exclude = ['authors', 'author']
214
218
 
215
219
    def interesting_history(self):
216
220
        if self.revision:
217
 
            rev1, rev2 = builtins._get_revision_range(self.revision,
218
 
                                                      self.branch, "fast-export")
 
221
            rev1, rev2 = builtins._get_revision_range(
 
222
                self.revision, self.branch, "fast-export")
219
223
            start_rev_id = rev1.rev_id
220
224
            end_rev_id = rev2.rev_id
221
225
        else:
230
234
        # revisions to exclude now ...
231
235
        if start_rev_id is not None:
232
236
            self.note("Calculating the revisions to exclude ...")
233
 
            self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
234
 
                                           self.branch.iter_merge_sorted_revisions(start_rev_id)])
 
237
            self.excluded_revisions = set(
 
238
                [rev_id for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(start_rev_id)])
235
239
            if self.baseline:
236
240
                # needed so the first relative commit knows its parent
237
241
                self.excluded_revisions.remove(start_rev_id)
238
242
                view_revisions.insert(0, start_rev_id)
239
243
        return list(view_revisions)
240
244
 
 
245
    def emit_commits(self, interesting):
 
246
        if self.baseline:
 
247
            revobj = self.branch.repository.get_revision(interesting.pop(0))
 
248
            self.emit_baseline(revobj, self.ref)
 
249
        for i in range(0, len(interesting), REVISIONS_CHUNK_SIZE):
 
250
            chunk = interesting[i:i + REVISIONS_CHUNK_SIZE]
 
251
            history = dict(self.branch.repository.iter_revisions(chunk))
 
252
            trees_needed = set()
 
253
            trees = {}
 
254
            for revid in chunk:
 
255
                trees_needed.update(self.preprocess_commit(revid, history[revid], self.ref))
 
256
 
 
257
            for tree in self._get_revision_trees(trees_needed):
 
258
                trees[tree.get_revision_id()] = tree
 
259
 
 
260
            for revid in chunk:
 
261
                revobj = history[revid]
 
262
                if len(revobj.parent_ids) == 0:
 
263
                    parent = breezy.revision.NULL_REVISION
 
264
                else:
 
265
                    parent = revobj.parent_ids[0]
 
266
                self.emit_commit(revobj, self.ref, trees[parent], trees[revid])
 
267
 
241
268
    def run(self):
242
269
        # Export the data
243
270
        with self.branch.repository.lock_read():
247
274
                      self._commit_total)
248
275
            if not self.plain_format:
249
276
                self.emit_features()
250
 
            if self.baseline:
251
 
                self.emit_baseline(interesting.pop(0), self.ref)
252
 
            for revid in interesting:
253
 
                self.emit_commit(revid, self.ref)
 
277
            self.emit_commits(interesting)
254
278
            if self.branch.supports_tags() and not self.no_tags:
255
279
                self.emit_tags()
256
280
 
310
334
        try:
311
335
            if tree.kind(path) != 'directory':
312
336
                return False
313
 
        except bazErrors.NoSuchFile:
 
337
        except errors.NoSuchFile:
314
338
            self.warning("Skipping empty_dir detection - no file_id for %s" %
315
339
                         (path,))
316
340
            return False
326
350
        for feature in sorted(commands.FEATURE_NAMES):
327
351
            self.print_cmd(commands.FeatureCommand(feature))
328
352
 
329
 
    def emit_baseline(self, revid, ref):
 
353
    def emit_baseline(self, revobj, ref):
330
354
        # Emit a full source tree of the first commit's parent
331
 
        revobj = self.branch.repository.get_revision(revid)
332
355
        mark = 1
333
 
        self.revid_to_mark[revid] = mark
334
 
        file_cmds = self._get_filecommands(
335
 
            breezy.revision.NULL_REVISION, revid)
 
356
        self.revid_to_mark[revobj.revision_id] = mark
 
357
        tree_old = self.branch.repository.revision_tree(
 
358
            breezy.revision.NULL_REVISION)
 
359
        [tree_new] = list(self._get_revision_trees([revobj.revision_id]))
 
360
        file_cmds = self._get_filecommands(tree_old, tree_new)
336
361
        self.print_cmd(commands.ResetCommand(ref, None))
337
362
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
338
363
 
339
 
    def emit_commit(self, revid, ref):
 
364
    def preprocess_commit(self, revid, revobj, ref):
340
365
        if revid in self.revid_to_mark or revid in self.excluded_revisions:
341
366
            return
342
 
 
343
 
        # Get the Revision object
344
 
        try:
345
 
            revobj = self.branch.repository.get_revision(revid)
346
 
        except bazErrors.NoSuchRevision:
 
367
        if revobj is None:
347
368
            # This is a ghost revision. Mark it as not found and next!
348
369
            self.revid_to_mark[revid] = -1
349
370
            return
350
 
 
351
371
        # Get the primary parent
352
372
        # TODO: Consider the excluded revisions when deciding the parents.
353
373
        # Currently, a commit with parents that are excluded ought to be
354
374
        # triggering the ref calculation below (and it is not).
355
375
        # IGC 20090824
356
 
        ncommits = len(self.revid_to_mark)
357
 
        nparents = len(revobj.parent_ids)
358
 
        if nparents == 0:
 
376
        if len(revobj.parent_ids) == 0:
359
377
            parent = breezy.revision.NULL_REVISION
360
378
        else:
361
379
            parent = revobj.parent_ids[0]
362
380
 
 
381
        # Print the commit
 
382
        mark = len(self.revid_to_mark) + 1
 
383
        self.revid_to_mark[revobj.revision_id] = mark
 
384
 
 
385
        return [parent, revobj.revision_id]
 
386
 
 
387
    def emit_commit(self, revobj, ref, tree_old, tree_new):
363
388
        # For parentless commits we need to issue reset command first, otherwise
364
389
        # git-fast-import will assume previous commit was this one's parent
365
 
        if nparents == 0:
 
390
        if tree_old.get_revision_id() == breezy.revision.NULL_REVISION:
366
391
            self.print_cmd(commands.ResetCommand(ref, None))
367
392
 
368
 
        # Print the commit
369
 
        mark = ncommits + 1
370
 
        self.revid_to_mark[revid] = mark
371
 
        file_cmds = self._get_filecommands(parent, revid)
 
393
        file_cmds = self._get_filecommands(tree_old, tree_new)
 
394
        mark = self.revid_to_mark[revobj.revision_id]
372
395
        self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
373
396
 
374
397
        # Report progress and checkpoint if it's time for that
 
398
        ncommits = len(self.revid_to_mark)
375
399
        self.report_progress(ncommits)
376
400
        if (self.checkpoint is not None and self.checkpoint > 0 and ncommits and
377
401
                ncommits % self.checkpoint == 0):
450
474
                    pass
451
475
 
452
476
        # Build and return the result
453
 
        return commands.CommitCommand(git_ref, mark, author_info,
454
 
                                      committer_info, revobj.message.encode(
455
 
                                          "utf-8"), from_, merges, file_cmds,
456
 
                                      more_authors=more_author_info, properties=properties)
457
 
 
458
 
    def _get_revision_trees(self, parent, revision_id):
459
 
        try:
460
 
            tree_old = self.branch.repository.revision_tree(parent)
461
 
        except bazErrors.UnexpectedInventoryFormat:
462
 
            self.warning(
463
 
                "Parent is malformed - diffing against previous parent")
464
 
            # We can't find the old parent. Let's diff against his parent
465
 
            pp = self.branch.repository.get_revision(parent)
466
 
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
467
 
        tree_new = None
468
 
        try:
469
 
            tree_new = self.branch.repository.revision_tree(revision_id)
470
 
        except bazErrors.UnexpectedInventoryFormat:
471
 
            # We can't really do anything anymore
472
 
            self.warning("Revision %s is malformed - skipping" % revision_id)
473
 
        return tree_old, tree_new
474
 
 
475
 
    def _get_filecommands(self, parent, revision_id):
 
477
        return commands.CommitCommand(
 
478
            git_ref, mark, author_info, committer_info,
 
479
            revobj.message.encode("utf-8"), from_, merges, file_cmds,
 
480
            more_authors=more_author_info, properties=properties)
 
481
 
 
482
    def _get_revision_trees(self, revids):
 
483
        missing = []
 
484
        by_revid = {}
 
485
        for revid in revids:
 
486
            if revid == breezy.revision.NULL_REVISION:
 
487
                by_revid[revid] = self.branch.repository.revision_tree(revid)
 
488
            elif revid not in self.tree_cache:
 
489
                missing.append(revid)
 
490
 
 
491
        for tree in self.branch.repository.revision_trees(missing):
 
492
            by_revid[tree.get_revision_id()] = tree
 
493
 
 
494
        for revid in revids:
 
495
            try:
 
496
                yield self.tree_cache[revid]
 
497
            except KeyError:
 
498
                yield by_revid[revid]
 
499
 
 
500
        for revid, tree in by_revid.items():
 
501
            self.tree_cache[revid] = tree
 
502
 
 
503
    def _get_filecommands(self, tree_old, tree_new):
476
504
        """Get the list of FileCommands for the changes between two revisions."""
477
 
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
478
 
        if not(tree_old and tree_new):
479
 
            # Something is wrong with this revision - ignore the filecommands
480
 
            return
481
 
 
482
505
        changes = tree_new.changes_from(tree_old)
483
506
 
484
507
        my_modified = list(changes.modified)
486
509
        # The potential interaction between renames and deletes is messy.
487
510
        # Handle it here ...
488
511
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
489
 
            changes.renamed, changes.removed, revision_id, tree_old)
 
512
            changes.renamed, changes.removed, tree_new.get_revision_id(), tree_old)
490
513
 
491
514
        for cmd in file_cmds:
492
515
            yield cmd
493
516
 
494
517
        # Map kind changes to a delete followed by an add
495
518
        for change in changes.kind_changed:
496
 
            path = self._adjust_path_for_renames(path, renamed, revision_id)
 
519
            path = self._adjust_path_for_renames(
 
520
                path, renamed, tree_new.get_revision_id())
497
521
            # IGC: I don't understand why a delete is needed here.
498
522
            # In fact, it seems harmful? If you uncomment this line,
499
523
            # please file a bug explaining why you needed to.
523
547
            else:
524
548
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
525
549
                             (change.path[1], change.kind[1]))
526
 
        for (path, mode), chunks in tree_new.iter_files_bytes(
527
 
                files_to_get):
 
550
 
 
551
        # TODO(jelmer): Improve performance on remote repositories
 
552
        # by using Repository.iter_files_bytes for bzr repositories here.
 
553
        for (path, mode), chunks in tree_new.iter_files_bytes(files_to_get):
528
554
            yield commands.FileModifyCommand(
529
555
                path.encode("utf-8"), mode, None, b''.join(chunks))
530
556
 
580
606
 
581
607
            # Renaming a directory implies all children must be renamed.
582
608
            # Note: changes_from() doesn't handle this
583
 
            if kind == 'directory' and tree_old.kind(change.path[0]) == 'directory':
 
609
            if change.kind == ('directory', 'directory'):
584
610
                for p, e in tree_old.iter_entries_by_dir(specific_files=[change.path[0]]):
585
611
                    if e.kind == 'directory' and self.plain_format:
586
612
                        continue