/brz/remove-bazaar : revision 7027.2.3

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/plugins/fastimport/revision_store.py

Committer: Jelmer Vernooĳ
Date: 2018-07-04 00:06:43 UTC
mto: (7027.4.10 python3-blackbox)
mto: This revision was merged to the branch mainline in revision 7029.
Revision ID: jelmer@jelmer.uk-20180704000643-04xq2wy9y64pg00j

Drop old RevisionStore implementation.

files modified:
breezy/plugins/fastimport/processors/generic_processor.py

breezy/plugins/fastimport/revision_store.py

Show diffs side-by-side

added added

removed removed

breezy/plugins/fastimport/revision_store.py

165

yield change

166

167

168

class AbstractRevisionStore(object):

168

class RevisionStore(object):

169

170

def __init__(self, repo):

171

"""An object responsible for loading revisions into a repository.

450

entries = iter([ie for path, ie in path_entries])

451

return entries

452

453

def _load_texts(self, revision_id, entries, text_provider,

454

parents_provider):

455

"""Load texts to a repository for inventory entries.

456

457

This method is provided for subclasses to use or override.

458

459

:param revision_id: the revision identifier

460

:param entries: iterator over the inventory entries

461

:param text_provider: a callable expecting a file_id parameter

462

that returns the text for that file-id

463

:param parents_provider: a callable expecting a file_id parameter

464

that returns the list of parent-ids for that file-id

465

"""

466

raise NotImplementedError(self._load_texts)

467

468

453

def _add_inventory(self, revision_id, inv, parents, parent_invs):

469

454

"""Add the inventory inv to the repository as revision_id.

470

455

537

522

inventories.append(rev_tree.root_inventory)

538

523

return present, inventories

539

524

540

541

class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old breezy Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, text_provider,
                    parents_provider):
        """Add the texts introduced by revision_id to the repository.

        See RevisionStore._load_texts().

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        tx = self.repo.get_transaction()
        for ie in entries:
            # Asking the weave whether ie.revision is already present
            # (get_weave_or_empty(...) followed by a membership test) was
            # measured as *really* slow: over 50% of import time.
            # Instead only entries last changed in this very revision are
            # loaded, which assumes the version is not already there. In
            # the general case a shared repository might already have the
            # revision, but we arguably don't need that check when
            # importing from a foreign system.
            if ie.revision != revision_id:
                continue
            file_id = ie.file_id
            text_parents = [(file_id, p) for p in parents_provider(file_id)]
            lines = text_provider(file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        """Return the lines stored for file_id at revision_id.

        :param revision_id: the revision identifier
        :param file_id: the file identifier whose weave is read
        """
        tx = self.repo.get_transaction()
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        """Store the revision object rev; inv is not used here.

        :param rev: the revision to add
        :param inv: accepted for interface compatibility, ignored
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(
            rev, self.repo.get_transaction())

581

582

583

class RevisionStore2(AbstractRevisionStore):

584

"""A RevisionStore that uses the new breezy Repository API."""

585

586

def _load_texts(self, revision_id, entries, text_provider, parents_provider):

587

"""See RevisionStore._load_texts()."""

525

def _load_texts(self, revision_id, entries, text_provider, parents_provider):

526

"""Load texts to a repository for inventory entries.

527

528

This method is provided for subclasses to use or override.

529

530

:param revision_id: the revision identifier

531

:param entries: iterator over the inventory entries

532

:param text_provider: a callable expecting a file_id parameter

533

that returns the text for that file-id

534

:param parents_provider: a callable expecting a file_id parameter

535

that returns the list of parent-ids for that file-id

536

"""

588

537

text_keys = {}

589

538

for ie in entries:

590

539

text_keys[(ie.file_id, ie.revision)] = ie

591

540

text_parent_map = self.repo.texts.get_parent_map(text_keys)

592

541

missing_texts = set(text_keys) - set(text_parent_map)

593

self._load_texts_for_file_rev_ids(missing_texts, text_provider,

594

parents_provider)

595

596

def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,

597

parents_provider):

598

"""Load texts to a repository for file-ids, revision-id tuples.

599

600

:param file_rev_ids: iterator over the (file_id, revision_id) tuples

601

:param text_provider: a callable expecting a file_id parameter

602

that returns the text for that file-id

603

:param parents_provider: a callable expecting a file_id parameter

604

that returns the list of parent-ids for that file-id

605

"""

606

for file_id, revision_id in file_rev_ids:

542

for file_id, revision_id in missing_texts:

607

543

text_key = (file_id, revision_id)

608

544

text_parents = [(file_id, p) for p in parents_provider(file_id)]

609

545

lines = text_provider(file_id)

624

560

# # delta chains on inventories. Just do the essentials here ...

625

561

# _mod_revision.check_not_reserved_id(rev.revision_id)

626

562

# self.repo._add_revision(rev)

627

628

629

class ImportRevisionStore1(RevisionStore1):
    """A RevisionStore (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
                 random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
            whether to fulltext the inventory or not. The revision count
            is passed as a parameter and the result is treated as a boolean.
        :param random_ids: value forwarded as ``random_id`` to the
            inventory versioned file when inventory lines are added
        """
        RevisionStore1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See RevisionStore._add_inventory.

        Serialises inv, adds it to the inventory weave and remembers the
        resulting content in the parent-text cache under revision_id.

        :param revision_id: the revision the inventory is stored as
        :param inv: the inventory to add
        :param parents: the parent revision ids of the inventory
        :param parent_invs: not used by this implementation
        :return: the sha1 reported by _inventory_add_lines
        """
        # Code taken from breezy.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        # The text length is returned too but nothing here needs it.
        sha1, _num_bytes, parent_text = self._inventory_add_lines(
            inv_vf, revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
                             parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the version (revision) id being added
        :param parents: the parent version ids
        :param lines: the serialised inventory as a list of byte lines
        :param parent_texts: mapping of already-built parent contents,
            or None
        :return: a (digest, text_length, content) tuple; digest and
            text_length are computed before any trailing newline is
            appended to the final line
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # breezy.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # breezy.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = b''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = [
            parent for parent in parents if inv_vf.has_version(parent)]
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if delta and (not present_parents or
                      present_parents[0] != parents[0]):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines and not lines[-1].endswith(b'\n'):
            # copy the contents of lines so the caller's list is untouched.
            lines = lines[:]
            options.append(b'no-eol')
            lines[-1] = lines[-1] + b'\n'
            line_bytes += b'\n'

        # Deliberately skipped compared with breezy.knit._add(): limiting
        # the delta chain to a fixed number of deltas via
        # inv_vf._check_should_delta(present_parents). That check exists to
        # minimize both I/O and the time spent applying deltas on extract.

        # NOTE(review): the options above are bytes literals while this
        # requires a str version_id - confirm the intended id type.
        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(
                content, present_parents, parent_texts, delta,
                inv_vf.factory.annotated, left_matching_blocks)

        if delta:
            options.append(b'line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, data = inv_vf._data._record_to_data(
                version_id, digest, store_lines)
        else:
            options.append(b'fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, data = inv_vf._data._record_to_data(
                    version_id, digest, lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, data = inv_vf._data._record_to_data(
                    version_id, digest, store_lines)

        access_memo = inv_vf._data.add_raw_records([size], data)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content

Older »