/brz/remove-bazaar : revision 2955.5.5

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/pack_repo.py

Committer: Vincent Ladeuil
Date: 2007-11-14 08:20:26 UTC
mfrom: (2974 +trunk)
mto: (2990.1.1 trunk)
mto: This revision was merged to the branch mainline in revision 2991.
Revision ID: v.ladeuil+lp@free.fr-20071114082026-4d27f52n5r0t82rw

merge bzr.dev

files added:
bzrlib/tests/blackbox/test_check.py

doc/en/user-guide/authentication_conf.txt

files modified:
Makefile

NEWS

bzrlib/__init__.py

bzrlib/builtins.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/debug.py

bzrlib/dirstate.py

bzrlib/export/__init__.py

bzrlib/help_topics.py

bzrlib/info.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/smtp_connection.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/memory.py

bzrlib/transport/ssh.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/authentication-ring.txt

doc/developers/knitpack.txt

doc/en/user-guide/configuration.txt

tools/win32/bzr-win32-bdist-postinstall.py

Show diffs side-by-side

added added

removed removed

bzrlib/repofmt/pack_repo.py

475

self.knit_access.set_writer(None, None, (None, None))

476

477

478

class RepositoryPackCollection(object):

479

"""Management of packs within a repository."""

480

481

def __init__(self, repo, transport, index_transport, upload_transport,

482

pack_transport):

483

"""Create a new RepositoryPackCollection.

484

485

:param transport: Addresses the repository base directory

486

(typically .bzr/repository/).

487

:param index_transport: Addresses the directory containing indices.

488

:param upload_transport: Addresses the directory into which packs are written

489

while they're being created.

490

:param pack_transport: Addresses the directory of existing complete packs.

491

"""

492

self.repo = repo

493

self.transport = transport

494

self._index_transport = index_transport

495

self._upload_transport = upload_transport

496

self._pack_transport = pack_transport

497

self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}

498

self.packs = []

499

# name:Pack mapping

500

self._packs_by_name = {}

501

# the previous pack-names content

502

self._packs_at_load = None

503

# when a pack is being created by this object, the state of that pack.

504

self._new_pack = None

505

# aggregated revision index data

506

self.revision_index = AggregateIndex()

507

self.inventory_index = AggregateIndex()

508

self.text_index = AggregateIndex()

509

self.signature_index = AggregateIndex()

510

511

def add_pack_to_memory(self, pack):

512

"""Make a Pack object available to the repository to satisfy queries.

513

514

:param pack: A Pack object.

515

"""

516

assert pack.name not in self._packs_by_name

517

self.packs.append(pack)

518

self._packs_by_name[pack.name] = pack

519

self.revision_index.add_index(pack.revision_index, pack)

520

self.inventory_index.add_index(pack.inventory_index, pack)

521

self.text_index.add_index(pack.text_index, pack)

522

self.signature_index.add_index(pack.signature_index, pack)

523

524

def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,

525

nostore_sha, random_revid):

526

file_id_index = GraphIndexPrefixAdapter(

527

self.text_index.combined_index,

528

(file_id, ), 1,

529

add_nodes_callback=self.text_index.add_callback)

530

self.repo._text_knit._index._graph_index = file_id_index

531

self.repo._text_knit._index._add_callback = file_id_index.add_nodes

532

return self.repo._text_knit.add_lines_with_ghosts(

533

revision_id, parents, new_lines, nostore_sha=nostore_sha,

534

random_id=random_revid, check_content=False)[0:2]

535

536

def all_packs(self):

537

"""Return a list of all the Pack objects this repository has.

538

539

Note that an in-progress pack being created is not returned.

540

541

:return: A list of Pack objects for all the packs in the repository.

542

"""

543

result = []

544

for name in self.names():

545

result.append(self.get_pack_by_name(name))

546

return result

547

548

def autopack(self):

549

"""Pack the pack collection incrementally.

550

551

This will not attempt global reorganisation or recompression,

552

rather it will just ensure that the total number of packs does

553

not grow without bound. It uses the _max_pack_count method to

554

determine if autopacking is needed, and the pack_distribution

555

method to determine the number of revisions in each pack.

556

557

If autopacking takes place then the packs name collection will have

558

been flushed to disk - packing requires updating the name collection

559

in synchronisation with certain steps. Otherwise the names collection

560

is not flushed.

561

562

:return: True if packing took place.

563

"""

564

# XXX: Should not be needed when the management of indices is sane.

565

total_revisions = self.revision_index.combined_index.key_count()

566

total_packs = len(self._names)

567

if self._max_pack_count(total_revisions) >= total_packs:

568

return False

569

# XXX: the following may want to be a class, to pack with a given

570

# policy.

571

mutter('Auto-packing repository %s, which has %d pack files, '

572

'containing %d revisions into %d packs.', self, total_packs,

573

total_revisions, self._max_pack_count(total_revisions))

574

# determine which packs need changing

575

pack_distribution = self.pack_distribution(total_revisions)

576

existing_packs = []

577

for pack in self.all_packs():

578

revision_count = pack.get_revision_count()

579

if revision_count == 0:

580

# revision less packs are not generated by normal operation,

581

# only by operations like sign-my-commits, and thus will not

582

# tend to grow rapidly or without bound like commit containing

583

# packs do - leave them alone as packing them really should

584

# group their data with the relevant commit, and that may

585

# involve rewriting ancient history - which autopack tries to

586

# avoid. Alternatively we could not group the data but treat

587

# each of these as having a single revision, and thus add

588

# one revision for each to the total revision count, to get

589

# a matching distribution.

590

continue

591

existing_packs.append((revision_count, pack))

592

pack_operations = self.plan_autopack_combinations(

593

existing_packs, pack_distribution)

594

self._execute_pack_operations(pack_operations)

595

return True

596

597

def create_pack_from_packs(self, packs, suffix, revision_ids=None):

478

class Packer(object):

479

"""Create a pack from packs."""

480

481

def __init__(self, pack_collection, packs, suffix, revision_ids=None):

482

"""Create a Packer.

483

484

:param pack_collection: A RepositoryPackCollection object where the

485

new pack is being written to.

486

:param packs: The packs to combine.

487

:param suffix: The suffix to use on the temporary files for the pack.

488

:param revision_ids: Revision ids to limit the pack to.

489

"""

490

self.packs = packs

491

self.suffix = suffix

492

self.revision_ids = revision_ids

493

self._pack_collection = pack_collection

494

495

def pack(self, pb=None):

598

496

"""Create a new pack by reading data from other packs.

599

497

600

498

This does little more than a bulk copy of data. One key difference

604

502

source packs are not altered and are not required to be in the current

605

503

pack collection.

606

504

607

:param packs: An iterable of Packs to combine.

608

:param revision_ids: Either None, to copy all data, or a list

609

of revision_ids to limit the copied data to the data they

610

introduced.

505

:param pb: An optional progress bar to use. A nested bar is created if

506

this is None.

611

507

:return: A Pack object, or None if nothing was copied.

612

508

"""

613

509

# open a pack - using the same name as the last temporary file

614

510

# - which has already been flushed, so it's safe.

615

511

# XXX: - duplicate code warning with start_write_group; fix before

616

512

# considering 'done'.

617

if self._new_pack is not None:

513

if self._pack_collection._new_pack is not None:

618

514

raise errors.BzrError('call to create_pack_from_packs while '

619

515

'another pack is being written.')

620

if revision_ids is not None:

621

if len(revision_ids) == 0:

516

if self.revision_ids is not None:

517

if len(self.revision_ids) == 0:

622

518

# silly fetch request.

623

519

return None

624

520

else:

625

revision_ids = frozenset(revision_ids)

626

pb = ui.ui_factory.nested_progress_bar()

521

self.revision_ids = frozenset(self.revision_ids)

522

if pb is None:

523

self.pb = ui.ui_factory.nested_progress_bar()

524

else:

525

self.pb = pb

627

526

try:

628

return self._create_pack_from_packs(packs, suffix, revision_ids,

629

pb)

527

return self._create_pack_from_packs()

630

528

finally:

631

pb.finished()

632

633

def _create_pack_from_packs(self, packs, suffix, revision_ids, pb):

634

pb.update("Opening pack", 0, 5)

635

new_pack = NewPack(self._upload_transport, self._index_transport,

636

self._pack_transport, upload_suffix=suffix)

529

if pb is None:

530

self.pb.finished()

531

532

def open_pack(self):

533

"""Open a pack for the pack we are creating."""

534

return NewPack(self._pack_collection._upload_transport,

535

self._pack_collection._index_transport,

536

self._pack_collection._pack_transport, upload_suffix=self.suffix)

537

538

def _create_pack_from_packs(self):

539

self.pb.update("Opening pack", 0, 5)

540

new_pack = self.open_pack()

637

541

# buffer data - we won't be reading-back during the pack creation and

638

542

# this makes a significant difference on sftp pushes.

639

543

new_pack.set_write_cache_size(1024*1024)

640

544

if 'pack' in debug.debug_flags:

641

545

plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)

642

for a_pack in packs]

643

if revision_ids is not None:

644

rev_count = len(revision_ids)

546

for a_pack in self.packs]

547

if self.revision_ids is not None:

548

rev_count = len(self.revision_ids)

645

549

else:

646

550

rev_count = 'all'

647

551

mutter('%s: create_pack: creating pack from source packs: '

648

552

'%s%s %s revisions wanted %s t=0',

649

time.ctime(), self._upload_transport.base, new_pack.random_name,

553

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

650

554

plain_pack_list, rev_count)

651

555

# select revisions

652

if revision_ids:

653

revision_keys = [(revision_id,) for revision_id in revision_ids]

556

if self.revision_ids:

557

revision_keys = [(revision_id,) for revision_id in self.revision_ids]

654

558

else:

655

559

revision_keys = None

656

560

657

561

# select revision keys

658

revision_index_map = self._packs_list_to_pack_map_and_index_list(

659

packs, 'revision_index')[0]

660

revision_nodes = self._index_contents(revision_index_map, revision_keys)

562

revision_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

563

self.packs, 'revision_index')[0]

564

revision_nodes = self._pack_collection._index_contents(revision_index_map, revision_keys)

661

565

# copy revision keys and adjust values

662

pb.update("Copying revision texts", 1)

566

self.pb.update("Copying revision texts", 1)

663

567

list(self._copy_nodes_graph(revision_nodes, revision_index_map,

664

568

new_pack._writer, new_pack.revision_index))

665

569

if 'pack' in debug.debug_flags:

666

570

mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',

667

time.ctime(), self._upload_transport.base, new_pack.random_name,

571

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

668

572

new_pack.revision_index.key_count(),

669

573

time.time() - new_pack.start_time)

670

574

# select inventory keys

672

576

# querying for keys here could introduce a bug where an inventory item

673

577

# is missed, so do not change it to query separately without cross

674

578

# checking like the text key check below.

675

inventory_index_map = self._packs_list_to_pack_map_and_index_list(

676

packs, 'inventory_index')[0]

677

inv_nodes = self._index_contents(inventory_index_map, inv_keys)

579

inventory_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

580

self.packs, 'inventory_index')[0]

581

inv_nodes = self._pack_collection._index_contents(inventory_index_map, inv_keys)

678

582

# copy inventory keys and adjust values

679

583

# XXX: Should be a helper function to allow different inv representation

680

584

# at this point.

681

pb.update("Copying inventory texts", 2)

585

self.pb.update("Copying inventory texts", 2)

682

586

inv_lines = self._copy_nodes_graph(inv_nodes, inventory_index_map,

683

587

new_pack._writer, new_pack.inventory_index, output_lines=True)

684

if revision_ids:

685

fileid_revisions = self.repo._find_file_ids_from_xml_inventory_lines(

686

inv_lines, revision_ids)

588

if self.revision_ids:

589

fileid_revisions = self._pack_collection.repo._find_file_ids_from_xml_inventory_lines(

590

inv_lines, self.revision_ids)

687

591

text_filter = []

688

592

for fileid, file_revids in fileid_revisions.iteritems():

689

593

text_filter.extend(

694

598

text_filter = None

695

599

if 'pack' in debug.debug_flags:

696

600

mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',

697

time.ctime(), self._upload_transport.base, new_pack.random_name,

601

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

698

602

new_pack.inventory_index.key_count(),

699

603

time.time() - new_pack.start_time)

700

604

# select text keys

701

text_index_map = self._packs_list_to_pack_map_and_index_list(

702

packs, 'text_index')[0]

703

text_nodes = self._index_contents(text_index_map, text_filter)

605

text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

606

self.packs, 'text_index')[0]

607

text_nodes = self._pack_collection._index_contents(text_index_map, text_filter)

704

608

if text_filter is not None:

705

609

# We could return the keys copied as part of the return value from

706

610

# _copy_nodes_graph but this doesn't work all that well with the

718

622

raise errors.RevisionNotPresent(a_missing_key[1],

719

623

a_missing_key[0])

720

624

# copy text keys and adjust values

721

pb.update("Copying content texts", 3)

625

self.pb.update("Copying content texts", 3)

722

626

list(self._copy_nodes_graph(text_nodes, text_index_map,

723

627

new_pack._writer, new_pack.text_index))

724

628

if 'pack' in debug.debug_flags:

725

629

mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',

726

time.ctime(), self._upload_transport.base, new_pack.random_name,

630

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

727

631

new_pack.text_index.key_count(),

728

632

time.time() - new_pack.start_time)

729

633

# select signature keys

730

634

signature_filter = revision_keys # same keyspace

731

signature_index_map = self._packs_list_to_pack_map_and_index_list(

732

packs, 'signature_index')[0]

733

signature_nodes = self._index_contents(signature_index_map,

635

signature_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(

636

self.packs, 'signature_index')[0]

637

signature_nodes = self._pack_collection._index_contents(signature_index_map,

734

638

signature_filter)

735

639

# copy signature keys and adjust values

736

pb.update("Copying signature texts", 4)

640

self.pb.update("Copying signature texts", 4)

737

641

self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,

738

642

new_pack.signature_index)

739

643

if 'pack' in debug.debug_flags:

740

644

mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',

741

time.ctime(), self._upload_transport.base, new_pack.random_name,

645

time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,

742

646

new_pack.signature_index.key_count(),

743

647

time.time() - new_pack.start_time)

744

648

if not new_pack.data_inserted():

745

649

new_pack.abort()

746

650

return None

747

pb.update("Finishing pack", 5)

651

self.pb.update("Finishing pack", 5)

748

652

new_pack.finish()

749

self.allocate(new_pack)

653

self._pack_collection.allocate(new_pack)

750

654

return new_pack

751

655

656

def _copy_nodes(self, nodes, index_map, writer, write_index):

657

"""Copy knit nodes between packs with no graph references."""

658

pb = ui.ui_factory.nested_progress_bar()

659

try:

660

return self._do_copy_nodes(nodes, index_map, writer,

661

write_index, pb)

662

finally:

663

pb.finished()

664

665

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):

666

# for record verification

667

knit_data = _KnitData(None)

668

# plan a readv on each source pack:

669

# group by pack

670

nodes = sorted(nodes)

671

# how to map this into knit.py - or knit.py into this?

672

# we don't want the typical knit logic, we want grouping by pack

673

# at this point - perhaps a helper library for the following code

674

# duplication points?

675

request_groups = {}

676

for index, key, value in nodes:

677

if index not in request_groups:

678

request_groups[index] = []

679

request_groups[index].append((key, value))

680

record_index = 0

681

pb.update("Copied record", record_index, len(nodes))

682

for index, items in request_groups.iteritems():

683

pack_readv_requests = []

684

for key, value in items:

685

# ---- KnitGraphIndex.get_position

686

bits = value[1:].split(' ')

687

offset, length = int(bits[0]), int(bits[1])

688

pack_readv_requests.append((offset, length, (key, value[0])))

689

# linear scan up the pack

690

pack_readv_requests.sort()

691

# copy the data

692

transport, path = index_map[index]

693

reader = pack.make_readv_reader(transport, path,

694

[offset[0:2] for offset in pack_readv_requests])

695

for (names, read_func), (_1, _2, (key, eol_flag)) in \

696

izip(reader.iter_records(), pack_readv_requests):

697

raw_data = read_func(None)

698

# check the header only

699

df, _ = knit_data._parse_record_header(key[-1], raw_data)

700

df.close()

701

pos, size = writer.add_bytes_record(raw_data, names)

702

write_index.add_node(key, eol_flag + "%d %d" % (pos, size))

703

pb.update("Copied record", record_index)

704

record_index += 1

705

706

def _copy_nodes_graph(self, nodes, index_map, writer, write_index,

707

output_lines=False):

708

"""Copy knit nodes between packs.

709

710

:param output_lines: Return lines present in the copied data as

711

an iterator.

712

"""

713

pb = ui.ui_factory.nested_progress_bar()

714

try:

715

return self._do_copy_nodes_graph(nodes, index_map, writer,

716

write_index, output_lines, pb)

717

finally:

718

pb.finished()

719

720

def _do_copy_nodes_graph(self, nodes, index_map, writer, write_index,

721

output_lines, pb):

722

# for record verification

723

knit_data = _KnitData(None)

724

# for line extraction when requested (inventories only)

725

if output_lines:

726

factory = knit.KnitPlainFactory()

727

# plan a readv on each source pack:

728

# group by pack

729

nodes = sorted(nodes)

730

# how to map this into knit.py - or knit.py into this?

731

# we don't want the typical knit logic, we want grouping by pack

732

# at this point - perhaps a helper library for the following code

733

# duplication points?

734

request_groups = {}

735

record_index = 0

736

pb.update("Copied record", record_index, len(nodes))

737

for index, key, value, references in nodes:

738

if index not in request_groups:

739

request_groups[index] = []

740

request_groups[index].append((key, value, references))

741

for index, items in request_groups.iteritems():

742

pack_readv_requests = []

743

for key, value, references in items:

744

# ---- KnitGraphIndex.get_position

745

bits = value[1:].split(' ')

746

offset, length = int(bits[0]), int(bits[1])

747

pack_readv_requests.append((offset, length, (key, value[0], references)))

748

# linear scan up the pack

749

pack_readv_requests.sort()

750

# copy the data

751

transport, path = index_map[index]

752

reader = pack.make_readv_reader(transport, path,

753

[offset[0:2] for offset in pack_readv_requests])

754

for (names, read_func), (_1, _2, (key, eol_flag, references)) in \

755

izip(reader.iter_records(), pack_readv_requests):

756

raw_data = read_func(None)

757

if output_lines:

758

# read the entire thing

759

content, _ = knit_data._parse_record(key[-1], raw_data)

760

if len(references[-1]) == 0:

761

line_iterator = factory.get_fulltext_content(content)

762

else:

763

line_iterator = factory.get_linedelta_content(content)

764

for line in line_iterator:

765

yield line

766

else:

767

# check the header only

768

df, _ = knit_data._parse_record_header(key[-1], raw_data)

769

df.close()

770

pos, size = writer.add_bytes_record(raw_data, names)

771

write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)

772

pb.update("Copied record", record_index)

773

record_index += 1

774

775

776

class ReconcilePacker(Packer):

777

"""A packer which regenerates indices etc as it copies.

778

779

This is used by ``bzr reconcile`` to cause parent text pointers to be

780

regenerated.

781

"""

782

783

784

class RepositoryPackCollection(object):

785

"""Management of packs within a repository."""

786

787

def __init__(self, repo, transport, index_transport, upload_transport,

788

pack_transport):

789

"""Create a new RepositoryPackCollection.

790

791

:param transport: Addresses the repository base directory

792

(typically .bzr/repository/).

793

:param index_transport: Addresses the directory containing indices.

794

:param upload_transport: Addresses the directory into which packs are written

795

while they're being created.

796

:param pack_transport: Addresses the directory of existing complete packs.

797

"""

798

self.repo = repo

799

self.transport = transport

800

self._index_transport = index_transport

801

self._upload_transport = upload_transport

802

self._pack_transport = pack_transport

803

self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}

804

self.packs = []

805

# name:Pack mapping

806

self._packs_by_name = {}

807

# the previous pack-names content

808

self._packs_at_load = None

809

# when a pack is being created by this object, the state of that pack.

810

self._new_pack = None

811

# aggregated revision index data

812

self.revision_index = AggregateIndex()

813

self.inventory_index = AggregateIndex()

814

self.text_index = AggregateIndex()

815

self.signature_index = AggregateIndex()

816

817

def add_pack_to_memory(self, pack):

818

"""Make a Pack object available to the repository to satisfy queries.

819

820

:param pack: A Pack object.

821

"""

822

assert pack.name not in self._packs_by_name

823

self.packs.append(pack)

824

self._packs_by_name[pack.name] = pack

825

self.revision_index.add_index(pack.revision_index, pack)

826

self.inventory_index.add_index(pack.inventory_index, pack)

827

self.text_index.add_index(pack.text_index, pack)

828

self.signature_index.add_index(pack.signature_index, pack)

829

830

def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,

831

nostore_sha, random_revid):

832

file_id_index = GraphIndexPrefixAdapter(

833

self.text_index.combined_index,

834

(file_id, ), 1,

835

add_nodes_callback=self.text_index.add_callback)

836

self.repo._text_knit._index._graph_index = file_id_index

837

self.repo._text_knit._index._add_callback = file_id_index.add_nodes

838

return self.repo._text_knit.add_lines_with_ghosts(

839

revision_id, parents, new_lines, nostore_sha=nostore_sha,

840

random_id=random_revid, check_content=False)[0:2]

841

842

def all_packs(self):

843

"""Return a list of all the Pack objects this repository has.

844

845

Note that an in-progress pack being created is not returned.

846

847

:return: A list of Pack objects for all the packs in the repository.

848

"""

849

result = []

850

for name in self.names():

851

result.append(self.get_pack_by_name(name))

852

return result

853

854

def autopack(self):

855

"""Pack the pack collection incrementally.

856

857

This will not attempt global reorganisation or recompression,

858

rather it will just ensure that the total number of packs does

859

not grow without bound. It uses the _max_pack_count method to

860

determine if autopacking is needed, and the pack_distribution

861

method to determine the number of revisions in each pack.

862

863

If autopacking takes place then the packs name collection will have

864

been flushed to disk - packing requires updating the name collection

865

in synchronisation with certain steps. Otherwise the names collection

866

is not flushed.

867

868

:return: True if packing took place.

869

"""

870

# XXX: Should not be needed when the management of indices is sane.

871

total_revisions = self.revision_index.combined_index.key_count()

872

total_packs = len(self._names)

873

if self._max_pack_count(total_revisions) >= total_packs:

874

return False

875

# XXX: the following may want to be a class, to pack with a given

876

# policy.

877

mutter('Auto-packing repository %s, which has %d pack files, '

878

'containing %d revisions into %d packs.', self, total_packs,

879

total_revisions, self._max_pack_count(total_revisions))

880

# determine which packs need changing

881

pack_distribution = self.pack_distribution(total_revisions)

882

existing_packs = []

883

for pack in self.all_packs():

884

revision_count = pack.get_revision_count()

885

if revision_count == 0:

886

# revision less packs are not generated by normal operation,

887

# only by operations like sign-my-commits, and thus will not

888

# tend to grow rapidly or without bound like commit containing

889

# packs do - leave them alone as packing them really should

890

# group their data with the relevant commit, and that may

891

# involve rewriting ancient history - which autopack tries to

892

# avoid. Alternatively we could not group the data but treat

893

# each of these as having a single revision, and thus add

894

# one revision for each to the total revision count, to get

895

# a matching distribution.

896

continue

897

existing_packs.append((revision_count, pack))

898

pack_operations = self.plan_autopack_combinations(

899

existing_packs, pack_distribution)

900

self._execute_pack_operations(pack_operations)

901

return True

902

752

903

def _execute_pack_operations(self, pack_operations):

753

904

"""Execute a series of pack operations.

754

905

759

910

# we may have no-ops from the setup logic

760

911

if len(packs) == 0:

761

912

continue

762

# have a progress bar?

763

self.create_pack_from_packs(packs, '.autopack')

913

Packer(self, packs, '.autopack').pack()

764

914

for pack in packs:

765

915

self._remove_pack_from_memory(pack)

766

916

# record the newly available packs and stop advertising the old

841

991

842

992

return pack_operations

843

993

844

def _copy_nodes(self, nodes, index_map, writer, write_index):

845

"""Copy knit nodes between packs with no graph references."""

846

pb = ui.ui_factory.nested_progress_bar()

847

try:

848

return self._do_copy_nodes(nodes, index_map, writer,

849

write_index, pb)

850

finally:

851

pb.finished()

852

853

def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):

854

# for record verification

855

knit_data = _KnitData(None)

856

# plan a readv on each source pack:

857

# group by pack

858

nodes = sorted(nodes)

859

# how to map this into knit.py - or knit.py into this?

860

# we don't want the typical knit logic, we want grouping by pack

861

# at this point - perhaps a helper library for the following code

862

# duplication points?

863

request_groups = {}

864

for index, key, value in nodes:

865

if index not in request_groups:

866

request_groups[index] = []

867

request_groups[index].append((key, value))

868

record_index = 0

869

pb.update("Copied record", record_index, len(nodes))

870

for index, items in request_groups.iteritems():

871

pack_readv_requests = []

872

for key, value in items:

873

# ---- KnitGraphIndex.get_position

874

bits = value[1:].split(' ')

875

offset, length = int(bits[0]), int(bits[1])

876

pack_readv_requests.append((offset, length, (key, value[0])))

877

# linear scan up the pack

878

pack_readv_requests.sort()

879

# copy the data

880

transport, path = index_map[index]

881

reader = pack.make_readv_reader(transport, path,

882

[offset[0:2] for offset in pack_readv_requests])

883

for (names, read_func), (_1, _2, (key, eol_flag)) in \

884

izip(reader.iter_records(), pack_readv_requests):

885

raw_data = read_func(None)

886

# check the header only

887

df, _ = knit_data._parse_record_header(key[-1], raw_data)

888

df.close()

889

pos, size = writer.add_bytes_record(raw_data, names)

890

write_index.add_node(key, eol_flag + "%d %d" % (pos, size))

891

pb.update("Copied record", record_index)

892

record_index += 1

893

894

def _copy_nodes_graph(self, nodes, index_map, writer, write_index,
    output_lines=False):
    """Copy knit nodes between packs.

    This is a generator: no record is copied until the result is iterated,
    and callers must exhaust it even when output_lines is False (in which
    case it yields nothing).  The progress bar stays open for the whole
    iteration and is finished once the copy completes or fails.

    :param nodes: The index nodes to copy; handed to _do_copy_nodes_graph.
    :param index_map: Passed through to _do_copy_nodes_graph.
    :param writer: Passed through to _do_copy_nodes_graph.
    :param write_index: Passed through to _do_copy_nodes_graph.
    :param output_lines: Return lines present in the copied data as
        an iterator.
    """
    pb = ui.ui_factory.nested_progress_bar()
    # _do_copy_nodes_graph is itself a generator, so simply returning it
    # from inside try/finally would finish the progress bar before a single
    # record had been copied.  Drive it from here instead so the bar is
    # only finished when the copy is really over.
    #
    # Spelled as except/else rather than try/finally because a yield inside
    # try/finally is a syntax error before Python 2.5.  The bare except
    # re-raises immediately; it exists purely so the bar is also released
    # when the consumer closes the generator early.
    try:
        for line in self._do_copy_nodes_graph(nodes, index_map, writer,
            write_index, output_lines, pb):
            yield line
    except:
        pb.finished()
        raise
    else:
        pb.finished()

907

908

def _do_copy_nodes_graph(self, nodes, index_map, writer, write_index,
    output_lines, pb):
    """Generator copying knit records with graph references between packs.

    Nothing happens until the generator is iterated.  When output_lines is
    true each record is fully parsed and its lines are yielded before the
    record is written; otherwise only the record header is checked and
    nothing is yielded.

    :param nodes: Iterable of (index, key, value, references) tuples.
        value is a string whose first character is kept as a flag and
        whose remainder is parsed as "<offset> <length>" within the source
        pack.
    :param index_map: Mapping from an index (as found in nodes) to the
        (transport, path) pair used to read the matching pack.
    :param writer: Object whose add_bytes_record(bytes, names) stores a
        record and returns its (position, size).
    :param write_index: Index receiving add_node(key, value, references)
        for every record copied.
    :param output_lines: If true, yield the lines held in each record.
    :param pb: Progress bar updated once per record.
    """
    # Used to verify records (header only, or whole record when lines are
    # being extracted).
    verifier = _KnitData(None)
    if output_lines:
        # Line extraction is only requested for inventories.
        factory = knit.KnitPlainFactory()
    ordered_nodes = sorted(nodes)
    copied = 0
    pb.update("Copied record", copied, len(ordered_nodes))
    # Bucket the nodes by the index (and therefore pack) they live in, so
    # that each source pack can be served by a single readv.
    by_pack = {}
    for source_index, node_key, node_value, node_refs in ordered_nodes:
        by_pack.setdefault(source_index, []).append(
            (node_key, node_value, node_refs))
    for source_index, entries in by_pack.iteritems():
        requests = []
        for node_key, node_value, node_refs in entries:
            # Same decoding as KnitGraphIndex.get_position.
            fields = node_value[1:].split(' ')
            start, size = int(fields[0]), int(fields[1])
            requests.append(
                (start, size, (node_key, node_value[0], node_refs)))
        # Ascending offsets give a linear scan up the pack.
        requests.sort()
        transport, path = index_map[source_index]
        byte_ranges = [request[0:2] for request in requests]
        reader = pack.make_readv_reader(transport, path, byte_ranges)
        # Records come back in request order, so pair them up positionally.
        paired = izip(reader.iter_records(), requests)
        for (names, read_func), (_1, _2, (node_key, eol_flag, node_refs)) \
            in paired:
            raw_data = read_func(None)
            if not output_lines:
                # Validate the record header only.
                stream, _ = verifier._parse_record_header(
                    node_key[-1], raw_data)
                stream.close()
            else:
                # Parse the whole record so its lines can be handed out.
                content, _ = verifier._parse_record(node_key[-1], raw_data)
                # An empty final reference list selects the fulltext
                # parser, anything else the line-delta parser.
                if len(node_refs[-1]) != 0:
                    record_lines = factory.get_linedelta_content(content)
                else:
                    record_lines = factory.get_fulltext_content(content)
                for line in record_lines:
                    yield line
            new_pos, new_size = writer.add_bytes_record(raw_data, names)
            write_index.add_node(
                node_key, eol_flag + "%d %d" % (new_pos, new_size),
                node_refs)
            pb.update("Copied record", copied)
            copied += 1

962

963

994

def ensure_loaded(self):

964

995

# NB: if you see an assertion error here, it's probably access against

965

996

# an unlocked repo. Naughty.

1295

1326

self._save_pack_names()

1296

1327

else:

1297

1328

self._new_pack.abort()

1329

self._new_pack = None

1298

1330

self.repo._text_knit = None

1299

1331

1300

1332

1467

1499

self._write_lock_count = 0

1468

1500

self._transaction = None

1469

1501

# for tests

1470

self._reconcile_does_inventory_gc = False

1502

self._reconcile_does_inventory_gc = True

1471

1503

self._reconcile_fixes_text_parents = False

1504

self._reconcile_backsup_inventory = False

1472

1505

1473

1506

def _abort_write_group(self):

1474

1507

self._pack_collection._abort_write_group()

1482

1515

return 'w'

1483

1516

return 'r'

1484

1517

1518

def _find_inconsistent_revision_parents(self):
    """Find revisions with incorrectly cached parents.

    The parents recorded for each revision in the combined revision index
    are compared with the parent_ids of the revision object itself.  The
    repository must be locked.

    :returns: a list of tuples of (revision-id, parents-in-index,
        parents-in-revision), one per revision whose two parent tuples
        differ.
    """
    assert self.is_locked()
    pb = ui.ui_factory.nested_progress_bar()
    result = []
    try:
        revision_nodes = self._pack_collection.revision_index \
            .combined_index.iter_all_entries()
        index_positions = []
        # Get the cached index values for all revisions, and also the
        # location in each index of the revision text so we can perform
        # linear IO.
        for index, key, value, refs in revision_nodes:
            # value is a one character flag followed by "<pos> <length>";
            # only the position is needed, for ordering.
            pos, length = value[1:].split(' ')
            index_positions.append((index, int(pos), key[0],
                tuple(parent[0] for parent in refs[0])))
            pb.update("Reading revision index.", 0, 0)
        # Sorting by (index, position) lets the revisions be fetched in
        # the order they are stored.
        index_positions.sort()
        batch_size = 1000
        # Floor division keeps batch_count an integer even where "/" means
        # true division.  The "+ 1" covers a trailing partial batch; when
        # the total is an exact multiple the surplus batch is empty and is
        # skipped by the break below.
        batch_count = len(index_positions) // batch_size + 1
        pb.update("Checking cached revision graph.", 0, batch_count)
        for offset in xrange(batch_count):
            pb.update("Checking cached revision graph.", offset)
            to_query = index_positions[
                offset * batch_size:(offset + 1) * batch_size]
            if not to_query:
                break
            rev_ids = [item[2] for item in to_query]
            revs = self.get_revisions(rev_ids)
            for revision, item in zip(revs, to_query):
                index_parents = item[3]
                rev_parents = tuple(revision.parent_ids)
                if index_parents != rev_parents:
                    result.append(
                        (revision.revision_id, index_parents, rev_parents))
    finally:
        pb.finished()
    return result

1556

1485

1557

def get_parents(self, revision_ids):

1486

1558

"""See StackedParentsProvider.get_parents.

1487

1559

1514

1586

return self

1515

1587

1516

1588

def _refresh_data(self):

1517

if self._write_lock_count == 1 or self.control_files._lock_count == 1:

1589

if self._write_lock_count == 1 or (

1590

self.control_files._lock_count == 1 and

1591

self.control_files._lock_mode == 'r'):

1518

1592

# forget what names there are

1519

1593

self._pack_collection.reset()

1520

1594

# XXX: Better to do an in-memory merge when acquiring a new lock -

Older »