@@ -73 +76 @@
     added text, reducing memory and object pressure.
     """
+
+    def __init__(self, repository, parents, config, timestamp=None,
+                 timezone=None, committer=None, revprops=None,
+                 revision_id=None):
+        CommitBuilder.__init__(self, repository, parents, config,
+            timestamp=timestamp, timezone=timezone, committer=committer,
+            revprops=revprops, revision_id=revision_id)
+        self._file_graph = graph.Graph(
+            repository._pack_collection.text_index.combined_index)
 
     def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
         return self.repository._pack_collection._add_text_to_weave(file_id,
             self._new_revision_id, new_lines, parents, nostore_sha,
             self.random_revid)
+
+    def _heads(self, file_id, revision_ids):
+        keys = [(file_id, revision_id) for revision_id in revision_ids]
+        return set([key[1] for key in self._file_graph.heads(keys)])
 
 
 class PackRootCommitBuilder(RootCommitBuilder):
     """A subclass of RootCommitBuilder to add texts with pack semantics.
 
     added text, reducing memory and object pressure.
     """
+
+    def __init__(self, repository, parents, config, timestamp=None,
+                 timezone=None, committer=None, revprops=None,
+                 revision_id=None):
+        CommitBuilder.__init__(self, repository, parents, config,
+            timestamp=timestamp, timezone=timezone, committer=committer,
+            revprops=revprops, revision_id=revision_id)
+        self._file_graph = graph.Graph(
+            repository._pack_collection.text_index.combined_index)
 
     def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
         return self.repository._pack_collection._add_text_to_weave(file_id,
             self._new_revision_id, new_lines, parents, nostore_sha,
             self.random_revid)
+
+    def _heads(self, file_id, revision_ids):
+        keys = [(file_id, revision_id) for revision_id in revision_ids]
+        return set([key[1] for key in self._file_graph.heads(keys)])
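
Both builders delegate _heads to a bzrlib.graph.Graph built over the text index, keyed by (file_id, revision_id) pairs. For readers new to the idea: the heads of a set of candidate revisions are the candidates that are not ancestors of any other candidate. A self-contained sketch of that semantics (toy dict ancestry and illustrative names, not the bzrlib Graph API):

    # Toy per-file ancestry: revision -> tuple of parent revisions.
    ancestry = {
        'rev-a': (),
        'rev-b': ('rev-a',),
        'rev-c': ('rev-a',),
        'rev-d': ('rev-b', 'rev-c'),
    }

    def heads(candidates):
        # A candidate is a head unless it is an ancestor of another candidate.
        reachable = set()
        for start in candidates:
            todo = list(ancestry[start])
            while todo:
                rev = todo.pop()
                if rev not in reachable:
                    reachable.add(rev)
                    todo.extend(ancestry[rev])
        return set(candidates) - reachable

    assert heads(['rev-b', 'rev-c']) == set(['rev-b', 'rev-c'])
    assert heads(['rev-a', 'rev-d']) == set(['rev-d'])

The commit builders use this to decide which per-file revisions are the live heads when computing the parents of a newly added text.
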
 
 
 class Pack(object):
     """An in memory proxy for a pack and its indices.
 
@@ -541 +598 @@
             self._pack_collection._pack_transport, upload_suffix=self.suffix,
             file_mode=self._pack_collection.repo.control_files._file_mode)
 
-    def _create_pack_from_packs(self):
-        self.pb.update("Opening pack", 0, 5)
-        new_pack = self.open_pack()
-        # buffer data - we won't be reading-back during the pack creation and
-        # this makes a significant difference on sftp pushes.
-        new_pack.set_write_cache_size(1024*1024)
-        if 'pack' in debug.debug_flags:
-            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
-                for a_pack in self.packs]
-            if self.revision_ids is not None:
-                rev_count = len(self.revision_ids)
-            mutter('%s: create_pack: creating pack from source packs: '
-                '%s%s %s revisions wanted %s t=0',
-                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
-                plain_pack_list, rev_count)
+    def _copy_revision_texts(self):
+        """Copy revision data to the new pack."""
         # select revisions
         if self.revision_ids:
             revision_keys = [(revision_id,) for revision_id in self.revision_ids]
         else:
             revision_keys = None
         # select revision keys
         revision_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
             self.packs, 'revision_index')[0]
         revision_nodes = self._pack_collection._index_contents(revision_index_map, revision_keys)
         # copy revision keys and adjust values
         self.pb.update("Copying revision texts", 1)
-        list(self._copy_nodes_graph(revision_nodes, revision_index_map,
-            new_pack._writer, new_pack.revision_index))
+        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
+        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
+            self.new_pack.revision_index, readv_group_iter, total_items))
         if 'pack' in debug.debug_flags:
             mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
-                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
-                new_pack.revision_index.key_count(),
-                time.time() - new_pack.start_time)
+                time.ctime(), self._pack_collection._upload_transport.base,
+                self.new_pack.random_name,
+                self.new_pack.revision_index.key_count(),
+                time.time() - self.new_pack.start_time)
+        self._revision_keys = revision_keys
+
+    def _copy_inventory_texts(self):
+        """Copy the inventory texts to the new pack.
+
+        self._revision_keys is used to determine what inventories to copy.
+
+        Sets self._text_filter appropriately.
+        """
         # select inventory keys
-        inv_keys = revision_keys # currently the same keyspace, and note that
+        inv_keys = self._revision_keys # currently the same keyspace, and note that
         # querying for keys here could introduce a bug where an inventory item
         # is missed, so do not change it to query separately without cross
         # checking like the text key check below.
         # XXX: Should be a helper function to allow different inv representation
         self.pb.update("Copying inventory texts", 2)
-        inv_lines = self._copy_nodes_graph(inv_nodes, inventory_index_map,
-            new_pack._writer, new_pack.inventory_index, output_lines=True)
+        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
+        inv_lines = self._copy_nodes_graph(inventory_index_map,
+            self.new_pack._writer, self.new_pack.inventory_index,
+            readv_group_iter, total_items, output_lines=True)
         if self.revision_ids:
-            fileid_revisions = self._pack_collection.repo._find_file_ids_from_xml_inventory_lines(
-                inv_lines, self.revision_ids)
-            for fileid, file_revids in fileid_revisions.iteritems():
-                    [(fileid, file_revid) for file_revid in file_revids])
+            self._process_inventory_lines(inv_lines)
         else:
             # eat the iterator to cause it to execute.
             list(inv_lines)
+            self._text_filter = None
         if 'pack' in debug.debug_flags:
             mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
-                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
-                new_pack.inventory_index.key_count(),
-                time.time() - new_pack.start_time)
+                time.ctime(), self._pack_collection._upload_transport.base,
+                self.new_pack.random_name,
+                self.new_pack.inventory_index.key_count(),
+                time.time() - self.new_pack.start_time)
+
+    def _copy_text_texts(self):
         # select text keys
-        text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
-            self.packs, 'text_index')[0]
-        text_nodes = self._pack_collection._index_contents(text_index_map, text_filter)
-        if text_filter is not None:
+        text_index_map, text_nodes = self._get_text_nodes()
+        if self._text_filter is not None:
             # We could return the keys copied as part of the return value from
             # _copy_nodes_graph but this doesn't work all that well with the
             # need to get line output too, so we check separately, and as we're
                     a_missing_key[0])
         # copy text keys and adjust values
         self.pb.update("Copying content texts", 3)
-        list(self._copy_nodes_graph(text_nodes, text_index_map,
-            new_pack._writer, new_pack.text_index))
+        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
+        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
+            self.new_pack.text_index, readv_group_iter, total_items))
+        self._log_copied_texts()
+
+    def _check_references(self):
+        """Make sure our external references are present."""
+        external_refs = self.new_pack._external_compression_parents_of_texts()
+        index = self._pack_collection.text_index.combined_index
+        found_items = list(index.iter_entries(external_refs))
+        if len(found_items) != len(external_refs):
+            found_keys = set(k for idx, k, refs, value in found_items)
+            missing_items = external_refs - found_keys
+            missing_file_id, missing_revision_id = missing_items.pop()
+            raise errors.RevisionNotPresent(missing_revision_id,
+                missing_file_id)
+
+    def _create_pack_from_packs(self):
+        self.pb.update("Opening pack", 0, 5)
+        self.new_pack = self.open_pack()
+        new_pack = self.new_pack
+        # buffer data - we won't be reading-back during the pack creation and
+        # this makes a significant difference on sftp pushes.
+        new_pack.set_write_cache_size(1024*1024)
         if 'pack' in debug.debug_flags:
-            mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
+            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
+                for a_pack in self.packs]
+            if self.revision_ids is not None:
+                rev_count = len(self.revision_ids)
+            mutter('%s: create_pack: creating pack from source packs: '
+                '%s%s %s revisions wanted %s t=0',
                 time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
-                new_pack.text_index.key_count(),
-                time.time() - new_pack.start_time)
+                plain_pack_list, rev_count)
+        self._copy_revision_texts()
+        self._copy_inventory_texts()
+        self._copy_text_texts()
         # select signature keys
-        signature_filter = revision_keys # same keyspace
+        signature_filter = self._revision_keys # same keyspace
         signature_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
             self.packs, 'signature_index')[0]
         signature_nodes = self._pack_collection._index_contents(signature_index_map,
             signature_filter)
 
@@ -719 +802 @@
         pb = ui.ui_factory.nested_progress_bar()
-            return self._do_copy_nodes_graph(nodes, index_map, writer,
-                write_index, output_lines, pb)
+            for result in self._do_copy_nodes_graph(index_map, writer,
+                write_index, output_lines, pb, readv_group_iter, total_items):
+            # Python 2.4 does not permit try:finally: in a generator.
 
-    def _do_copy_nodes_graph(self, nodes, index_map, writer, write_index,
+    def _do_copy_nodes_graph(self, index_map, writer, write_index,
+        output_lines, pb, readv_group_iter, total_items):
         # for record verification
         knit_data = _KnitData(None)
         # for line extraction when requested (inventories only)
         factory = knit.KnitPlainFactory()
-        # plan a readv on each source pack:
-        nodes = sorted(nodes)
-        # how to map this into knit.py - or knit.py into this?
-        # we don't want the typical knit logic, we want grouping by pack
-        # at this point - perhaps a helper library for the following code
-        # duplication points?
-        pb.update("Copied record", record_index, len(nodes))
-        for index, key, value, references in nodes:
-            if index not in request_groups:
-                request_groups[index] = []
-            request_groups[index].append((key, value, references))
-        for index, items in request_groups.iteritems():
-            pack_readv_requests = []
-            for key, value, references in items:
-                # ---- KnitGraphIndex.get_position
-                bits = value[1:].split(' ')
-                offset, length = int(bits[0]), int(bits[1])
-                pack_readv_requests.append((offset, length, (key, value[0], references)))
-            # linear scan up the pack
-            pack_readv_requests.sort()
+        pb.update("Copied record", record_index, total_items)
+        for index, readv_vector, node_vector in readv_group_iter:
             transport, path = index_map[index]
-            reader = pack.make_readv_reader(transport, path,
-                [offset[0:2] for offset in pack_readv_requests])
-            for (names, read_func), (_1, _2, (key, eol_flag, references)) in \
-                izip(reader.iter_records(), pack_readv_requests):
+            reader = pack.make_readv_reader(transport, path, readv_vector)
+            for (names, read_func), (key, eol_flag, references) in \
+                izip(reader.iter_records(), node_vector):
                 raw_data = read_func(None)
                 version_id = key[-1]
                 pb.update("Copied record", record_index)
                 record_index += 1
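
The rewritten loop relies on an ordering invariant from the group producer: each readv_vector is sorted once when the group is built, and node_vector is kept parallel to it, so the records streamed back can be paired positionally with their metadata by izip. A minimal, self-contained illustration of that pairing (toy byte store and readv, not the bzrlib transport or pack reader API):

    from itertools import izip

    # One group as described above: parallel readv and node vectors.
    readv_vector = [(0, 5), (5, 7), (12, 3)]      # (offset, length) pairs
    node_vector = [(('f1', 'r1'), 'N', ()),       # (key, eol_flag, references)
                   (('f1', 'r2'), ' ', ()),
                   (('f2', 'r1'), 'N', ())]
    pack_bytes = 'AAAAABBBBBBBCCC'

    def readv(requests):
        # Toy readv: return the requested ranges in request order.
        return [pack_bytes[offset:offset + length] for offset, length in requests]

    copied = []
    for raw_data, (key, eol_flag, references) in izip(readv(readv_vector),
                                                      node_vector):
        copied.append((key, raw_data))
    assert copied == [(('f1', 'r1'), 'AAAAA'),
                      (('f1', 'r2'), 'BBBBBBB'),
                      (('f2', 'r1'), 'CCC')]
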
 
+    def _get_text_nodes(self):
+        text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
+            self.packs, 'text_index')[0]
+        return text_index_map, self._pack_collection._index_contents(text_index_map,
+            self._text_filter)
+
+    def _least_readv_node_readv(self, nodes):
+        """Generate request groups for nodes using the least readv's.
+
+        :param nodes: An iterable of graph index nodes.
+        :return: Total node count and an iterator of the data needed to perform
+            readvs to obtain the data for nodes. Each item yielded by the
+            iterator is a tuple with:
+            index, readv_vector, node_vector. readv_vector is a list ready to
+            hand to the transport readv method, and node_vector is a list of
+            (key, eol_flag, references) for the node retrieved by the
+            matching readv_vector.
+        """
+        # group by pack so we do one readv per pack
+        nodes = sorted(nodes)
+        for index, key, value, references in nodes:
+            if index not in request_groups:
+                request_groups[index] = []
+            request_groups[index].append((key, value, references))
+        for index, items in request_groups.iteritems():
+            pack_readv_requests = []
+            for key, value, references in items:
+                # ---- KnitGraphIndex.get_position
+                bits = value[1:].split(' ')
+                offset, length = int(bits[0]), int(bits[1])
+                pack_readv_requests.append(
+                    ((offset, length), (key, value[0], references)))
+            # linear scan up the pack to maximum range combining.
+            pack_readv_requests.sort()
+            # split out the readv and the node data.
+            pack_readv = [readv for readv, node in pack_readv_requests]
+            node_vector = [node for readv, node in pack_readv_requests]
+            result.append((index, pack_readv, node_vector))
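
A note on the value parsing above: in a KnitGraphIndex node the value string carries a one-byte no-eol flag followed by ascii 'offset length' coordinates into the pack, which is why value[1:].split(' ') yields the readv request and value[0] travels along with the node. The following condensed, runnable restatement of the grouping shows the shape of the return value; it adds the request_groups/total/result initialisation the excerpt above elides, and uses toy index names rather than real GraphIndex objects:

    def least_readv_node_readv(nodes):
        # nodes are (index, key, value, references) tuples from a GraphIndex.
        request_groups = {}
        result = []
        total = 0
        for index, key, value, references in sorted(nodes):
            request_groups.setdefault(index, []).append((key, value, references))
            total += 1
        for index, items in request_groups.iteritems():
            pack_readv_requests = []
            for key, value, references in items:
                bits = value[1:].split(' ')      # value[0] is the no-eol flag
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            pack_readv_requests.sort()           # one ascending scan per pack
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    nodes = [('idx-1', ('f1', 'r2'), 'N100 20', ()),
             ('idx-1', ('f1', 'r1'), ' 0 100', ()),
             ('idx-2', ('f2', 'r1'), 'N0 17', ())]
    total, groups = least_readv_node_readv(nodes)
    assert total == 3
    assert sorted(groups)[0] == ('idx-1',
        [(0, 100), (100, 20)],
        [(('f1', 'r1'), ' ', ()), (('f1', 'r2'), 'N', ())])
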
+
+    def _log_copied_texts(self):
+        if 'pack' in debug.debug_flags:
+            mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self._pack_collection._upload_transport.base,
+                self.new_pack.random_name,
+                self.new_pack.text_index.key_count(),
+                time.time() - self.new_pack.start_time)
+
+    def _process_inventory_lines(self, inv_lines):
+        """Use up the inv_lines generator and set up a text key filter."""
+        repo = self._pack_collection.repo
+        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
+            inv_lines, self.revision_ids)
+        text_filter = []
+        for fileid, file_revids in fileid_revisions.iteritems():
+            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
+        self._text_filter = text_filter
+
+    def _revision_node_readv(self, revision_nodes):
+        """Return the total revisions and the readv's to issue.
+
+        :param revision_nodes: The revision index contents for the packs being
+            incorporated into the new pack.
+        :return: As per _least_readv_node_readv.
+        """
+        return self._least_readv_node_readv(revision_nodes)
+
+    def _use_pack(self, new_pack):
+        """Return True if new_pack should be used.
+
+        :param new_pack: The pack that has just been created.
+        :return: True if the pack should be used.
+        """
+        return new_pack.data_inserted()
+
+
+class OptimisingPacker(Packer):
+    """A packer which spends more time to create better disk layouts."""
+
+    def _revision_node_readv(self, revision_nodes):
+        """Return the total revisions and the readv's to issue.
+
+        This sort places revisions in topological order with the ancestors
+        after the children.
+
+        :param revision_nodes: The revision index contents for the packs being
+            incorporated into the new pack.
+        :return: As per _least_readv_node_readv.
+        """
+        # build an ancestors dict
+        for index, key, value, references in revision_nodes:
+            ancestors[key] = references[0]
+            by_key[key] = (index, value, references)
+        order = tsort.topo_sort(ancestors)
+        # Single IO is pathological, but it will work as a starting point.
+        for key in reversed(order):
+            index, value, references = by_key[key]
+            # ---- KnitGraphIndex.get_position
+            bits = value[1:].split(' ')
+            offset, length = int(bits[0]), int(bits[1])
+            requests.append(
+                (index, [(offset, length)], [(key, value[0], references)]))
+        # TODO: combine requests in the same index that are in ascending order.
+        return total, requests
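
The ordering trick here: tsort.topo_sort emits parents before children, and the loop above walks it reversed, so requests are issued newest-first and a forward scan of the new pack meets a child before its ancestors; one readv per revision is accepted as a simple starting point. A self-contained sketch of that property (a naive quadratic sort standing in for bzrlib.tsort, acyclic input assumed):

    def topo_sort(ancestors):
        # Parents-before-children order for an acyclic {key: parents} dict.
        order, done = [], set()
        remaining = sorted(ancestors)
        while remaining:
            deferred = []
            for key in remaining:
                if all(parent in done for parent in ancestors[key]):
                    order.append(key)
                    done.add(key)
                else:
                    deferred.append(key)
            remaining = deferred
        return order

    ancestors = {'r1': (), 'r2': ('r1',), 'r3': ('r2',)}
    order = topo_sort(ancestors)
    assert order == ['r1', 'r2', 'r3']
    # reversed(order) is the write order: children first, ancestors after.
    assert list(reversed(order)) == ['r3', 'r2', 'r1']
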
 
 
 class ReconcilePacker(Packer):
     """A packer which regenerates indices etc as it copies.
     """
 
+    def _extra_init(self):
+        self._data_changed = False
+
+    def _process_inventory_lines(self, inv_lines):
+        """Generate a text key reference map, rather than a filter, for reconciling."""
+        repo = self._pack_collection.repo
+        refs = repo._find_text_key_references_from_xml_inventory_lines(
+            inv_lines)
+        self._text_refs = refs
+        # during reconcile we:
+        # - convert unreferenced texts to full texts
+        # - correct texts which reference a text not copied to be full texts
+        # - copy all others as-is but with corrected parents.
+        # - so at this point we don't know enough to decide what becomes a full
+        #   text.
+        self._text_filter = None
+
+    def _copy_text_texts(self):
+        """Generate what texts we should have and then copy."""
+        self.pb.update("Copying content texts", 3)
+        # we have three major tasks here:
+        # 1) generate the ideal index
+        repo = self._pack_collection.repo
+        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
+            _1, key, _2, refs in
+            self.new_pack.revision_index.iter_all_entries()])
+        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
+        # 2) generate a text_nodes list that contains all the deltas that can
+        # be used as-is, with corrected parents.
+        discarded_nodes = []
+        NULL_REVISION = _mod_revision.NULL_REVISION
+        text_index_map, text_nodes = self._get_text_nodes()
+        for node in text_nodes:
+                ideal_parents = tuple(ideal_index[node[1]])
+                discarded_nodes.append(node)
+                self._data_changed = True
+                if ideal_parents == (NULL_REVISION,):
+                if ideal_parents == node[3][0]:
+                    ok_nodes.append(node)
+                elif ideal_parents[0:1] == node[3][0][0:1]:
+                    # the left most parent is the same, or there are no parents
+                    # today. Either way, we can preserve the representation as
+                    # long as we change the refs to be inserted.
+                    self._data_changed = True
+                    ok_nodes.append((node[0], node[1], node[2],
+                        (ideal_parents, node[3][1])))
+                    self._data_changed = True
+                    # Reinsert this text completely
+                    bad_texts.append((node[1], ideal_parents))
+                    self._data_changed = True
+        # we're finished with some data.
+        # 3) bulk copy the ok data
+        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
+        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
+            self.new_pack.text_index, readv_group_iter, total_items))
+        # 4) ad hoc copy all the other texts.
+        # We have to topologically insert all texts otherwise we can fail to
+        # reconcile when parts of a single delta chain are preserved intact,
+        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
+        # reinserted, and if d3 has incorrect parents it will also be
+        # reinserted. If we insert d3 first, d2 is present (as it was bulk
+        # copied), so we will try to delta, but d2 is not currently able to be
+        # extracted because its basis d1 is not present. Topologically sorting
+        # addresses this. The following generates a sort for all the texts that
+        # are being inserted without having to reference the entire text key
+        # space (we only topo sort the revisions, which is smaller).
+        topo_order = tsort.topo_sort(ancestors)
+        rev_order = dict(zip(topo_order, range(len(topo_order))))
+        bad_texts.sort(key=lambda key:rev_order[key[0][1]])
+        transaction = repo.get_transaction()
+        file_id_index = GraphIndexPrefixAdapter(
+            self.new_pack.text_index,
+            add_nodes_callback=self.new_pack.text_index.add_nodes)
+        knit_index = KnitGraphIndex(file_id_index,
+            add_callback=file_id_index.add_nodes,
+            deltas=True, parents=True)
+        output_knit = knit.KnitVersionedFile('reconcile-texts',
+            self._pack_collection.transport,
+            access_method=_PackAccess(
+                {self.new_pack.text_index:self.new_pack.access_tuple()},
+                (self.new_pack._writer, self.new_pack.text_index)),
+            factory=knit.KnitPlainFactory())
+        for key, parent_keys in bad_texts:
+            # We refer to the new pack to delta against data being output.
+            # A possible improvement would be to catch errors on short reads
+            # and only flush then.
+            self.new_pack.flush()
+            parents = []
+            for parent_key in parent_keys:
+                if parent_key[0] != key[0]:
+                    # Graph parents must match the fileid
+                    raise errors.BzrError('Mismatched key parent %r:%r' %
+                        (key, parent_keys))
+                parents.append(parent_key[1])
+            source_weave = repo.weave_store.get_weave(key[0], transaction)
+            text_lines = source_weave.get_lines(key[1])
+            # adapt the 'knit' to the current file_id.
+            file_id_index = GraphIndexPrefixAdapter(
+                self.new_pack.text_index,
+                add_nodes_callback=self.new_pack.text_index.add_nodes)
+            knit_index._graph_index = file_id_index
+            knit_index._add_callback = file_id_index.add_nodes
+            output_knit.add_lines_with_ghosts(
+                key[1], parents, text_lines, random_id=True, check_content=False)
+        # 5) check that nothing inserted has a reference outside the keyspace.
+        missing_text_keys = self.new_pack._external_compression_parents_of_texts()
+        if missing_text_keys:
+            raise errors.BzrError('Reference to missing compression parents %r'
+                % (missing_text_keys,))
+        self._log_copied_texts()
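
The topological-insertion comment above (step 4) is the subtle part of this method: a reinserted text may be deltaed against a basis that is itself being reinserted, so every basis must be written before anything compressed against it. Sorting bad_texts by the topological rank of each text's revision achieves that. A small sketch of the sort key, using the ((file_id, rev_id), ideal_parents) pair layout from the code above:

    topo_order = ['r1', 'r2', 'r3']      # ancestors first, as tsort emits
    rev_order = dict(zip(topo_order, range(len(topo_order))))

    bad_texts = [(('f1', 'r3'), ()),     # (text key, ideal parents)
                 (('f2', 'r1'), ()),
                 (('f1', 'r2'), ())]
    # Sort by the topological rank of each text's revision id (key[0][1]).
    bad_texts.sort(key=lambda key: rev_order[key[0][1]])
    assert [key for key, parents in bad_texts] == [
        ('f2', 'r1'), ('f1', 'r2'), ('f1', 'r3')]
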
+
+    def _use_pack(self, new_pack):
+        """Override _use_pack to check for reconcile having changed content."""
+        # XXX: we might be better checking this at the copy time.
+        original_inventory_keys = set()
+        inv_index = self._pack_collection.inventory_index.combined_index
+        for entry in inv_index.iter_all_entries():
+            original_inventory_keys.add(entry[1])
+        new_inventory_keys = set()
+        for entry in new_pack.inventory_index.iter_all_entries():
+            new_inventory_keys.add(entry[1])
+        if new_inventory_keys != original_inventory_keys:
+            self._data_changed = True
+        return new_pack.data_inserted() and self._data_changed
 
 
 class RepositoryPackCollection(object):
     """Management of packs within a repository."""
 
@@ -1573 +1899 @@
+    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
     def get_parents(self, revision_ids):
-        """See StackedParentsProvider.get_parents.
+        """See graph._StackedParentsProvider.get_parents."""
+        parent_map = self.get_parent_map(revision_ids)
+        return [parent_map.get(r, None) for r in revision_ids]
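
The contract difference between the two APIs matters to callers: get_parent_map returns a dict that silently omits revisions it cannot find, while the deprecated list API padded missing entries with None in input order, which is exactly what the shim above reconstructs. A toy model of the two behaviours (hypothetical known dict, for illustration only):

    def get_parent_map(keys):
        # Dict API: absent revisions are simply left out of the result.
        known = {'rev-1': (), 'rev-2': ('rev-1',)}
        return dict((key, known[key]) for key in keys if key in known)

    def get_parents(revision_ids):
        # Deprecated list API: one entry per query, None for unknowns.
        parent_map = get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    assert get_parent_map(['rev-2', 'ghost']) == {'rev-2': ('rev-1',)}
    assert get_parents(['rev-2', 'ghost']) == [('rev-1',), None]
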
+
+    def get_parent_map(self, keys):
+        """See graph._StackedParentsProvider.get_parent_map
 
         This implementation accesses the combined revision index to provide
         answers.
         """
         self._pack_collection.ensure_loaded()
         index = self._pack_collection.revision_index.combined_index
-        for revision_id in revision_ids:
-            if revision_id != _mod_revision.NULL_REVISION:
-                search_keys.add((revision_id,))
-        found_parents = {_mod_revision.NULL_REVISION:[]}
+        if _mod_revision.NULL_REVISION in keys:
+            keys.discard(_mod_revision.NULL_REVISION)
+            found_parents = {_mod_revision.NULL_REVISION:()}
+        else:
+            found_parents = {}
+        search_keys = set((revision_id,) for revision_id in keys)
         for index, key, value, refs in index.iter_entries(search_keys):
             parents = refs[0]
             if not parents: