/brz/remove-bazaar : revision 1551.21.8

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Aaron Bentley
Date: 2009-03-09 00:48:43 UTC
mto: This revision was merged to the branch mainline in revision 4095.
Revision ID: aaron@aaronbentley.com-20090309004843-twuto34yqkzez2u3

Remove now-redundant DummyProgress from Merger.__init__

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2-windows.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

errors,

osutils,

patiencediff,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

SHA1KnitCorrupt,

)

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_string,

sha_strings,

split_lines,

)

100

from bzrlib.versionedfile import (

101

AbsentContentFactory,

102

adapter_registry,

103

ConstantMapper,

104

ContentFactory,

105

ChunkedContentFactory,

106

VersionedFile,

107

VersionedFiles,

108

)

109

110

111

# TODO: Split out code specific to this format into an associated object.

112

113

# TODO: Can we put in some kind of value to check that the index and data

114

# files belong together?

115

116

# TODO: accommodate binaries, perhaps by storing a byte count

117

118

# TODO: function to check whole file

119

120

# TODO: atomically append data, then measure backwards from the cursor

121

# position after writing to work out where it was located. we may need to

122

# bypass python file buffering.

123

124

DATA_SUFFIX = '.knit'

125

INDEX_SUFFIX = '.kndx'

126

_STREAM_MIN_BUFFER_SIZE = 5*1024*1024

127

128

129

class KnitAdapter(object):
    """Common base for adapters that convert knit records between forms.

    Subclasses implement ``get_bytes(factory)``; this base class only builds
    the helper objects those implementations share.
    """

    def __init__(self, basis_vf):
        """Prepare the parsing helpers used by the concrete adapters.

        :param basis_vf: A versioned file from which the basis texts of
            deltas are fetched.  May be None for adapters that never need
            to look up a basis text.
        """
        self._basis_vf = basis_vf
        # Built without an index or data access object: the adapters only
        # use it for its record parsing / serialising helpers.
        self._data = KnitVersionedFiles(None, None)
        self._annotate_factory = KnitAnnotateFactory()
        self._plain_factory = KnitPlainFactory()

142

143

144

class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated fulltext record into an unannotated one."""

    def get_bytes(self, factory):
        """Re-serialise the record held by factory without annotations.

        :param factory: A content factory whose ``_raw_record`` is an
            annotated fulltext knit record.
        :return: The record bytes produced by ``_record_to_data`` for the
            annotation-free text.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # header[1] / header[3] are passed straight back to _record_to_data
        # as key element and digest -- presumably the version id and sha1
        # fields of the record header; confirm in _parse_record_unchecked.
        version_id = header[1]
        parsed = self._annotate_factory.parse_fulltext(annotated_lines,
            version_id)
        _size, plain_record = self._data._record_to_data(
            (version_id,), header[3], parsed.text())
        return plain_record

154

155

156

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated line-delta record into a plain one."""

    def get_bytes(self, factory):
        """Re-serialise the delta held by factory without annotations.

        :param factory: A content factory whose ``_raw_record`` is an
            annotated line-delta knit record.
        :return: The record bytes produced by ``_record_to_data`` for the
            unannotated delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        # plain=True makes the annotate factory drop the origin of each
        # line, giving a delta the plain factory can serialise directly.
        delta = self._annotate_factory.parse_line_delta(annotated_lines,
            version_id, plain=True)
        plain_lines = self._plain_factory.lower_line_delta(delta)
        _size, plain_record = self._data._record_to_data(
            (version_id,), header[3], plain_lines)
        return plain_record

168

169

170

class FTAnnotatedToFullText(KnitAdapter):
    """Adapter from an annotated fulltext knit record to its full text."""

    def get_bytes(self, factory):
        """Return the text stored in factory's record as a single string.

        :param factory: A content factory whose ``_raw_record`` is an
            annotated fulltext knit record; its ``key`` and
            ``_build_details`` are used to parse it.
        :return: The content lines joined into one string.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # No base content is supplied (None): this adapter handles
        # fulltext records only.
        parsed, _delta = self._annotate_factory.parse_record(
            factory.key[-1], annotated_lines, factory._build_details, None)
        return ''.join(parsed.text())

180

181

182

class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapter from an annotated line-delta knit record to its full text."""

    def get_bytes(self, factory):
        """Rebuild the full text for the delta record held by factory.

        The basis text is fetched from the ``basis_vf`` given at
        construction, using the first parent of factory as the compression
        parent.

        :param factory: A content factory whose ``_raw_record`` is an
            annotated line-delta knit record.
        :return: The reconstructed text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        delta = self._annotate_factory.parse_line_delta(annotated_lines,
            version_id, plain=True)
        basis_key = factory.parents[0]
        basis_stream = self._basis_vf.get_record_stream(
            [basis_key], 'unordered', True)
        basis_record = basis_stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(basis_key, self._basis_vf)
        basis_lines = osutils.chunks_to_lines(
            basis_record.get_bytes_as('chunked'))
        # The delta was parsed as plain (no origins) while the record itself
        # is annotated, so apply it by hand to a plain content object
        # instead of going through a factory's parse_record.
        full_content = PlainKnitContent(basis_lines, basis_key)
        full_content.apply_delta(delta, version_id)
        full_content._should_strip_eol = factory._build_details[1]
        return ''.join(full_content.text())

204

205

206

class FTPlainToFullText(KnitAdapter):
    """Adapter from an unannotated fulltext knit record to its full text."""

    def get_bytes(self, factory):
        """Return the text stored in factory's record as a single string.

        :param factory: A content factory whose ``_raw_record`` is a plain
            (unannotated) fulltext knit record.
        :return: The content lines joined into one string.
        """
        header, plain_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # Fulltext records need no base content, hence the trailing None.
        parsed, _delta = self._plain_factory.parse_record(
            factory.key[-1], plain_lines, factory._build_details, None)
        return ''.join(parsed.text())

216

217

218

class DeltaPlainToFullText(KnitAdapter):
    """Adapter from an unannotated line-delta knit record to its full text."""

    def get_bytes(self, factory):
        """Rebuild the full text for the delta record held by factory.

        The basis text is fetched from the ``basis_vf`` given at
        construction, using the first parent of factory as the compression
        parent.

        :param factory: A content factory whose ``_raw_record`` is a plain
            line-delta knit record.
        :return: The reconstructed text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        header, delta_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        # NOTE(review): the parsed delta is never read -- parse_record below
        # parses the same lines again.  The call is kept so a malformed
        # delta still fails before the basis text is fetched.
        self._plain_factory.parse_line_delta(delta_lines, version_id)
        basis_key = factory.parents[0]
        # XXX: string splitting overhead.
        basis_stream = self._basis_vf.get_record_stream(
            [basis_key], 'unordered', True)
        basis_record = basis_stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(basis_key, self._basis_vf)
        basis_lines = osutils.chunks_to_lines(
            basis_record.get_bytes_as('chunked'))
        basis_content = PlainKnitContent(basis_lines, basis_key)
        # Both sides are plain here, so the factory can apply the delta on
        # top of (a copy of) the basis content.
        full_content, _delta = self._plain_factory.parse_record(
            version_id, delta_lines, factory._build_details, basis_content)
        return ''.join(full_content.text())

240

241

242

class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.  Element 0 is the build method (compared
            against 'line-delta'); element 1 is the no-eol flag.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object providing get_lines() / get_text(),
            used to serve 'chunked' and 'fulltext' requests.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # storage_kind is 'knit-[annotated-](delta|ft)-gz'.
        if build_details[0] == 'line-delta':
            compression = 'delta'
        else:
            compression = 'ft'
        if annotated:
            prefix = 'knit-annotated-'
        else:
            prefix = 'knit-'
        self.storage_kind = prefix + compression + '-gz'
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Serialise this record for transmission and cache the result.

        The layout is three newline-terminated header lines -- storage kind,
        NUL-joined key, tab-separated NUL-joined parent keys (or 'None:') --
        followed by a one-byte no-eol flag ('N' or ' ') and the raw record.
        """
        key_bytes = '\x00'.join(self.key)
        if self.parents is not None:
            parent_bytes = '\t'.join(
                ['\x00'.join(parent) for parent in self.parents])
        else:
            parent_bytes = 'None:'
        if self._build_details[1]:
            eol_flag = 'N'
        else:
            eol_flag = ' '
        fields = (self.storage_kind, key_bytes, parent_bytes, eol_flag,
            self._raw_record)
        self._network_bytes = "%s\n%s\n%s\n%s%s" % fields

    def get_bytes_as(self, storage_kind):
        """Return this record's content in the requested representation.

        :param storage_kind: Either this factory's own storage kind (the
            network bytes are returned, built on first use), or 'chunked' /
            'fulltext' when a knit was supplied at construction.
        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            if self._network_bytes is None:
                self._create_network_bytes()
            return self._network_bytes
        knit = self._knit
        if knit is not None:
            if storage_kind == 'chunked':
                return knit.get_lines(self.key[0])
            if storage_kind == 'fulltext':
                return knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

308

309

310

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory which can either generate full text or a wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for
            this key.
        :param first: Is this the first content object returned from
            generator?  If it is, its storage kind is knit-delta-closure,
            otherwise it is knit-delta-closure-ref.
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        self._generator = generator
        if first:
            self.storage_kind = "knit-delta-closure"
        else:
            self.storage_kind = "knit-delta-closure" + "-ref"
        self._first = first

    def get_bytes_as(self, storage_kind):
        """Return this record's content in the requested representation.

        :param storage_kind: This factory's own storage kind (wire form),
            'chunked' (list of chunks) or 'fulltext' (single string).
        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                # Everything (keys etc.) travels in the bytes returned by
                # the first record, so later records carry nothing.
                return ''
            return self._generator._wire_bytes()
        if storage_kind in ('chunked', 'fulltext'):
            chunks = self._generator._get_one_work(self.key).text()
            if storage_kind != 'chunked':
                return ''.join(chunks)
            return chunks
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

352

353

354

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to an iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.  (Not inspected here.)
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset into bytes, handed unchanged to
        _NetworkContentMapGenerator -- presumably the position just after
        the storage-kind line; confirm against the caller.
    :return: The record stream of the generator built from bytes.
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

363

364

365

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    Starting at line_end, bytes is read as: a line with the NUL-separated
    key, a line with tab-separated NUL-separated parent keys (or the
    literal 'None:' for no parent information), one flag byte ('N' meaning
    no final end-of-line) and finally the raw knit record.

    :param storage_kind: The storage kind of the record.  'ft' in the name
        selects the fulltext build method (otherwise line-delta), and
        'annotated' marks the record as annotated.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A one-element list holding the KnitContentFactory.
    """
    key_start = line_end
    key_end = bytes.find('\n', key_start)
    key = tuple(bytes[key_start:key_end].split('\x00'))

    parents_start = key_end + 1
    parents_end = bytes.find('\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line != 'None:':
        # Empty segments are skipped, so an empty line yields ().
        parents = tuple([tuple(segment.split('\x00'))
                         for segment in parent_line.split('\t')
                         if segment])
    else:
        parents = None

    flag_pos = parents_end + 1
    noeol = bytes[flag_pos] == 'N'
    if 'ft' in storage_kind:
        method = 'fulltext'
    else:
        method = 'line-delta'
    raw_record = bytes[flag_pos + 1:]
    annotated = 'annotated' in storage_kind
    # The complete input is kept as network_bytes so the factory does not
    # have to serialise itself again.
    return [KnitContentFactory(key, parents, (method, noeol), None,
        raw_record, annotated, network_bytes=bytes)]

395

396

397

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    This is always stored in memory as a list of lines, each with a
    trailing newline, plus a flag saying if the final line ending is really
    there or not, because that corresponds to the on-disk knit
    representation.
    """

    def __init__(self):
        # When True, text() of the subclasses strips the newline from the
        # last line.
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Must be provided by subclasses.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: The target content object.
        :return: An iterator of (old_start, old_end, new_length, new_data)
            tuples, one per non-matching region.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(None, old_texts,
            new_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag != 'equal':
                # The replacement data comes from the target's internal
                # lines, not from its text().
                yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta

        :param knit_delta: Iterable of (source_start, source_end,
            target_length, new_text) hunks.
        :param source: The lines the delta applies to.
        :param target: The lines the delta produces.
        :return: An iterator of (source_pos, target_pos, length) matching
            blocks, terminated by a zero-length block.
        """
        target_len = len(target)
        src_cursor = 0
        tgt_cursor = 0
        for hunk_start, hunk_end, new_count, _new_text in knit_delta:
            gap = hunk_start - src_cursor
            matched = gap
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the
            # final line of each run is compared explicitly.
            if matched > 0 and (source[src_cursor + matched - 1]
                                != target[tgt_cursor + matched - 1]):
                matched -= 1
            if matched > 0:
                yield src_cursor, tgt_cursor, matched
            tgt_cursor += new_count + gap
            src_cursor = hunk_end
        # Whatever follows the last hunk matches, up to the eol check.
        matched = target_len - tgt_cursor
        if matched > 0 and (source[src_cursor + matched - 1]
                            != target[tgt_cursor + matched - 1]):
            matched -= 1
        if matched > 0:
            yield src_cursor, tgt_cursor, matched
        yield src_cursor + (target_len - tgt_cursor), target_len, 0

451

452

453

class AnnotatedKnitContent(KnitContent):
    """Knit content whose lines are stored as (origin, text) pairs."""

    def __init__(self, lines):
        """Wrap lines, a list of (origin, text) tuples (not copied)."""
        KnitContent.__init__(self)
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line.

        The list is a copy; when the eol flag is set, the newline is
        stripped from the text of the last pair.
        """
        annotated = self._lines[:]
        if self._should_strip_eol:
            origin, last_text = annotated[-1]
            annotated[-1] = (origin, last_text.rstrip('\n'))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        :param delta: Iterable of (start, end, count, lines) hunks whose
            positions refer to the content before any hunk was applied.
        :param new_version_id: Not used by this implementation.
        """
        target = self._lines
        # Net number of lines added by the hunks applied so far; shifts the
        # original positions of later hunks.
        shift = 0
        for start, end, count, hunk_lines in delta:
            target[shift + start:shift + end] = hunk_lines
            shift = shift + (start - end) + count

    def text(self):
        """Return the content lines without their origins.

        :raises KnitCorrupt: If a stored line cannot be unpacked into an
            (origin, text) pair.
        """
        plain = []
        try:
            for origin, text in self._lines:
                plain.append(text)
        except ValueError:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            # The exception is fetched through sys.exc_info() so that the
            # handler does not depend on version-specific except syntax.
            e = sys.exc_info()[1]
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (e,))
        if self._should_strip_eol:
            plain[-1] = plain[-1].rstrip('\n')
        return plain

    def copy(self):
        """Return a new content object holding a copy of the line list."""
        return AnnotatedKnitContent(self._lines[:])

492

493

494

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is
    reported for all lines. Generally, annotate[_iter] is not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        """Wrap lines (not copied) as the content of version_id."""
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line.

        Every line is attributed to this content's own version id.
        """
        origin = self._version_id
        return [(origin, line) for line in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        :param delta: Iterable of (start, end, count, lines) hunks whose
            positions refer to the content before any hunk was applied.
        :param new_version_id: Recorded as this content's version id once
            all hunks are applied.
        """
        target = self._lines
        # Net number of lines added by the hunks applied so far.
        shift = 0
        for start, end, count, hunk_lines in delta:
            target[shift + start:shift + end] = hunk_lines
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new content object holding a copy of the line list."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the content lines.

        The internal list itself is returned unless the eol flag is set, in
        which case a copy with the last line's newline stripped is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

529

530

531

class _KnitFactory(object):

532

"""Base class for common Factory functions."""

533

534

def parse_record(self, version_id, record, record_details,

535

base_content, copy_base_content=True):

536

"""Parse a record into a full content object.

537

538

:param version_id: The official version id for this content

539

:param record: The data returned by read_records_iter()

540

:param record_details: Details about the record returned by

541

get_build_details

542

:param base_content: If get_build_details returns a compression_parent,

543

you must return a base_content here, else use None

544

:param copy_base_content: When building from the base_content, decide

545

you can either copy it and return a new object, or modify it in

546

place.

547

:return: (content, delta) A Content object and possibly a line-delta,

548

delta may be None

549

"""

550

method, noeol = record_details

551

if method == 'line-delta':

552

if copy_base_content:

553

content = base_content.copy()

554

else:

555

content = base_content

556

delta = self.parse_line_delta(record, version_id)

557

content.apply_delta(delta, version_id)

558

else:

559

content = self.parse_fulltext(record, version_id)

560

delta = None

561

content._should_strip_eol = noeol

562

return (content, delta)

563

564

565

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id.

        :param lines: The text lines.
        :param version_id: The origin recorded for each line.
        """
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: The serialised fulltext lines.
        :param version_id: Not used by this implementation.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of a serialised line delta.

        :param lines: The serialised delta lines.
        :param version_id: Passed on to parse_line_delta, which ignores it;
            optional so that existing one-argument calls keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: The serialised delta lines.
        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            # The third header field is the number of lines in the hunk.
            count = int(header.split(',')[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return (origin_key, line) pairs for the text of key in knit.

        :param knit: Object providing _get_content(key).
        :param key: The key to annotate.
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key
        # suffixes for this factory.
        if type(key) == tuple:
            prefix = key[:-1]
            return [(prefix + (origin,), line)
                    for origin, line in content.annotate()]
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple
            # here?  Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()

687

688

689

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return plain content for lines, labelled with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        The lines need no conversion; they are wrapped together with
        version_id, exactly as make() does.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield the (start, end, count, lines) hunks of a serialised delta.

        :param lines: The serialised delta as an indexable sequence: each
            'start,end,count' header is followed by count content lines.
        :param version_id: Not used by this implementation.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        fetch = line_iter.next
        for header in line_iter:
            # The third header field is the number of lines in the hunk.
            count = int(header.split(',')[2])
            for _unused in xrange(count):
                yield fetch()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Convert a delta into a serialisable list of lines.

        Each hunk becomes a 'start,end,count' header line followed by the
        hunk's own lines.
        """
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(hunk_lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator built on knit."""
        return _KnitAnnotator(knit).annotate(key)

749

750

751

752

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning the
        KnitVersionedFiles built on it.
    """
    def factory(transport):
        # The three callables given to _KndxIndex are constant stubs
        # (returning None, True and True respectively).
        kndx_index = _KndxIndex(transport, mapper, lambda:None, lambda:True,
            lambda:True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access,
            annotated=annotated)
    return factory

766

767

768

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles that writes to a pack named 'newpack' on it.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for the graph and one for the compression
        # parents, each only when requested.
        ref_length = 0
        if graph:
            ref_length = 1
        if delta:
            ref_length = ref_length + 1
            max_delta_chain = 200
        else:
            max_delta_chain = 0
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        knit_index = _KnitGraphIndex(graph_index, lambda:True,
            parents=parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(knit_index, pack_access,
            max_delta_chain=max_delta_chain)
        # Kept on the result so that cleanup_pack_knit can finish the pack.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

803

804

805

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object carrying ``stream`` and ``writer``
        attributes, such as the result of a make_pack_factory() factory.
    """
    # NOTE(review): the stream is closed before the writer is ended; if
    # writer.end() writes to that stream this order looks suspicious --
    # confirm before changing it.
    stream = versioned_files.stream
    stream.close()
    writer = versioned_files.writer
    writer.end()

808

809

810

def _get_total_build_size(self, keys, positions):

811

"""Determine the total bytes to build these keys.

812

813

(helper function because _KnitGraphIndex and _KndxIndex work the same, but

814

don't inherit from a common base.)

815

816

:param keys: Keys that we want to build

817

:param positions: dict of {key, (info, index_memo, comp_parent)} (such

818

as returned by _get_components_positions)

819

:return: Number of bytes to build those keys

820

"""

821

all_build_index_memos = {}

822

build_keys = keys

823

while build_keys:

824

next_keys = set()

825

for key in build_keys:

826

# This is mostly for the 'stacked' case

827

# Where we will be getting the data from a fallback

828

if key not in positions:

829

continue

830

_, index_memo, compression_parent = positions[key]

831

all_build_index_memos[key] = index_memo

832

if compression_parent not in all_build_index_memos:

833

next_keys.add(compression_parent)

834

build_keys = next_keys

835

return sum([index_memo[2] for index_memo

836

in all_build_index_memos.itervalues()])

837

838

839

class KnitVersionedFiles(VersionedFiles):

840

"""Storage for many versioned files using knit compression.

841

842

Backend storage is managed by indices and data objects.

843

844

:ivar _index: A _KnitGraphIndex or similar that can describe the

845

parents, graph, compression and data location of entries in this

846

KnitVersionedFiles. Note that this is only the index for

847

*this* vfs; if there are fallbacks they must be queried separately.

848

"""

849

850

def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False, reload_func=None):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated and
        stored during insertion.
    :param reload_func: A function that can be called if we think we need
        to reload the pack listing and try again. See
        'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
    """
    # The content factory decides whether stored texts carry annotations.
    if annotated:
        content_factory = KnitAnnotateFactory()
    else:
        content_factory = KnitPlainFactory()
    self._index, self._access = index, data_access
    self._max_delta_chain = max_delta_chain
    self._factory = content_factory
    # No fallback sources until add_fallback_versioned_files is called.
    self._fallback_vfs = []
    self._reload_func = reload_func

874

875

def __repr__(self):

876

return "%s(%r, %r)" % (

877

self.__class__.__name__,

878

self._index,

879

self._access)

880

881

def add_fallback_versioned_files(self, a_versioned_files):

882

"""Add a source of texts for texts not present in this knit.

883

884

:param a_versioned_files: A VersionedFiles object.

885

"""

886

self._fallback_vfs.append(a_versioned_files)

887

888

def add_lines(self, key, parents, lines, parent_texts=None,

889

left_matching_blocks=None, nostore_sha=None, random_id=False,

890

check_content=True):

891

"""See VersionedFiles.add_lines()."""

892

self._index._check_write_ok()

893

self._check_add(key, lines, random_id, check_content)

894

if parents is None:

895

# The caller might pass None if there is no graph data, but kndx

896

# indexes can't directly store that, so we give them

897

# an empty tuple instead.

898

parents = ()

899

return self._add(key, lines, parents,

900

parent_texts, left_matching_blocks, nostore_sha, random_id)

901

902

def _add(self, key, lines, parents, parent_texts,
    left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :param key: Key tuple for the new text; every element must be a str and
        the last element is used as the version id.
    :param lines: The lines of the new text. The caller's list is not
        modified: it is copied before a trailing newline is appended.
    :param parents: Parent keys of the new text.
    :param parent_texts: Optional dict handed to _merge_annotations; None is
        treated as an empty dict.
    :param left_matching_blocks: Passed through to _merge_annotations.
    :param nostore_sha: If equal to the sha of the joined lines, nothing is
        stored and errors.ExistingContent is raised.
    :param random_id: Passed through to the index's add_records.
    :return: (digest, text_length, content) where text_length is the length
        of the joined lines before any end-of-line fix-up.
    :raises errors.ExistingContent: When nostore_sha matches the content.
    :raises TypeError: When the key contains a non-str element.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence; we only compress
    # against parents in the same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    for parent in parents:
        if parent in present_parent_map:
            present_parents.append(parent)

    # Currently we can only compress against the left most present parent.
    if (len(present_parents) == 0 or
        present_parents[0] != parents[0]):
        delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas. This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents[0])

    # Measured before a newline is appended below, so it reflects the text
    # as supplied by the caller.
    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    for element in key:
        if type(element) != str:
            raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if 'no-eol' in options:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, bytes = self._record_to_data(key, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = self._record_to_data(key, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self._factory.lower_fulltext(content)
            size, bytes = self._record_to_data(key, digest,
                store_lines)

    # Write the raw record first, then tell the index where it landed.
    access_memo = self._access.add_raw_records([(key, size)], bytes)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

986

987

def annotate(self, key):

988

"""See VersionedFiles.annotate."""

989

return self._factory.annotate(self, key)

990

991

def check(self, progress_bar=None):

992

"""See VersionedFiles.check()."""

993

# This doesn't actually test extraction of everything, but that will

994

# impact 'bzr check' substantially, and needs to be integrated with

995

# care. However, it does check for the obvious problem of a delta with

996

# no basis.

997

keys = self._index.keys()

998

parent_map = self.get_parent_map(keys)

999

for key in keys:

1000

if self._index.get_method(key) != 'fulltext':

1001

compression_parent = parent_map[key][0]

1002

if compression_parent not in parent_map:

1003

raise errors.KnitCorrupt(self,

1004

"Missing basis parent %s for %s" % (

1005

compression_parent, key))

1006

for fallback_vfs in self._fallback_vfs:

1007

fallback_vfs.check()

1008

1009

def _check_add(self, key, lines, random_id, check_content):
    """Verify that the version id and the lines are safe to add.

    :param key: Key tuple; only its last element (the version id) is
        examined.
    :param lines: Lines to validate when check_content is true.
    :param random_id: Not consulted here.
    :param check_content: When false, the line checks are skipped.
    """
    version_id = key[-1]
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception. This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1022

1023

def _check_header(self, key, line):

1024

rec = self._split_header(line)

1025

self._check_header_version(rec, key[-1])

1026

return rec

1027

1028

def _check_header_version(self, rec, version_id):

1029

"""Checks the header version on original format knit records.

1030

1031

These have the last component of the key embedded in the record.

1032

"""

1033

if rec[1] != version_id:

1034

raise KnitCorrupt(self,

1035

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1036

1037

def _check_should_delta(self, parent):
    """Walk back through the compression chain, looking for a fulltext.

    This is used when we want to decide whether to add a delta or a new
    fulltext. At most _max_delta_chain entries are examined. When a fulltext
    is found, its size is compared with the total size of the deltas leading
    up to it to see whether a new fulltext is wanted anyway.

    :param parent: The key the new text would be compressed against.
    :return: True if we should create a new delta, False if we should use a
        full text.
    """
    chain_bytes = 0
    basis = parent
    for unused_step in xrange(self._max_delta_chain):
        try:
            # Note that this only looks in the index of this particular
            # KnitVersionedFiles, not in the fallbacks. This ensures that
            # we won't store a delta spanning physical repository
            # boundaries.
            details = self._index.get_build_details([basis])[basis]
        except (RevisionNotPresent, KeyError):
            # Some basis is not locally present: always fulltext
            return False
        index_memo, compression_parent, _, _ = details
        _, _, size = index_memo
        if compression_parent is None:
            # Reached the fulltext. Simple heuristic - if the total I/O
            # would be greater as a delta than the originally installed
            # fulltext, we create a new fulltext.
            return size > chain_bytes
        chain_bytes += size
        # We don't explicitly check for presence because this is in an
        # inner loop, and if it's missing it'll fail anyhow.
        basis = compression_parent
    # We couldn't find a fulltext, so we must create a new one
    return False

1076

1077

def _build_details_to_components(self, build_details):

1078

"""Convert a build_details tuple to a position tuple."""

1079

# record_details, access_memo, compression_parent

1080

return build_details[3], build_details[0], build_details[1]

1081

1082

def _get_components_positions(self, keys, allow_missing=False):

1083

"""Produce a map of position data for the components of keys.

1084

1085

This data is intended to be used for retrieving the knit records.

1086

1087

A dict of key to (record_details, index_memo, next, parents) is

1088

returned.

1089

method is the way referenced data should be applied.

1090

index_memo is the handle to pass to the data access to actually get the

1091

data

1092

next is the build-parent of the version, or None for fulltexts.

1093

parents is the version_ids of the parents of this version

1094

1095

:param allow_missing: If True do not raise an error on a missing component,

1096

just ignore it.

1097

"""

1098

component_data = {}

1099

pending_components = keys

1100

while pending_components:

1101

build_details = self._index.get_build_details(pending_components)

1102

current_components = set(pending_components)

1103

pending_components = set()

1104

for key, details in build_details.iteritems():

1105

(index_memo, compression_parent, parents,

1106

record_details) = details

1107

method = record_details[0]

1108

if compression_parent is not None:

1109

pending_components.add(compression_parent)

1110

component_data[key] = self._build_details_to_components(details)

1111

missing = current_components.difference(build_details)

1112

if missing and not allow_missing:

1113

raise errors.RevisionNotPresent(missing.pop(), self)

1114

return component_data

1115

1116

def _get_content(self, key, parent_texts={}):

1117

"""Returns a content object that makes up the specified

1118

version."""

1119

cached_version = parent_texts.get(key, None)

1120

if cached_version is not None:

1121

# Ensure the cache dict is valid.

1122

if not self.get_parent_map([key]):

1123

raise RevisionNotPresent(key, self)

1124

return cached_version

1125

generator = _VFContentMapGenerator(self, [key])

1126

return generator._get_content(key)

1127

1128

def get_parent_map(self, keys):

1129

"""Get a map of the graph parents of keys.

1130

1131

:param keys: The keys to look up parents for.

1132

:return: A mapping from keys to parents. Absent keys are absent from

1133

the mapping.

1134

"""

1135

return self._get_parent_map_with_sources(keys)[0]

1136

1137

def _get_parent_map_with_sources(self, keys):

1138

"""Get a map of the parents of keys.

1139

1140

:param keys: The keys to look up parents for.

1141

:return: A tuple. The first element is a mapping from keys to parents.

1142

Absent keys are absent from the mapping. The second element is a

1143

list with the locations each key was found in. The first element

1144

is the in-this-knit parents, the second the first fallback source,

1145

and so on.

1146

"""

1147

result = {}

1148

sources = [self._index] + self._fallback_vfs

1149

source_results = []

1150

missing = set(keys)

1151

for source in sources:

1152

if not missing:

1153

break

1154

new_result = source.get_parent_map(missing)

1155

source_results.append(new_result)

1156

result.update(new_result)

1157

missing.difference_update(set(new_result))

1158

return result, source_results

1159

1160

def _get_record_map(self, keys, allow_missing=False):

1161

"""Produce a dictionary of knit records.

1162

1163

:return: {key:(record, record_details, digest, next)}

1164

record

1165

data returned from read_records (a KnitContentobject)

1166

record_details

1167

opaque information to pass to parse_record

1168

digest

1169

SHA1 digest of the full text after all steps are done

1170

1171

build-parent of the version, i.e. the leftmost ancestor.

1172

Will be None if the record is not a delta.

1173

:param keys: The keys to build a map for

1174

:param allow_missing: If some records are missing, rather than

1175

error, just return the data that could be generated.

1176

"""

1177

raw_map = self._get_record_map_unparsed(keys,

1178

allow_missing=allow_missing)

1179

return self._raw_map_to_record_map(raw_map)

1180

1181

def _raw_map_to_record_map(self, raw_map):

1182

"""Parse the contents of _get_record_map_unparsed.

1183

1184

:return: see _get_record_map.

1185

"""

1186

result = {}

1187

for key in raw_map:

1188

data, record_details, next = raw_map[key]

1189

content, digest = self._parse_record(key[-1], data)

1190

result[key] = content, record_details, digest, next

1191

return result

1192

1193

def _get_record_map_unparsed(self, keys, allow_missing=False):

1194

"""Get the raw data for reconstructing keys without parsing it.

1195

1196

:return: A dict suitable for parsing via _raw_map_to_record_map.

1197

key-> raw_bytes, (method, noeol), compression_parent

1198

"""

1199

# This retries the whole request if anything fails. Potentially we

1200

# could be a bit more selective. We could track the keys whose records

1201

# we have successfully found, and then only request the new records

1202

# from there. However, _get_components_positions grabs the whole build

1203

# chain, which means we'll likely try to grab the same records again

1204

# anyway. Also, can the build chains change as part of a pack

1205

# operation? We wouldn't want to end up with a broken chain.

1206

while True:

1207

try:

1208

position_map = self._get_components_positions(keys,

1209

allow_missing=allow_missing)

1210

# key = component_id, r = record_details, i_m = index_memo,

1211

# n = next

1212

records = [(key, i_m) for key, (r, i_m, n)

1213

in position_map.iteritems()]

1214

# Sort by the index memo, so that we request records from the

1215

# same pack file together, and in forward-sorted order

1216

records.sort(key=operator.itemgetter(1))

1217

raw_record_map = {}

1218

for key, data in self._read_records_iter_unchecked(records):

1219

(record_details, index_memo, next) = position_map[key]

1220

raw_record_map[key] = data, record_details, next

1221

return raw_record_map

1222

except errors.RetryWithNewPacks, e:

1223

self._access.reload_or_raise(e)

1224

1225

@classmethod

1226

def _split_by_prefix(cls, keys):

1227

"""For the given keys, split them up based on their prefix.

1228

1229

To keep memory pressure somewhat under control, split the

1230

requests back into per-file-id requests, otherwise "bzr co"

1231

extracts the full tree into memory before writing it to disk.

1232

This should be revisited if _get_content_maps() can ever cross

1233

file-id boundaries.

1234

1235

The keys for a given file_id are kept in the same relative order.

1236

Ordering between file_ids is not, though prefix_order will return the

1237

order that the key was first seen.

1238

1239

:param keys: An iterable of key tuples

1240

:return: (split_map, prefix_order)

1241

split_map A dictionary mapping prefix => keys

1242

prefix_order The order that we saw the various prefixes

1243

"""

1244

split_by_prefix = {}

1245

prefix_order = []

1246

for key in keys:

1247

if len(key) == 1:

1248

prefix = ''

1249

else:

1250

prefix = key[0]

1251

1252

if prefix in split_by_prefix:

1253

split_by_prefix[prefix].append(key)

1254

else:

1255

split_by_prefix[prefix] = [key]

1256

prefix_order.append(prefix)

1257

return split_by_prefix, prefix_order

1258

1259

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    :param keys: The keys to fetch; they are grouped by prefix.
    :param non_local_keys: Keys grouped by prefix alongside ``keys`` and
        reported with the request that carries their prefix.
    :param positions: Position data handed to the index's
        _get_total_build_size to size each prefix.
    :param _min_buffer_size: A request is closed once its accumulated size
        exceeds this value.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    # from the same pack file together, and we want to extract all
    # the texts for a given build-chain together. Ultimately it
    # probably needs a better global view.
    total_keys = len(keys)
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    requests = []
    sizes = []
    batch_keys, batch_non_local, batch_size = [], set(), 0
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        prefix_non_local = prefix_split_non_local_keys.get(prefix, [])

        batch_size += self._index._get_total_build_size(prefix_keys,
                                                        positions)
        batch_keys.extend(prefix_keys)
        batch_non_local.update(prefix_non_local)
        if batch_size <= _min_buffer_size:
            # Not big enough yet; keep filling the current request.
            continue
        requests.append((batch_keys, batch_non_local))
        sizes.append(batch_size)
        batch_keys, batch_non_local, batch_size = [], set(), 0
    if batch_keys:
        # Flush whatever is left over as a final, smaller request.
        requests.append((batch_keys, batch_non_local))
        sizes.append(batch_size)
    trace.mutter('Collapsed %d keys into %d requests w/ %d file_ids'
                 ' w/ sizes: %s', total_keys, len(requests),
                 len(prefix_split_keys), sizes)
    return requests

1304

1305

def get_record_stream(self, keys, ordering, include_delta_closure):

1306

"""Get a stream of records for keys.

1307

1308

:param keys: The keys to include.

1309

:param ordering: Either 'unordered' or 'topological'. A topologically

1310

sorted stream has compression parents strictly before their

1311

children.

1312

:param include_delta_closure: If True then the closure across any

1313

compression parents will be included (in the opaque data).

1314

:return: An iterator of ContentFactory objects, each of which is only

1315

valid until the iterator is advanced.

1316

"""

1317

# keys might be a generator

1318

keys = set(keys)

1319

if not keys:

1320

return

1321

if not self._index.has_graph:

1322

# Cannot topological order when no graph has been stored.

1323

ordering = 'unordered'

1324

1325

remaining_keys = keys

1326

while True:

1327

try:

1328

keys = set(remaining_keys)

1329

for content_factory in self._get_remaining_record_stream(keys,

1330

ordering, include_delta_closure):

1331

remaining_keys.discard(content_factory.key)

1332

yield content_factory

1333

return

1334

except errors.RetryWithNewPacks, e:

1335

self._access.reload_or_raise(e)

1336

1337

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    :param keys: A set of keys still to be yielded (set operations are
        applied to it).
    :param ordering: 'unordered' or 'topological'; anything else raises
        AssertionError.
    :param include_delta_closure: When true, whole build chains are
        resolved and records are produced via _VFContentMapGenerator;
        otherwise raw knit records are yielded for local keys and the
        fallbacks are asked for the rest.
    :return: A generator of record objects. Keys found in no source are
        yielded first, as AbsentContentFactory instances.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict. key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                needed_from_fallback.add(key)
                continue
            result = True
            # Follow compression parents until a fulltext (None), an already
            # classified key, or a component with no local position.
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering == 'topological':
        # Global topological sort
        present_keys = tsort.topo_sort(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            # Start a new group each time the source changes, so the global
            # order is kept across sources.
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    # Recomputed from the merged parent map, which also covers fallbacks.
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        # NOTE: the loop below rebinds both ``keys`` and ``non_local_keys``.
        for keys, non_local_keys in self._group_keys_for_io(present_keys,
                                                            non_local_keys,
                                                            positions):
            generator = _VFContentMapGenerator(self, keys, non_local_keys,
                                               global_map)
            for record in generator.get_record_stream():
                yield record
    else:
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this knit, so entry N maps to
                # fallback N - 1.
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                    include_delta_closure):
                    yield record

1451

1452

def get_sha1s(self, keys):

1453

"""See VersionedFiles.get_sha1s()."""

1454

missing = set(keys)

1455

record_map = self._get_record_map(missing, allow_missing=True)

1456

result = {}

1457

for key, details in record_map.iteritems():

1458

if key not in missing:

1459

continue

1460

# record entry 2 is the 'digest'.

1461

result[key] = details[2]

1462

missing.difference_update(set(result))

1463

for source in self._fallback_vfs:

1464

if not missing:

1465

break

1466

new_result = source.get_sha1s(missing)

1467

result.update(new_result)

1468

missing.difference_update(set(new_result))

1469

return result

1470

1471

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records whose storage kind is usable by this knit are written across
    as raw records; 'chunked' records and everything else are re-added
    through add_lines. Index entries for deltas whose compression parent is
    not yet in this knit's index are held back until the parent arrives, or
    registered at the end with missing_compression_parents=True.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: When the stream contains an 'absent' record.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created on first use and cached for this call.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        buffered = False
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks. In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            else:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                bytes = record._raw_record
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            try:
                # Try getting a fulltext directly from the record.
                bytes = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            lines = split_lines(bytes)
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Deliberately ignored: an already-present revision is not
                # treated as an error here.
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            added_keys = [record.key]
            # Releasing one entry can unblock others that were waiting on
            # it, so newly added keys are processed in turn.
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_records(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # So these need to be added (if the index supports holding such
        # entries for later insertion)
        for key in buffered_index_entries:
            index_entries = buffered_index_entries[key]
            self._index.add_records(index_entries,
                missing_compression_parents=True)

1630

1631

def get_missing_compression_parent_keys(self):
    """Report the compression parents that the index is still missing.

    Intended to be consulted after insert_record_stream has finished.  Any
    key reported here is a compression parent that was never supplied, so
    the records built on it cannot be inserted safely.  For atomic
    KnitVersionedFiles built on packs the transaction should then be
    aborted or suspended, since commit will fail.  Nonatomic knits error
    earlier because they have no staging area for pending entries.

    :return: Whatever the underlying index returns from
        get_missing_compression_parents() (an iterable of keys).
    """
    index = self._index
    return index.get_missing_compression_parents()

1642

1643

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: An iterable of keys; it is copied into a private set, so
        the caller's collection is not modified.
    :param pb: Optional progress bar; a DummyProgress is used when None.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: if some keys were not found locally and
        there are no fallback versioned files to consult.
    """
    if pb is None:
        pb = progress.DummyProgress()
    # Work on a private copy: keys are removed from it as they are served.
    keys = set(keys)
    total = len(keys)
    done = False
    # Retry loop: when the access layer raises RetryWithNewPacks we reload
    # and start over with whatever keys have not been served yet.
    while not done:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            key_records = []
            build_details = self._index.get_build_details(keys)
            for key, details in build_details.iteritems():
                if key in keys:
                    # details[0] is the index_memo used to read the record.
                    key_records.append((key, details[0]))
            records_iter = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in records_iter:
                pb.update('Walking content.', key_idx, total)
                compression_parent = build_details[key][1]
                if compression_parent is None:
                    # fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            done = True
        except errors.RetryWithNewPacks, e:
            # Either reloads the pack state (and we loop again) or re-raises.
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._fallback_vfs:
        if not keys:
            break
        # Keys this fallback produced lines for; dropped from the remaining
        # set once the fallback has been fully consumed.
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        keys.difference_update(source_keys)
    pb.update('Walking content.', total, total)

1720

1721

def _make_line_delta(self, delta_seq, new_content):

1722

"""Generate a line delta from delta_seq and new_content."""

1723

diff_hunks = []

1724

for op in delta_seq.get_opcodes():

1725

if op[0] == 'equal':

1726

continue

1727

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1728

return diff_hunks

1729

1730

def _merge_annotations(self, content, parents, parent_texts={},

1731

delta=None, annotated=None,

1732

left_matching_blocks=None):

1733

"""Merge annotations for content and generate deltas.

1734

1735

This is done by comparing the annotations based on changes to the text

1736

and generating a delta on the resulting full texts. If annotations are

1737

not being created then a simple delta is created.

1738

"""

1739

if left_matching_blocks is not None:

1740

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1741

else:

1742

delta_seq = None

1743

if annotated:

1744

for parent_key in parents:

1745

merge_content = self._get_content(parent_key, parent_texts)

1746

if (parent_key == parents[0] and delta_seq is not None):

1747

seq = delta_seq

1748

else:

1749

seq = patiencediff.PatienceSequenceMatcher(

1750

None, merge_content.text(), content.text())

1751

for i, j, n in seq.get_matching_blocks():

1752

if n == 0:

1753

continue

1754

# this copies (origin, text) pairs across to the new

1755

# content for any line that matches the last-checked

1756

# parent.

1757

content._lines[j:j+n] = merge_content._lines[i:i+n]

1758

# XXX: Robert says the following block is a workaround for a

1759

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1760

if content._lines and content._lines[-1][1][-1] != '\n':

1761

# The copied annotation was from a line without a trailing EOL,

1762

# reinstate one for the content object, to ensure correct

1763

# serialization.

1764

line = content._lines[-1][1] + '\n'

1765

content._lines[-1] = (content._lines[-1][0], line)

1766

if delta:

1767

if delta_seq is None:

1768

reference_content = self._get_content(parents[0], parent_texts)

1769

new_texts = content.text()

1770

old_texts = reference_content.text()

1771

delta_seq = patiencediff.PatienceSequenceMatcher(

1772

None, old_texts, new_texts)

1773

return self._make_line_delta(delta_seq, content)

1774

1775

def _parse_record(self, version_id, data):

1776

"""Parse an original format knit record.

1777

1778

These have the last element of the key only present in the stored data.

1779

"""

1780

rec, record_contents = self._parse_record_unchecked(data)

1781

self._check_header_version(rec, version_id)

1782

return record_contents, rec[3]

1783

1784

def _parse_record_header(self, key, raw_data):

1785

"""Parse a record header for consistency.

1786

1787

:return: the header and the decompressor stream.

1788

as (stream, header_record)

1789

"""

1790

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1791

try:

1792

# Current serialise

1793

rec = self._check_header(key, df.readline())

1794

except Exception, e:

1795

raise KnitCorrupt(self,

1796

"While reading {%s} got %s(%s)"

1797

% (key, e.__class__.__name__, str(e)))

1798

return df, rec

1799

1800

def _parse_record_unchecked(self, data):

1801

# profiling notes:

1802

# 4168 calls in 2880 217 internal

1803

# 4168 calls to _parse_record_header in 2121

1804

# 4168 calls to readlines in 330

1805

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1806

try:

1807

record_contents = df.readlines()

1808

except Exception, e:

1809

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1810

(data, e.__class__.__name__, str(e)))

1811

header = record_contents.pop(0)

1812

rec = self._split_header(header)

1813

last_line = record_contents.pop()

1814

if len(record_contents) != int(rec[2]):

1815

raise KnitCorrupt(self,

1816

'incorrect number of lines %s != %s'

1817

' for version {%s} %s'

1818

% (len(record_contents), int(rec[2]),

1819

rec[1], record_contents))

1820

if last_line != 'end %s\n' % rec[1]:

1821

raise KnitCorrupt(self,

1822

'unexpected version end line %r, wanted %r'

1823

% (last_line, rec[1]))

1824

df.close()

1825

return rec, record_contents

1826

1827

def _read_records_iter(self, records):
    """Read and parse text records, yielding them as they are read.

    Results come back sorted by access memo rather than in the order
    requested, and a record requested more than once is only yielded once.

    :param records: A list of (key, access_memo) entries.
    :return: Yields (key, contents, digest) tuples.
    """
    if not records:
        return

    # XXX: This smells wrong, IO may not be getting ordered right.
    needed_records = list(set(records))
    needed_records.sort(key=operator.itemgetter(1))
    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for key, index_memo in needed_records]
    raw_data = self._access.get_raw_records(memos)

    for (key, index_memo), data in izip(iter(needed_records), raw_data):
        parsed = self._parse_record(key[-1], data)
        content, digest = parsed
        yield key, content, digest

1854

1855

def _read_records_iter_raw(self, records):

1856

"""Read text records from data file and yield raw data.

1857

1858

This unpacks enough of the text record to validate the id is

1859

as expected but thats all.

1860

1861

Each item the iterator yields is (key, bytes,

1862

expected_sha1_of_full_text).

1863

"""

1864

for key, data in self._read_records_iter_unchecked(records):

1865

# validate the header (note that we can only use the suffix in

1866

# current knit records).

1867

df, rec = self._parse_record_header(key, data)

1868

df.close()

1869

yield key, data, rec[3]

1870

1871

def _read_records_iter_unchecked(self, records):

1872

"""Read text records from data file and yield raw data.

1873

1874

No validation is done.

1875

1876

Yields tuples of (key, data).

1877

"""

1878

# setup an iterator of the external records:

1879

# uses readv so nice and fast we hope.

1880

if len(records):

1881

# grab the disk data needed.

1882

needed_offsets = [index_memo for key, index_memo

1883

in records]

1884

raw_records = self._access.get_raw_records(needed_offsets)

1885

1886

for key, index_memo in records:

1887

data = raw_records.next()

1888

yield key, data

1889

1890

def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Serialise key, digest and lines into a compressed record.

    :param key: The key of the record.  Only the trailing component is
        written into the record.
    :param digest: The digest written into the header line.
    :param lines: The lines of the text; the last one must end in a
        newline.
    :param dense_lines: The same bytes as lines in a denser form (for
        instance one string holding every line).  Used for the join when
        supplied, so less time is spent resizing the final string.
    :return: (length, compressed_bytes): the gzip-compressed record and
        its length.
    :raises AssertionError: if the joined record is not a plain str.
    :raises ValueError: if the final line lacks a trailing newline.
    """
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    version_id = key[-1]
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    body = dense_lines or lines
    payload = ''.join(chain([header], body, [footer]))
    if type(payload) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(payload))
    if lines and lines[-1][-1] != '\n':
        raise ValueError('corrupt lines value %r' % lines)
    compressed = tuned_gzip.bytes_to_gzip(payload)
    return len(compressed), compressed

1919

1920

def _split_header(self, line):

1921

rec = line.split()

1922

if len(rec) != 4:

1923

raise KnitCorrupt(self,

1924

'unexpected number of elements in record header')

1925

return rec

1926

1927

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of the local index together with the
        keys of every fallback versioned files object.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1936

1937

1938

class _ContentMapGenerator(object):
    """Generate texts or expose raw deltas for a set of texts.

    Subclasses are expected to provide the attributes used here: vf, keys,
    nonlocal_keys, global_map, _raw_record_map, _record_map, _contents_map
    and _factory.
    """

    def _get_content(self, key):
        """Get the content object for key."""
        # Note that _get_content is only called when the _ContentMapGenerator
        # has been constructed with just one key requested for reconstruction.
        if key in self.nonlocal_keys:
            # Only the first record of the stream is consumed.
            record = self.get_record_stream().next()
            # Create a content object on the fly
            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            return PlainKnitContent(lines, record.key)
        else:
            # local keys we can ask for directly
            return self._get_one_work(key)

    def get_record_stream(self):
        """Get a record stream for the keys requested during __init__."""
        for record in self._work():
            yield record

    def _work(self):
        """Yield the records for the requested keys.

        Records for nonlocal keys are fetched from the fallback versioned
        files and yielded first; a LazyKnitContentFactory is then yielded
        for each remaining (local) key, in the order of self.keys.

        :return: A generator of record objects.
        """
        # NB: By definition we never need to read remote sources unless texts
        # are requested from them: we don't delta across stores - and we
        # explicitly do not want to, to prevent data loss situations.
        if self.global_map is None:
            self.global_map = self.vf.get_parent_map(self.keys)
        nonlocal_keys = self.nonlocal_keys

        missing_keys = set(nonlocal_keys)
        # Read from remote versioned file instances and provide to our caller.
        for source in self.vf._fallback_vfs:
            if not missing_keys:
                break
            # Loop over fallback repositories asking them for texts - ignore
            # any missing from a particular fallback.
            for record in source.get_record_stream(missing_keys,
                'unordered', True):
                if record.storage_kind == 'absent':
                    # Not in this particular stream, may be in one of the
                    # other fallback vfs objects.
                    continue
                missing_keys.remove(record.key)
                yield record

        # Note: this replaces any raw record map set up earlier.
        self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,
            allow_missing=True)
        # 'first' is passed to the factory only for the first local key.
        first = True
        for key in self.keys:
            if key in self.nonlocal_keys:
                continue
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _get_one_work(self, requested_key):
        """Build and return the content object for requested_key.

        All local keys in self.keys are reconstructed in one pass, and each
        reconstructed text is checked against its recorded SHA1.

        :raises RevisionNotPresent: if a needed component is not in the
            record map.
        :raises SHA1KnitCorrupt: if a reconstructed text does not match
            its digest.
        """
        # Now, if we have calculated everything already, just return the
        # desired text.
        if requested_key in self._contents_map:
            return self._contents_map[requested_key]
        # To simplify things, parse everything at once - code that wants one text
        # probably wants them all.
        # FUTURE: This function could be improved for the 'extract many' case
        # by tracking each component and only doing the copy when the number of
        # children that need to apply deltas to it is > 1 or it is part of the
        # final output.
        multiple_versions = len(self.keys) != 1
        if self._record_map is None:
            # Parse the raw records lazily, once.
            self._record_map = self.vf._raw_map_to_record_map(
                self._raw_record_map)
        record_map = self._record_map
        # raw_record_map is key:
        # Have read and parsed records at this point.
        for key in self.keys:
            if key in self.nonlocal_keys:
                # already handled
                continue
            # Walk the compression chain from key back towards its basis,
            # collecting the components needed to rebuild it.
            components = []
            cursor = key
            while cursor is not None:
                try:
                    record, record_details, digest, next = record_map[cursor]
                except KeyError:
                    raise RevisionNotPresent(cursor, self)
                components.append((cursor, record, record_details, digest))
                cursor = next
                if cursor in self._contents_map:
                    # no need to plan further back
                    components.append((cursor, None, None, None))
                    break

            # Apply the components oldest-first.
            content = None
            for (component_id, record, record_details,
                 digest) in reversed(components):
                if component_id in self._contents_map:
                    content = self._contents_map[component_id]
                else:
                    content, delta = self._factory.parse_record(key[-1],
                        record, record_details, content,
                        copy_base_content=multiple_versions)
                    if multiple_versions:
                        self._contents_map[component_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            actual_sha = sha_strings(text)
            if actual_sha != digest:
                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
        if multiple_versions:
            return self._contents_map[requested_key]
        else:
            # With a single key nothing was cached; return what was built.
            return content

    def _wire_bytes(self):
        """Get the bytes to put on the wire for this generator.

        The result starts with the 'knit-delta-closure' kind marker, then
        a line that is 'annotated' or empty, then the local keys to emit,
        then the serialised _raw_record_map.

        :return: Bytes to put on the wire.
        """
        lines = []
        # kind marker for dispatch on the far side,
        lines.append('knit-delta-closure')
        # Annotated or not
        if self.vf._factory.annotated:
            lines.append('annotated')
        else:
            lines.append('')
        # then the list of keys
        lines.append('\t'.join(['\x00'.join(key) for key in self.keys
            if key not in self.nonlocal_keys]))
        # then the _raw_record_map in serialised form:
        map_byte_list = []
        # for each item in the map:
        # 1 line with key
        # 1 line with parents if the key is to be yielded (None: for None, '' for ())
        # one line with method
        # one line with noeol
        # one line with next ('' for None)
        # one line with byte count of the record bytes
        # the record bytes
        for key, (record_bytes, (method, noeol), next) in \
            self._raw_record_map.iteritems():
            key_bytes = '\x00'.join(key)
            parents = self.global_map.get(key, None)
            if parents is None:
                parent_bytes = 'None:'
            else:
                parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
            method_bytes = method
            if noeol:
                noeol_bytes = "T"
            else:
                noeol_bytes = "F"
            if next:
                next_bytes = '\x00'.join(next)
            else:
                next_bytes = ''
            map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
                key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
                len(record_bytes), record_bytes))
        map_bytes = ''.join(map_byte_list)
        lines.append(map_bytes)
        bytes = '\n'.join(lines)
        return bytes

2112

2113

2114

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
        global_map=None, raw_record_map=None):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for.  Any iterable is
            accepted; it is consumed exactly once.
        :param nonlocal_keys: An iterable of keys (possibly intersecting
            keys) which are known to not be in this knit, but rather in one
            of the fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: An unparsed raw record map to use for answering
            contents.
        """
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list: 'keys' may be a one-shot iterable
            # that list(keys) above has already exhausted.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2157

2158

2159

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised form, as produced by _wire_bytes.
        :param line_end: Offset in bytes at which parsing starts (the
            'annotated' line).
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)

        def read_line(offset):
            # Return the text up to the next newline and the offset just
            # past that newline.
            stop = bytes.find('\n', offset)
            return bytes[offset:stop], stop + 1

        def split_keys(text):
            # Tab-separated keys, each a NUL-separated tuple; empty
            # segments are dropped.
            return [tuple(segment.split('\x00'))
                    for segment in text.split('\t') if segment]

        # Annotated or not
        flag, pos = read_line(line_end)
        if flag == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        key_line, pos = read_line(pos)
        self.keys = split_keys(key_line)
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives, but
        # there is a decent sized gap stopping that at the moment.
        total = len(bytes)
        while pos < total:
            # 1 line with key
            key_text, pos = read_line(pos)
            key = tuple(key_text.split('\x00'))
            # 1 line with parents (None: for None, '' for ())
            parent_text, pos = read_line(pos)
            if parent_text == 'None:':
                parents = None
            else:
                parents = tuple(split_keys(parent_text))
            self.global_map[key] = parents
            # one line with method
            method, pos = read_line(pos)
            # one line with noeol
            noeol_text, pos = read_line(pos)
            noeol = noeol_text == "T"
            # one line with next ('' for None)
            next_text, pos = read_line(pos)
            if next_text:
                next_key = tuple(next_text.split('\x00'))
            else:
                next_key = None
            # one line with byte count of the record bytes
            count_text, pos = read_line(pos)
            count = int(count_text)
            # the record bytes
            record_bytes = bytes[pos:pos+count]
            pos = pos + count
            # put it in the map
            self._raw_record_map[key] = (
                record_bytes, (method, noeol), next_key)

    def get_record_stream(self):
        """Get a record stream for the keys requested by the bytestream."""
        is_first = True
        for key in self.keys:
            parents = self.global_map[key]
            yield LazyKnitContentFactory(key, parents, self, is_first)
            is_first = False

    def _wire_bytes(self):
        """Return the bytes this generator was constructed from."""
        return self._bytes

2246

2247

2248

class _KndxIndex(object):

2249

"""Manages knit index files

2250

2251

The index is kept in memory and read on startup, to enable

2252

fast lookups of revision information. The cursor of the index

2253

file is always pointing to the end, making it easy to append

2254

entries.

2255

2256

_cache is a cache for fast mapping from version id to a Index

2257

object.

2258

2259

_history is a cache for fast mapping from indexes to version ids.

2260

2261

The index data format is dictionary compressed when it comes to

2262

parent references; an index entry may only have parents with a

2263

lower index number. As a result, the index is topologically sorted.

2264

2265

Duplicate entries may be written to the index for a single version id

2266

if this is done then the latter one completely replaces the former:

2267

this allows updates to correct version and parent information.

2268

Note that the two entries may share the delta, and that successive

2269

annotations and references MUST point to the first entry.

2270

2271

The index file on disc contains a header, followed by one line per knit

2272

record. The same revision can be present in an index file more than once.

2273

The first occurrence gets assigned a sequence number starting from 0.

2274

2275

The format of a single line is

2276

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

2277

REVISION_ID is a utf8-encoded revision id

2278

FLAGS is a comma separated list of flags about the record. Values include

2279

no-eol, line-delta, fulltext.

2280

BYTE_OFFSET is the ascii representation of the byte offset in the data file

2281

that the compressed data starts at.

2282

LENGTH is the ascii representation of the length of the data file.

2283

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

2284

REVISION_ID.

2285

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

2286

revision id already in the knit that is a parent of REVISION_ID.

2287

The ' :' marker is the end of record marker.

2288

2289

partial writes:

2290

when a write is interrupted to the index file, it will result in a line

2291

that does not end in ' :'. If the ' :' is not present at the end of a line,

2292

or at the end of the file, then the record that is missing it will be

2293

ignored by the parser.

2294

2295

When writing new records to the index file, the data is preceded by '\n'

2296

to ensure that records always start on new lines even if the last write was

2297

interrupted. As a result it's normal for the last line in the index to be

2298

missing a trailing newline. One can be added with no harmful effects.

2299

2300

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2301

where prefix is e.g. the (fileid,) for .texts instances or () for

2302

constant-mapped things like .revisions, and the old state is

2303

tuple(history_vector, cache_dict). This is used to prevent having an

2304

ABI change with the C extension that reads .kndx files.

2305

"""

2306

2307

HEADER = "# bzr knit index 8\n"

2308

2309

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):

2310

"""Create a _KndxIndex on transport using mapper."""

2311

self._transport = transport

2312

self._mapper = mapper

2313

self._get_scope = get_scope

2314

self._allow_writes = allow_writes

2315

self._is_locked = is_locked

2316

self._reset_cache()

2317

self.has_graph = True

2318

2319

def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx path their key maps to, and each
    group is appended to (or used to initialise) that index file.  If
    anything fails while a group is being processed, the in-memory cache
    for that prefix is restored to its state before the group and the
    error is re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.  (Not consulted by
        this implementation.)
    :param missing_compression_parents: If True the records being added
        refer to compression parents that are not available.  This index
        cannot hold such records, so RevisionNotPresent is raised.  If
        False the records are added normally.
    :raises errors.RevisionNotPresent: if missing_compression_parents is
        True.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # path -> (prefix, [records destined for that path])
    paths = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        path_keys = paths.setdefault(path, (prefix, []))
        path_keys[1].append(record)
    for path in sorted(paths):
        prefix, path_keys = paths[path]
        self._load_prefixes([prefix])
        lines = []
        # Snapshot the cache for this prefix so it can be rolled back.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_keys:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                # Each line starts with '\n' so that a record always
                # begins on a fresh line.
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(orig_history):
                # The prefix already had history: append to the file.
                self._transport.append_bytes(path, ''.join(lines))
            else:
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

2371

2372

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: always.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)

2377

2378

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    :raises NotImplementedError: always.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)

2383

2384

def _cache_key(self, key, options, pos, size, parent_keys):

2385

"""Cache a version record in the history array and index cache.

2386

2387

This is inlined into _load_data for performance. KEEP IN SYNC.

2388

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

2389

indexes).

2390

"""

2391

prefix = key[:-1]

2392

version_id = key[-1]

2393

# last-element only for compatibilty with the C load_data.

2394

parents = tuple(parent[-1] for parent in parent_keys)

2395

for parent in parent_keys:

2396

if parent[:-1] != prefix:

2397

raise ValueError("mismatched prefixes for %r, %r" % (

2398

key, parent_keys))

2399

cache, history = self._kndx_cache[prefix]

2400

# only want the _history index to reference the 1st index entry

2401

# for version_id

2402

if version_id not in cache:

2403

index = len(history)

2404

history.append(version_id)

2405

else:

2406

index = cache[version_id][5]

2407

cache[version_id] = (version_id,

2408

options,

2409

pos,

2410

size,

2411

parents,

2412

index)

2413

2414

def check_header(self, fp):
    """Read the first line from fp and check it is the index header.

    :param fp: A file-like object positioned at the start of the index.
    :raises errors.NoSuchFile: if the first line is empty.
    :raises KnitHeaderError: if the first line is not self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    if first_line == self.HEADER:
        return
    raise KnitHeaderError(badline=first_line, filename=self)

2422

2423

def _check_read(self):
    """Raise if not locked, and reset the cache if the scope has changed.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)
    current_scope = self._get_scope()
    if current_scope != self._scope:
        self._reset_cache()

2428

2429

def _check_write_ok(self):
    """Raise if writes are not permitted.

    The cache is reset first if the scope has changed, so the mode that is
    checked is the one current after any reset.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: If self._mode is not 'w'.
    """
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)
    current_scope = self._get_scope()
    if current_scope != self._scope:
        self._reset_cache()
    writable = (self._mode == 'w')
    if not writable:
        raise errors.ReadOnlyObjectDirtiedError(self)

2437

2438

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys. It is copied into a list up front, so
        one-shot iterators (e.g. generators) are handled correctly.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked by get_parent_map and again by the loop below; without
    # a snapshot a one-shot iterator would be exhausted after the first pass
    # and every key would silently be dropped.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Any non-fulltext record is built on its first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

2473

2474

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta', whichever is found first (in that
        order) in the key's options.
    :raises errors.KnitIndexUnknownMethod: If the options name neither.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

2483

2484

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    The index for the key's prefix is loaded before the lookup.

    :raises RevisionNotPresent: If the key is not in the loaded index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        cache = self._kndx_cache[prefix][0]
        # Element 1 of a cache entry holds the options.
        return cache[suffix][1]
    except KeyError:
        raise RevisionNotPresent(key, self)

2495

2496

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for. Any iterable is accepted;
        it is copied into a list up front so one-shot iterators work.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is scanned twice (once for prefixes, once for the lookups), so
    # snapshot it: a generator would otherwise be exhausted by the first
    # scan and the result would silently be empty.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            # Element 4 of a cache entry holds the parents' last elements.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            pass
        else:
            # Rebuild full parent keys by re-attaching the shared prefix.
            result[key] = tuple(prefix + (suffix,) for
                                suffix in suffix_parents)
    return result

2519

2520

def get_position(self, key):
    """Return details needed to access the version.

    The index for the key's prefix is loaded before the lookup.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    record = self._kndx_cache[prefix][0][suffix]
    # Elements 2 and 3 of a cache entry are the position and size.
    pos, size = record[2], record[3]
    return key, pos, size

2530

2531

has_key = _mod_index._has_key_from_parent_map

2532

2533

def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes self.HEADER followed by extra_lines to path through the
    transport's put_file_non_atomic, asking it to create the parent
    directory.

    :param path: The path handed to the transport.
    :param extra_lines: Optional iterable of lines to write after the
        header. Defaults to an immutable empty tuple rather than a shared
        mutable list.
    """
    sio = StringIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    # NOTE: no file or directory mode is passed to the transport here.
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)

2544

2545

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples, one per entry in the history list of
        every prefix found.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension from every file on the transport and map the
        # distinct remaining paths back to prefixes.
        relpaths = set()
        for quoted_relpath in self._transport.iter_files_recursive():
            relpaths.add(os.path.splitext(quoted_relpath)[0])
        prefixes = [self._mapper.unmap(relpath) for relpath in relpaths]
    self._load_prefixes(prefixes)
    found = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        found.update([prefix + (suffix,) for suffix in history])
    return found

2566

2567

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already in self._kndx_cache are skipped. For the others the
    mapped '.kndx' file is read with _load_data and the parsed
    (cache, history) pair is stored; when NoSuchFile is raised an empty
    pair is stored instead.

    :param prefixes: An iterable of key prefixes.
    """
    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # the load_data interface writes to these variables.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            path = self._mapper.map(prefix) + '.kndx'
            fp = self._transport.get(path)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
            loaded = (self._cache, self._history)
            self._kndx_cache[prefix] = loaded
            del self._cache
            del self._filename
            del self._history
        except NoSuchFile:
            self._kndx_cache[prefix] = ({}, [])
            if type(self._mapper) == ConstantMapper:
                # preserve behaviour for revisions.kndx etc.
                self._init_index(path)
            del self._cache
            del self._filename
            del self._history

2597

2598

missing_keys = _mod_index._missing_keys_from_parent_map

2599

2600

def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    :param keys: An iterable of key tuples.
    :return: A dict mapping each key[:-1] to the list of key[-1] values
        seen for it, in iteration order.
    """
    partitioned = {}
    for key in keys:
        prefix, suffix = key[:-1], key[-1]
        try:
            partitioned[prefix].append(suffix)
        except KeyError:
            partitioned[prefix] = [suffix]
    return partitioned

2607

2608

def _dictionary_compress(self, keys):
    """Dictionary compress keys.

    :param keys: The keys to generate references to. Must be an indexable
        sequence (the first key determines the prefix).
    :return: A string representation of keys. keys which are present are
        dictionary compressed, and others are emitted as fulltext with a
        '.' prefix.
    :raises ValueError: If the keys do not all share one prefix.
    """
    if not keys:
        return ''
    result_list = []
    prefix = keys[0][:-1]
    cache = self._kndx_cache[prefix][0]
    for key in keys:
        if key[:-1] != prefix:
            # kndx indices cannot refer across partitioned storage.
            # keys is wrapped in a 1-tuple: it is commonly a tuple itself
            # and a bare "%r" % keys would then raise TypeError instead of
            # the intended ValueError.
            raise ValueError("mismatched prefixes for %r" % (keys,))
        if key[-1] in cache:
            # -- inlined lookup() --
            # Element 5 of a cache entry is its index in the history list.
            result_list.append(str(cache[key[-1]][5]))
            # -- end lookup () --
        else:
            result_list.append('.' + key[-1])
    return ' '.join(result_list)

2632

2633

def _reset_cache(self):
    """Discard all parsed kndx data and re-read the scope and write mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    if not self._allow_writes():
        self._mode = 'r'
    else:
        self._mode = 'w'

2643

2644

def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def prefix_then_position(key):
        # positions[key][1] is the index_memo: (key, position, size).
        # Group by the memo key's prefix (not its revision id), then order
        # by position; size does not matter for the sort.
        memo_key, position = positions[key][1][:2]
        return memo_key[:-1], position
    return keys.sort(key=prefix_then_position)

2663

2664

_get_total_build_size = _get_total_build_size

2665

2666

def _split_key(self, key):
    """Split key into a prefix and suffix.

    :return: A (prefix, suffix) tuple: everything but the last element of
        key, and the last element itself.
    """
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix

2669

2670

2671

class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex."""

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
                 add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked and
            thus usable (i.e. whether the object should answer queries).
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: If deltas is requested without parents.
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked
        self._missing_compression_parents = set()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._graph_index)

    def add_records(self, records, random_id=False,
                    missing_compression_parents=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
                         (key, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :param missing_compression_parents: If True, the compression parent
            of every line-delta record is tracked, and those that are not
            among the keys being added are remembered as missing (see
            get_missing_compression_parents). If False no compression
            parents are recorded as missing by this call.
        :raises errors.ReadOnlyError: If no add_callback was supplied.
        :raises KnitCorrupt: For a line-delta in a non-delta index, parents
            in a parentless index, or a record that conflicts with one
            already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        compression_parents = set()
        for (key, options, access_memo, parents) in records:
            if self._parents:
                parents = tuple(parents)
            # Only pos and size are stored; the memo's index is not used.
            _memo_index, pos, size = access_memo
            # The value is a one character no-eol flag followed by
            # "<pos> <size>".
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # Second reference list: the compression parent.
                        node_refs = (parents, (parents[0],))
                        if missing_compression_parents:
                            compression_parents.add(parents[0])
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        # check for dups
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (_index, key, value, node_refs) in present_nodes:
                # An existing entry must agree on the no-eol flag and on the
                # first reference list; if so the new record is dropped.
                if (value[0] != keys[key][0][0] or
                    node_refs[:1] != keys[key][1][:1]):
                    raise KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)
        if missing_compression_parents:
            # This may appear to be incorrect (it does not check for
            # compression parents that are in the existing graph index),
            # but such records won't have been buffered, so this is
            # actually correct: every entry when
            # missing_compression_parents==True either has a missing parent, or
            # a parent that is one of the keys in records.
            compression_parents.difference_update(keys)
            self._missing_compression_parents.update(compression_parents)
        # Adding records may have satisfied missing compression parents.
        self._missing_compression_parents.difference_update(keys)

    def scan_unvalidated_index(self, graph_index):
        """Inform this _KnitGraphIndex that there is an unvalidated index.

        This allows this _KnitGraphIndex to keep track of any missing
        compression parents we may want to have filled in to make those
        indices valid.

        :param graph_index: A GraphIndex
        """
        if self._deltas:
            # Reference list 1 holds compression parents; keep only those
            # this index cannot already resolve.
            new_missing = graph_index.external_references(ref_list_num=1)
            new_missing.difference_update(self.get_parent_map(new_missing))
            self._missing_compression_parents.update(new_missing)

    def get_missing_compression_parents(self):
        """Return the keys of missing compression parents.

        Missing compression parents occur when a record stream was missing
        basis texts, or an index was scanned that had missing basis texts.

        :return: A frozenset snapshot of the missing keys.
        """
        return frozenset(self._missing_compression_parents)

    def _check_read(self):
        """Raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Raise if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _compression_parent(self, an_entry):
        """Return the key that an_entry is compressed against, or None.

        :param an_entry: An index entry whose element 3 holds the reference
            lists.
        :raises AssertionError: If there is more than one compression parent.
        """
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        if len(compression_parents) != 1:
            # Wrap in a 1-tuple: compression_parents is itself a sequence
            # with several items here, and a bare "%r" % seq would raise
            # TypeError instead of the intended AssertionError.
            raise AssertionError(
                "Too many compression parents: %r" % (compression_parents,))
        return compression_parents[0]

    def get_build_details(self, keys):
        """Get the method, index_memo and compression parent for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        entries = self._get_entries(keys, False)
        for entry in entries:
            key = entry[1]
            if not self._parents:
                parents = ()
            else:
                parents = entry[3][0]
            if not self._deltas:
                compression_parent_key = None
            else:
                compression_parent_key = self._compression_parent(entry)
            # The first character of the value is the no-eol flag.
            noeol = (entry[2][0] == 'N')
            if compression_parent_key:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[key] = (self._node_to_position(entry),
                           compression_parent_key, parents,
                           (method, noeol))
        return result

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator of (index, key, value, refs) tuples; for a
        parentless index refs is always ().

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            found entries have been yielded if any requested key was absent.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def get_method(self, key):
        """Return compression method of specified key."""
        return self._get_method(self._get_node(key))

    def _get_method(self, node):
        """Return 'line-delta' if node has a compression parent, else
        'fulltext' (always 'fulltext' when deltas are disabled)."""
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, key):
        """Return the single index entry for key.

        :raises RevisionNotPresent: If the index has no entry for key.
        """
        try:
            return list(self._get_entries([key]))[0]
        except IndexError:
            raise RevisionNotPresent(key, self)

    def get_options(self, key):
        """Return a list representing options.

        e.g. ['foo', 'bar']

        The list holds the compression method, followed by 'no-eol' when the
        entry's value carries the no-eol flag.
        """
        node = self._get_node(key)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping. For a parentless index every present key maps to
            None.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1]] = node[3][0]
        else:
            for node in nodes:
                result[node[1]] = None
        return result

    def get_position(self, key):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(key)
        return self._node_to_position(node)

    has_key = _mod_index._has_key_from_parent_map

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.

        :return: A list with the key of every entry in the graph index.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    missing_keys = _mod_index._missing_keys_from_parent_map

    def _node_to_position(self, node):
        """Convert an index value to position details.

        :return: (node[0], position, size), with position and size parsed
            from the "<flag><pos> <size>" value string.
        """
        # Skip the leading one-character no-eol flag.
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def _sort_keys_by_io(self, keys, positions):
        """Figure out an optimal order to read the records for the given keys.

        Sort keys, grouped by index and sorted by position.

        :param keys: A list of keys whose records we want to read. This will be
            sorted 'in-place'.
        :param positions: A dict, such as the one returned by
            _get_components_positions()
        :return: None
        """
        def get_index_memo(key):
            # index_memo is at offset [1]. It is made up of (GraphIndex,
            # position, size). GI is an object, which will be unique for each
            # pack file. This causes us to group by pack file, then sort by
            # position. Size doesn't matter, but it isn't worth breaking up the
            # tuple.
            return positions[key][1]
        return keys.sort(key=get_index_memo)

    _get_total_build_size = _get_total_build_size

2988

2989

2990

class _KnitKeyAccess(object):
    """Access to records in .knit files."""

    def __init__(self, transport, mapper):
        """Create a _KnitKeyAccess with transport and mapper.

        :param transport: The transport the access object is rooted at.
        :param mapper: The mapper used to map keys to .knit files.
        """
        self._transport = transport
        self._mapper = mapper

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        Each raw data item is appended to the '.knit' file that its key maps
        to.

        :param key_sizes: An iterable of tuples containing the key and size
            of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _KnitKeyAccess the memo is (key, pos,
            length), where the key is the record key.
        :raises AssertionError: If raw_data is not exactly of type str.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        memos = []
        start = 0
        # TODO: This can be tuned for writing to sftp and other servers where
        # append() is relatively expensive by grouping the writes to each key
        # prefix.
        for key, size in key_sizes:
            path = self._mapper.map(key)
            knit_path = path + '.knit'
            segment = raw_data[start:start+size]
            try:
                base = self._transport.append_bytes(knit_path, segment)
            except errors.NoSuchFile:
                # Create the containing directory, then retry once.
                self._transport.mkdir(osutils.dirname(path))
                base = self._transport.append_bytes(knit_path, segment)
            # if base == 0:
            # chmod.
            start += size
            memos.append((key, base, size))
        return memos

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the access memo for
            retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group runs of consecutive memos that share a key prefix
        # into one request each, to minimise readv's issued.
        grouped = []
        for (key, offset, length) in memos_for_retrieval:
            prefix = key[:-1]
            if grouped and grouped[-1][0] == prefix:
                grouped[-1][1].append((offset, length))
            else:
                grouped.append((prefix, [(offset, length)]))
        for prefix, read_vector in grouped:
            path = self._mapper.map(prefix) + '.knit'
            for pos, data in self._transport.readv(path, read_vector):
                yield data

3063

3064

3065

class _DirectPackAccess(object):

3066

"""Access to data in one or more packs with less translation."""

3067

3068

def __init__(self, index_to_packs, reload_func=None):

3069

"""Create a _DirectPackAccess object.

3070

3071

:param index_to_packs: A dict mapping index objects to the transport

3072

and file names for obtaining data.

3073

:param reload_func: A function to call if we determine that the pack

3074

files have moved and we need to reload our caches. See

3075

bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.

3076

"""

3077

self._container_writer = None

3078

self._write_index = None

3079

self._indices = index_to_packs

3080

self._reload_func = reload_func

3081

3082

def add_raw_records(self, key_sizes, raw_data):

3083

"""Add raw knit bytes to a storage area.

3084

3085

The data is spooled to the container writer in one bytes-record per

3086

raw data item.

3087

3088

:param sizes: An iterable of tuples containing the key and size of each

3089

raw data segment.

3090

:param raw_data: A bytestring containing the data.

3091

:return: A list of memos to retrieve the record later. Each memo is an

3092

opaque index memo. For _DirectPackAccess the memo is (index, pos,

3093

length), where the index field is the write_index object supplied

3094

to the PackAccess object.

3095

"""

3096

if type(raw_data) != str:

3097

raise AssertionError(

3098

'data must be plain bytes was %s' % type(raw_data))

3099

result = []

3100

offset = 0

3101

for key, size in key_sizes:

3102

p_offset, p_length = self._container_writer.add_bytes_record(

3103

raw_data[offset:offset+size], [])

3104

offset += size

3105

result.append((self._write_index, p_offset, p_length))

3106

return result

3107

3108

def get_raw_records(self, memos_for_retrieval):

3109

"""Get the raw bytes for a records.

3110

3111

:param memos_for_retrieval: An iterable containing the (index, pos,

3112

length) memo for retrieving the bytes. The Pack access method

3113

looks up the pack to use for a given record in its index_to_pack

3114

map.

3115

:return: An iterator over the bytes of the records.

3116

"""

3117

# first pass, group into same-index requests

3118

request_lists = []

3119

current_index = None

3120

for (index, offset, length) in memos_for_retrieval:

3121

if current_index == index:

3122

current_list.append((offset, length))

3123

else:

3124

if current_index is not None:

3125

request_lists.append((current_index, current_list))

3126

current_index = index

3127

current_list = [(offset, length)]

3128

# handle the last entry

3129

if current_index is not None:

3130

request_lists.append((current_index, current_list))

3131

for index, offsets in request_lists:

3132

try:

3133

transport, path = self._indices[index]

3134

except KeyError:

3135

# A KeyError here indicates that someone has triggered an index

3136

# reload, and this index has gone missing, we need to start

3137

# over.

3138

if self._reload_func is None:

3139

# If we don't have a _reload_func there is nothing that can

3140

# be done

3141

raise

3142

raise errors.RetryWithNewPacks(index,

3143

reload_occurred=True,

3144

exc_info=sys.exc_info())

3145

try:

3146

reader = pack.make_readv_reader(transport, path, offsets)

3147

for names, read_func in reader.iter_records():

3148

yield read_func(None)

3149

except errors.NoSuchFile:

3150

# A NoSuchFile error indicates that a pack file has gone

3151

# missing on disk, we need to trigger a reload, and start over.

3152

if self._reload_func is None:

3153

raise

3154

raise errors.RetryWithNewPacks(transport.abspath(path),

3155

reload_occurred=False,

3156

exc_info=sys.exc_info())

3157

3158

def set_writer(self, writer, index, transport_packname):

3159

"""Set a writer to use for adding data."""

3160

if index is not None:

3161

self._indices[index] = transport_packname

3162

self._container_writer = writer

3163

self._write_index = index

3164

3165

def reload_or_raise(self, retry_exc):

3166

"""Try calling the reload function, or re-raise the original exception.

3167

3168

This should be called after _DirectPackAccess raises a

3169

RetryWithNewPacks exception. This function will handle the common logic

3170

of determining when the error is fatal versus being temporary.

3171

It will also make sure that the original exception is raised, rather

3172

than the RetryWithNewPacks exception.

3173

3174

If this function returns, then the calling function should retry

3175

whatever operation was being performed. Otherwise an exception will

3176

be raised.

3177

3178

:param retry_exc: A RetryWithNewPacks exception.

3179

"""

3180

is_error = False

3181

if self._reload_func is None:

3182

is_error = True

3183

elif not self._reload_func():

3184

# The reload claimed that nothing changed

3185

if not retry_exc.reload_occurred:

3186

# If there wasn't an earlier reload, then we really were

3187

# expecting to find changes. We didn't find them, so this is a

3188

# hard error

3189

is_error = True

3190

if is_error:

3191

exc_class, exc_value, exc_traceback = retry_exc.exc_info

3192

raise exc_class, exc_value, exc_traceback

3193

3194

3195

# Deprecated, use PatienceSequenceMatcher instead

3196

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

3197

3198

3199

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :return: An iterator over the result of _KnitAnnotator.annotate for
        revision_id.
    """
    return iter(_KnitAnnotator(knit).annotate(revision_id))

3208

3209

3210

class _KnitAnnotator(object):

3211

"""Build up the annotations for a text."""

3212

3213

def __init__(self, knit):
    """Create an annotator with empty working state for knit."""
    self._knit = knit

    # Content objects, differs from fulltexts because of how final newlines
    # are treated by knits. the content objects here will always have a
    # final newline
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Track the raw data for nodes that we could not process yet.
    # This maps the revision_id of the base to a list of children that will
    # be annotated from it.
    self._pending_children = {}

    # Nodes which cannot be extracted
    self._ghosts = set()

    # Track how many children this node has, so we know if we need to keep
    # it
    self._annotate_children = {}
    self._compression_children = {}

    self._all_build_details = {}
    # The children => parent revision_id graph
    self._revision_id_graph = {}

    self._heads_provider = None

    self._nodes_to_keep_annotations = set()
    self._generations_until_keep = 100

3245

3246

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new generation count; stored as given.
    """
    generations = value
    self._generations_until_keep = generations

3253

3254

def _add_fulltext_content(self, revision_id, content_obj):
    """Remember content_obj as the fulltext content of revision_id.

    :return: The result of content_obj.text().
    """
    contents = self._fulltext_contents
    contents[revision_id] = content_obj
    # TODO: jam 20080305 It might be good to check the sha1digest here
    return content_obj.text()

3258

3259

def _check_parents(self, child, nodes_to_annotate):
    """Check if all parents have been processed.

    :param child: A tuple of (rev_id, parents, raw_content)
    :param nodes_to_annotate: If child is ready, add it to
        nodes_to_annotate, otherwise put it back in self._pending_children
    """
    annotated = self._annotated_lines
    for parent_id in child[1]:
        if parent_id in annotated:
            continue
        # This parent has not been annotated yet: park the child under the
        # first such parent and stop looking.
        self._pending_children.setdefault(parent_id, []).append(child)
        return
    # Every parent is annotated, so this one is ready to be processed.
    nodes_to_annotate.append(child)

3275

3276

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision being annotated.
    :param fulltext: The text handed to annotate.reannotate.
    :param parent_ids: The parents whose annotated lines are reused.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    known = self._annotated_lines
    parent_lines = [known[parent_id] for parent_id in parent_ids]
    self._annotated_lines[revision_id] = list(annotate.reannotate(
        parent_lines, fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider()))
    for parent_id in parent_ids:
        remaining = self._annotate_children[parent_id]
        remaining.remove(revision_id)
        if remaining or parent_id in self._nodes_to_keep_annotations:
            continue
        # No child still needs this parent and it is not marked to keep,
        # so drop what is cached for it.
        del self._annotated_lines[parent_id]
        del self._all_build_details[parent_id]
        self._fulltext_contents.pop(parent_id, None)
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, ready)
    return ready

3304

3305

def _get_build_graph(self, key):
    """Get the graphs for building texts and annotations.

    The data needed to create a full text can differ from the data needed
    to annotate that text. (At a minimum, an annotation needs both parents,
    while a fulltext can be generated from a single parent.)

    Walks the ancestry of ``key`` one generation at a time, recording build
    details, the parent graph, compression children, annotation children
    and ghosts on ``self`` as it goes.

    :return: A list of (key, index_memo) records, suitable for
        passing to read_records_iter to start reading in the raw data from
        the pack file.
    """
    if key in self._annotated_lines:
        # Already annotated: nothing needs to be read.
        return []
    records = []
    generation = 0
    kept_generation = 0
    pending = set([key])
    while pending:
        # Look up everything discovered in the previous pass.
        generation += 1
        this_iteration = pending
        build_details = self._knit._index.get_build_details(this_iteration)
        self._all_build_details.update(build_details)
        pending = set()
        for node_key, details in build_details.iteritems():
            (index_memo, compression_parent, parents,
             _record_details) = details
            self._revision_id_graph[node_key] = parents
            records.append((node_key, index_memo))
            # Only parents without build details still need looking up.
            for parent in parents:
                if parent not in self._all_build_details:
                    pending.add(parent)
            if compression_parent:
                delta_children = self._compression_children.setdefault(
                    compression_parent, [])
                delta_children.append(node_key)
            if not parents:
                continue
            for parent in parents:
                waiting = self._annotate_children.setdefault(parent, [])
                waiting.append(node_key)
            num_gens = generation - kept_generation
            if ((num_gens >= self._generations_until_keep)
                    and len(parents) > 1):
                # A merge node far enough from the last kept one: remember
                # to hold on to its annotation.
                kept_generation = generation
                self._nodes_to_keep_annotations.add(node_key)

        # Anything requested but not returned by the index is a ghost.
        missing_versions = this_iteration.difference(build_details.keys())
        self._ghosts.update(missing_versions)
        for ghost in missing_versions:
            # Record the ghost with no parents and do not search for it.
            self._revision_id_graph[ghost] = ()
            pending.discard(ghost)
    if self._ghosts.intersection(self._compression_children):
        raise KnitCorrupt(
            "We cannot have nodes which have a ghost compression parent:\n"
            "ghosts: %r\n"
            "compression children: %r"
            % (self._ghosts, self._compression_children))
    # Forget children registered against ghosts, so nothing waits for a
    # node that will never be built.
    for ghost in self._ghosts:
        self._annotate_children.pop(ghost, None)
    # Parents are discovered after their children, so hand the records
    # back in reverse discovery order.
    records.reverse()
    return records

3375

3376

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    :param records: passed straight to self._knit._read_records_iter().
    """
    # Records are handled in whatever order they are read back. Whatever
    # can be annotated immediately is; anything still waiting on a parent
    # is parked and picked up again once that parent has been processed.
    for rev_id, record, _digest in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            continue
        parent_ids = [parent for parent in self._revision_id_graph[rev_id]
                      if parent not in self._ghosts]
        (_memo, _comp_parent, _parents,
         record_details) = self._all_build_details[rev_id]
        pending_nodes = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if not parent_ids:
            # Nothing to wait for: parse and annotate straight away.
            # TODO: This probably needs to be decoupled
            fulltext_content, _delta = self._knit._factory.parse_record(
                rev_id, record, record_details, None)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content)
            pending_nodes.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=None))
        else:
            # Queued only if every parent has already been annotated.
            self._check_parents((rev_id, parent_ids, record), pending_nodes)

        while pending_nodes:
            # Should we use a queue here instead of a stack?
            rev_id, parent_ids, record = pending_nodes.pop()
            (_memo, compression_parent, _parents,
             record_details) = self._all_build_details[rev_id]
            blocks = None
            if compression_parent is None:
                fulltext_content = self._knit._factory.parse_fulltext(
                    record, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
            else:
                comp_children = self._compression_children[
                    compression_parent]
                if rev_id not in comp_children:
                    raise AssertionError("%r not in compression children %r"
                                         % (rev_id, comp_children))
                # With a single remaining child (and no request to keep the
                # parent) the parent's content object can be reused.
                reuse_content = (
                    len(comp_children) == 1
                    and compression_parent
                    not in self._nodes_to_keep_annotations)
                if reuse_content:
                    # Take it out of the cache because it is about to be
                    # changed, and copy the lines since they may be
                    # modified too.
                    parent_fulltext_content = self._fulltext_contents.pop(
                        compression_parent)
                    parent_fulltext = list(parent_fulltext_content.text())
                else:
                    parent_fulltext_content = self._fulltext_contents[
                        compression_parent]
                    parent_fulltext = parent_fulltext_content.text()
                comp_children.remove(rev_id)
                fulltext_content, delta = self._knit._factory.parse_record(
                    rev_id, record, record_details,
                    parent_fulltext_content,
                    copy_base_content=(not reuse_content))
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
                if compression_parent == parent_ids[0]:
                    # The delta is against the left parent, so its matching
                    # blocks can be handed to the annotator directly.
                    blocks = KnitContent.get_line_delta_blocks(
                        delta, parent_fulltext, fulltext)
            pending_nodes.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))

3452

3453

def _get_heads_provider(self):
    """Return the heads provider used for resolving ancestry issues.

    The provider is built on first use from self._revision_id_graph and
    then cached in self._heads_provider for later calls.
    """
    provider = self._heads_provider
    if provider is None:
        parents = _mod_graph.DictParentsProvider(self._revision_id_graph)
        provider = _mod_graph.FrozenHeadsCache(_mod_graph.Graph(parents))
        self._heads_provider = provider
    return provider

3463

3464

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    :param key: The key to annotate.
    :return: self._annotated_lines[key] once the records for ``key`` have
        been annotated, or the result of self._simple_annotate(key) when
        the knit has fallback versioned files.
    :raises errors.RevisionNotPresent: if ``key`` is found to be a ghost
        while building the graph.
    """
    if len(self._knit._fallback_vfs) > 0:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    while True:
        try:
            records = self._get_build_graph(key)
            if key in self._ghosts:
                raise errors.RevisionNotPresent(key, self._knit)
            self._annotate_records(records)
            return self._annotated_lines[key]
        # "except X as e" replaces the Python-2-only "except X, e"
        # spelling, which is a SyntaxError on Python 3.
        except errors.RetryWithNewPacks as e:
            # Either reloads the pack access and lets the loop retry, or
            # re-raises.
            self._knit._access.reload_or_raise(e)
            # The cached build_details are no longer valid
            self._all_build_details.clear()

3483

3484

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The list of annotated lines produced by annotate.reannotate
        for ``key``.
    :raises errors.RevisionNotPresent: if no annotation was produced for
        ``key`` (it was not among the present keys found by the search).
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    present_keys = set()
    ghost_keys = set()
    while True:
        try:
            present, ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        present_keys.update(present)
        ghost_keys.update(ghosts)
    parent_map = self._knit.get_parent_map(present_keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(present_keys, 'topological',
                                               True):
        # Use a separate name so the requested ``key`` stays intact for
        # the final lookup below.
        record_key = record.key
        fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parents = parent_map[record_key]
        if parents is not None:
            # Ghost parents are never streamed, so they have no entry in
            # parent_cache; skip them instead of failing with KeyError.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent not in ghost_keys]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

3520

3521

3522

try:

3523

from bzrlib._knit_load_data_c import _load_data_c as _load_data

3524

except ImportError:

3525

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »