/brz/remove-bazaar : revision 3549.1.1

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2008-07-17 05:51:29 UTC
mto: (3606.1.1 prepare-1.6)
mto: This revision was merged to the branch mainline in revision 3551.
Revision ID: mbp@sourcefrog.net-20080717055129-pc3im88b6aou5fv4

rename push --reference to --stacked-on

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_add_fallback_repository.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". Each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). The delta also ends with an

end-marker: simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import urllib

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

100

KnitCorrupt,

101

KnitHeaderError,

102

RevisionNotPresent,

103

RevisionAlreadyPresent,

104

)

105

from bzrlib.graph import Graph

106

from bzrlib.osutils import (

107

contains_whitespace,

108

contains_linebreaks,

109

sha_string,

110

sha_strings,

111

split_lines,

112

)

113

from bzrlib.tsort import topo_sort

114

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

115

import bzrlib.ui

116

from bzrlib.versionedfile import (

117

AbsentContentFactory,

118

adapter_registry,

119

ConstantMapper,

120

ContentFactory,

121

FulltextContentFactory,

122

VersionedFile,

123

VersionedFiles,

124

)

125

import bzrlib.weave

126

127

128

# TODO: Split out code specific to this format into an associated object.

129

130

# TODO: Can we put in some kind of value to check that the index and data

131

# files belong together?

132

133

# TODO: accommodate binaries, perhaps by storing a byte count

134

135

# TODO: function to check whole file

136

137

# TODO: atomically append data, then measure backwards from the cursor

138

# position after writing to work out where it was located. we may need to

139

# bypass python file buffering.

140

141

DATA_SUFFIX = '.knit'

142

INDEX_SUFFIX = '.kndx'

143

144

145

class KnitAdapter(object):
    """Shared setup for the knit record adapters defined below.

    Each instance carries a throwaway KnitVersionedFiles (built with no index
    and no access object) that the subclasses use purely for its record
    parsing/serialising helpers, plus one annotated and one plain factory.
    """

    def __init__(self, basis_vf):
        """Prepare the helpers an adapter needs.

        :param basis_vf: A versioned file from which basis texts of deltas
            can be read.  Adapters that never need a basis text may be given
            None.
        """
        self._basis_vf = basis_vf
        self._plain_factory = KnitPlainFactory()
        self._annotate_factory = KnitAnnotateFactory()
        # Only used for its _parse_record_unchecked/_record_to_data helpers.
        self._data = KnitVersionedFiles(None, None)

158

159

160

class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapt an annotated fulltext knit record to an unannotated one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the record re-serialised without annotations.

        :param factory: The content factory the bytes came from (not used
            by this adapter).
        :param annotated_compressed_bytes: The raw annotated record.
        :return: The bytes produced by _record_to_data for the plain text.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = header[1]
        parsed = self._annotate_factory.parse_fulltext(annotated_lines,
            version_id)
        # header[3] is handed on in the digest position of _record_to_data.
        _size, plain_bytes = self._data._record_to_data(
            (version_id,), header[3], parsed.text())
        return plain_bytes

169

170

171

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapt an annotated line-delta knit record to an unannotated one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the delta record re-serialised without annotations.

        :param factory: The content factory the bytes came from (not used
            by this adapter).
        :param annotated_compressed_bytes: The raw annotated delta record.
        :return: The bytes produced by _record_to_data for the plain delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = header[1]
        # plain=True drops the origin of every line while parsing.
        hunks = self._annotate_factory.parse_line_delta(annotated_lines,
            version_id, plain=True)
        plain_lines = self._plain_factory.lower_line_delta(hunks)
        _size, plain_bytes = self._data._record_to_data(
            (version_id,), header[3], plain_lines)
        return plain_bytes

182

183

184

class FTAnnotatedToFullText(KnitAdapter):
    """Adapt an annotated fulltext knit record to a plain full text string."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the full text held in an annotated fulltext record.

        :param factory: The content factory for the record; its key and
            _build_details are passed on to parse_record.
        :param annotated_compressed_bytes: The raw annotated record.
        :return: The text lines joined into a single string.
        """
        parsed_record = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        _header, annotated_lines = parsed_record
        # No basis content is supplied: this adapter handles fulltexts.
        content, _delta = self._annotate_factory.parse_record(
            factory.key[-1], annotated_lines, factory._build_details, None)
        text_lines = content.text()
        return ''.join(text_lines)

193

194

195

class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapt an annotated line-delta knit record to a plain full text."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the full text obtained by applying the delta to its basis.

        :param factory: The content factory for the record; its first
            parent is used as the compression parent and _build_details[1]
            as the strip-eol flag.
        :param annotated_compressed_bytes: The raw annotated delta record.
        :return: The resulting text lines joined into a single string.
        :raises errors.RevisionNotPresent: If the basis text is absent from
            the basis versioned file.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = header[1]
        hunks = self._annotate_factory.parse_line_delta(annotated_lines,
            version_id, plain=True)
        basis_key = factory.parents[0]
        basis_stream = self._basis_vf.get_record_stream(
            [basis_key], 'unordered', True)
        basis_record = basis_stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(basis_key, self._basis_vf)
        basis_text = basis_record.get_bytes_as('fulltext')
        # The delta was parsed from an annotated record but the basis is
        # plain, so the hunks are applied by hand onto a plain content.
        result = PlainKnitContent(split_lines(basis_text), basis_key)
        result.apply_delta(hunks, version_id)
        result._should_strip_eol = factory._build_details[1]
        return ''.join(result.text())

215

216

217

class FTPlainToFullText(KnitAdapter):
    """Adapt a plain (unannotated) fulltext knit record to a full text."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the full text held in a plain fulltext record.

        :param factory: The content factory for the record; its key and
            _build_details are passed on to parse_record.
        :param compressed_bytes: The raw record.
        :return: The text lines joined into a single string.
        """
        parsed_record = self._data._parse_record_unchecked(compressed_bytes)
        _header, record_lines = parsed_record
        # No basis content is supplied: this adapter handles fulltexts.
        content, _delta = self._plain_factory.parse_record(
            factory.key[-1], record_lines, factory._build_details, None)
        text_lines = content.text()
        return ''.join(text_lines)

226

227

228

class DeltaPlainToFullText(KnitAdapter):
    """Adapt a plain (unannotated) line-delta knit record to a full text."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the full text obtained by applying the delta to its basis.

        :param factory: The content factory for the record; its first
            parent is used as the compression parent and its _build_details
            are passed on to parse_record.
        :param compressed_bytes: The raw plain delta record.
        :return: The resulting text lines joined into a single string.
        :raises errors.RevisionNotPresent: If the basis text is absent from
            the basis versioned file.
        """
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # parse_record parses the delta itself, so it is not parsed
        # separately here.  basis_content is private to this call, so it can
        # be modified in place instead of being copied first.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content, copy_base_content=False)
        return ''.join(content.text())

248

249

250

class KnitContentFactory(ContentFactory):
    """Content factory used when streaming records out of a knit.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional knit; when given, get_bytes_as('fulltext')
            is answered by calling its get_text.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # The storage kind is 'knit-[annotated-]{delta|ft}-gz'.
        kind = 'ft'
        if build_details[0] == 'line-delta':
            kind = 'delta'
        annotated_kind = ''
        if annotated:
            annotated_kind = 'annotated-'
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._build_details = build_details
        self._knit = knit

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested storage kind.

        The native storage kind yields the raw record; 'fulltext' is only
        available when a knit was supplied at construction time.

        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            return self._raw_record
        if storage_kind != 'fulltext' or self._knit is None:
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
        return self._knit.get_text(self.key[0])

293

294

295

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The in-memory form is a list of lines that all end in \\n, together with
    a flag recording whether the final line ending is genuinely part of the
    text, mirroring the on-disk knit representation.
    """

    def __init__(self):
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta hunks turning this content into new_lines.

        Each hunk is (old_start, old_end, new_line_count, new_lines_slice).
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(None, source_texts,
            target_texts)
        for opcode in matcher.get_opcodes():
            tag, i1, i2, j1, j2 = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
        target_len = len(target)

        def usable_length(src_start, tgt_start, length):
            # Knit deltas do not provide reliable info about whether the
            # last line of a run matches, due to eol handling, so the final
            # line of every candidate run is compared explicitly.
            if length <= 0:
                return length
            last = length - 1
            if source[src_start + last] != target[tgt_start + last]:
                return last
            return length

        src_cursor = 0
        tgt_cursor = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_cursor
            matched = usable_length(src_cursor, tgt_cursor, gap)
            if matched > 0:
                yield src_cursor, tgt_cursor, matched
            tgt_cursor += inserted + gap
            src_cursor = hunk_end
        tail = target_len - tgt_cursor
        matched = usable_length(src_cursor, tgt_cursor, tail)
        if matched > 0:
            yield src_cursor, tgt_cursor, matched
        # Zero-length terminator block.
        yield src_cursor + tail, target_len, 0

349

350

351

class AnnotatedKnitContent(KnitContent):

352

"""Annotated content."""

353

354

def __init__(self, lines):

355

KnitContent.__init__(self)

356

self._lines = lines

357

358

def annotate(self):

359

"""Return a list of (origin, text) for each content line."""

360

lines = self._lines[:]

361

if self._should_strip_eol:

362

origin, last_line = lines[-1]

363

lines[-1] = (origin, last_line.rstrip('\n'))

364

return lines

365

366

def apply_delta(self, delta, new_version_id):

367

"""Apply delta to this object to become new_version_id."""

368

offset = 0

369

lines = self._lines

370

for start, end, count, delta_lines in delta:

371

lines[offset+start:offset+end] = delta_lines

372

offset = offset + (start - end) + count

373

374

def text(self):

375

try:

376

lines = [text for origin, text in self._lines]

377

except ValueError, e:

378

# most commonly (only?) caused by the internal form of the knit

379

# missing annotation information because of a bug - see thread

380

# around 20071015

381

raise KnitCorrupt(self,

382

"line in annotated knit missing annotation information: %s"

383

% (e,))

384

if self._should_strip_eol:

385

lines[-1] = lines[-1].rstrip('\n')

386

return lines

387

388

def copy(self):

389

return AnnotatedKnitContent(self._lines[:])

390

391

392

class PlainKnitContent(KnitContent):
    """Unannotated content.

    Calling annotate[_iter] on this content reports one and the same version
    for every line, so annotate[_iter] is generally not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, text) for text in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        target = self._lines
        # shift tracks how far earlier hunks have moved later positions.
        shift = 0
        for start, end, count, delta_lines in delta:
            target[shift + start:shift + end] = delta_lines
            shift += count - (end - start)
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        duplicate_lines = self._lines[:]
        return PlainKnitContent(duplicate_lines, self._version_id)

    def text(self):
        """Return the text lines.

        The stored list itself is returned unless the final newline has to
        be stripped, in which case a modified copy is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

427

428

429

class _KnitFactory(object):
    """Base class for common Factory functions."""

    def parse_record(self, version_id, record, record_details,
                     base_content, copy_base_content=True):
        """Parse a record into a full content object.

        :param version_id: The official version id for this content
        :param record: The data returned by read_records_iter()
        :param record_details: Details about the record returned by
            get_build_details; unpacked here as (method, noeol).
        :param base_content: If get_build_details returns a
            compression_parent, you must supply a base_content here,
            otherwise use None.
        :param copy_base_content: When building from the base_content,
            True works on a copy and returns a new object; False modifies
            base_content in place.
        :return: (content, delta) A Content object and possibly a
            line-delta; delta is None for anything but a line-delta record.
        """
        method, noeol = record_details
        if method != 'line-delta':
            content = self.parse_fulltext(record, version_id)
            delta = None
        else:
            content = base_content
            if copy_base_content:
                content = base_content.copy()
            delta = self.parse_line_delta(record, version_id)
            content.apply_delta(delta, version_id)
        content._should_strip_eol = noeol
        return (content, delta)

461

462

463

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta.

        :param lines: The serialised delta lines.
        :param version_id: Passed straight through to parse_line_delta.
            It is optional here so that existing one-argument calls, which
            used to fail with a TypeError, keep their shape.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: The serialised delta lines.
        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        :return: A list of hunks in the internal representation.
        """
        result = []
        lines = iter(lines)
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the (origin, line) annotation of key read from knit.

        For tuple keys every origin is expanded to a full key by prepending
        the prefix of key (everything but its last element).
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key suffixes
        # for this factory.
        if type(key) == tuple:
            prefix = key[:-1]
            origins = content.annotate()
            result = []
            for origin, line in origins:
                result.append((prefix + (origin,), line))
            return result
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple here?
            # Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()

585

586

587

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return plain content holding lines, labelled with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, new_lines) for each hunk in lines.

        lines must support len() and slicing; each hunk is a
        'start,end,count' header followed by count content lines.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        take_line = line_iter.next
        for header in line_iter:
            # The third header field is the number of content lines.
            remaining = int(header.split(',')[2])
            for _unused in xrange(remaining):
                yield take_line()

    def lower_fulltext(self, content):
        """Return the serialisable form of a fulltext: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Return the serialisable form of delta.

        Each hunk becomes a 'start,end,count' header line followed by its
        content lines.
        """
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(hunk_lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator built over knit."""
        return _KnitAnnotator(knit).annotate(key)

647

648

649

650

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles backed by a _KndxIndex and a _KnitKeyAccess.
    """
    def factory(transport):
        def return_none():
            return None

        def return_true():
            return True

        kndx_index = _KndxIndex(transport, mapper, return_none, return_true,
            return_true)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access,
            annotated=annotated)
    return factory

664

665

666

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles that writes to a 'newpack' stream on it.  The
        stream and container writer are attached to the result as the
        attributes stream and writer.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for the graph, one more for delta parents.
        ref_length = len([flag for flag in (graph, delta) if flag])
        # Deltas are prohibited entirely when delta compression is off.
        max_delta_chain = 0
        if delta:
            max_delta_chain = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()

        def always_true():
            return True

        knit_index = _KnitGraphIndex(graph_index, always_true,
            parents=parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(knit_index, pack_access,
            max_delta_chain=max_delta_chain)
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

701

702

703

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object carrying `stream` and `writer`
        attributes, such as the result of a make_pack_factory factory.
    """
    # NOTE(review): the stream is closed before the container writer is
    # ended; confirm that writer.end() does not need to write to an open
    # stream.
    versioned_files.stream.close()
    versioned_files.writer.end()

706

707

708

class KnitVersionedFiles(VersionedFiles):

709

"""Storage for many versioned files using knit compression.

710

711

Backend storage is managed by indices and data objects.

712

"""

713

714

def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated and
        stored during insertion.
    """
    if annotated:
        content_factory = KnitAnnotateFactory()
    else:
        content_factory = KnitPlainFactory()
    self._index = index
    self._access = data_access
    self._max_delta_chain = max_delta_chain
    self._factory = content_factory
    # Filled in through add_fallback_versioned_files.
    self._fallback_vfs = []

734

735

def add_fallback_versioned_files(self, a_versioned_files):
    """Register another source for texts that are not present in this knit.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

741

742

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines()."""
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so we give them
    # an empty tuple instead.
    effective_parents = parents
    if effective_parents is None:
        effective_parents = ()
    return self._add(key, lines, effective_parents, parent_texts,
        left_matching_blocks, nostore_sha, random_id)

755

756

def _add(self, key, lines, parents, parent_texts,
    left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :param key: The key to store the text under; every element must be a
        str.
    :param lines: The lines of the text.
    :param parents: The parent keys of the new text.
    :param parent_texts: Optional dict passed on to _merge_annotations;
        None is treated as an empty dict.
    :param left_matching_blocks: Passed on to _merge_annotations.
    :param nostore_sha: If this equals the sha1 of the text, nothing is
        stored and errors.ExistingContent is raised.
    :param random_id: Passed on to the index when the record is added.
    :return: (digest, text_length, content) where digest is the sha1 of the
        joined lines, text_length their total length and content the
        content object built for the text.
    :raises errors.ExistingContent: See nostore_sha.
    :raises TypeError: If key contains a non-str element.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence.
    present_parent_map = self.get_parent_map(parents)
    for parent in parents:
        if parent in present_parent_map:
            present_parents.append(parent)

    # Currently we can only compress against the left most present parent.
    if (len(present_parents) == 0 or
        present_parents[0] != parents[0]):
        delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas. This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents[0])

    # Measured before any newline is appended below, so it is the length
    # of the text as supplied by the caller.
    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    for element in key:
        if type(element) != str:
            raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if 'no-eol' in options:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        # delta_hunks is always bound here: the branch above runs whenever
        # delta is true.
        options.append('line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, bytes = self._record_to_data(key, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = self._record_to_data(key, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self._factory.lower_fulltext(content)
            size, bytes = self._record_to_data(key, digest,
                store_lines)

    # Write the record data first, then record where it went in the index.
    access_memo = self._access.add_raw_records([(key, size)], bytes)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

839

840

def annotate(self, key):
    """See VersionedFiles.annotate."""
    # Annotation is delegated to whichever factory (annotated or plain)
    # this knit was created with.
    content_factory = self._factory
    return content_factory.annotate(self, key)

843

844

def check(self, progress_bar=None):
    """See VersionedFiles.check()."""
    # This doesn't actually test extraction of everything, but that will
    # impact 'bzr check' substantially, and needs to be integrated with
    # care. However, it does check for the obvious problem of a delta with
    # no basis.
    all_keys = self._index.keys()
    parent_map = self.get_parent_map(all_keys)
    for key in all_keys:
        if self._index.get_method(key) == 'fulltext':
            continue
        # The first parent is taken to be the basis of the delta.
        compression_parent = parent_map[key][0]
        if compression_parent in parent_map:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            compression_parent, key))
    for fallback in self._fallback_vfs:
        fallback.check()

861

862

def _check_add(self, key, lines, random_id, check_content):
    """check that version_id and lines are safe to add."""
    # Only the last element of the key is validated as a version id.
    version_id = key[-1]
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

875

876

def _check_header(self, key, line):
    """Split a record header line and verify it names the last element of key.

    :return: The split header.
    """
    header_fields = self._split_header(line)
    self._check_header_version(header_fields, key[-1])
    return header_fields

880

881

def _check_header_version(self, rec, version_id):

882

"""Checks the header version on original format knit records.

883

884

These have the last component of the key embedded in the record.

885

"""

886

if rec[1] != version_id:

887

raise KnitCorrupt(self,

888

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

889

890

def _check_should_delta(self, parent):
    """Iterate back through the parent listing, looking for a fulltext.

    This is used when we want to decide whether to add a delta or a new
    fulltext. It searches for _max_delta_chain parents. When it finds a
    fulltext parent, it sees if the total size of the deltas leading up to
    it is large enough to indicate that we want a new full text anyway.

    Return True if we should create a new delta, False if we should use a
    full text.
    """
    delta_size = 0
    fulltext_size = None
    for count in xrange(self._max_delta_chain):
        # XXX: Collapse these two queries:
        try:
            method = self._index.get_method(parent)
        except RevisionNotPresent:
            # Some basis is not locally present: always fulltext
            return False
        index, pos, size = self._index.get_position(parent)
        if method == 'fulltext':
            fulltext_size = size
            break
        delta_size += size
        # We don't explicitly check for presence because this is in an
        # inner loop, and if it's missing it'll fail anyhow.
        # TODO: This should be asking for compression parent, not graph
        # parent.
        parent = self._index.get_parent_map([parent])[parent][0]
    else:
        # No fulltext was found within _max_delta_chain steps, so a new
        # fulltext must be created.
        return False
    # Simple heuristic - if the total I/O would be greater as a delta than
    # the originally installed fulltext, we create a new fulltext.
    return fulltext_size > delta_size

926

927

def _build_details_to_components(self, build_details):

928

"""Convert a build_details tuple to a position tuple."""

929

# record_details, access_memo, compression_parent

930

return build_details[3], build_details[0], build_details[1]

931

932

def _get_components_positions(self, keys, allow_missing=False):
    """Produce a map of position data for the components of keys.

    This data is intended to be used for retrieving the knit records.

    A dict of key to (record_details, index_memo, next) is returned, as
    built by _build_details_to_components.
    record_details describes the record; its first item is the method by
        which referenced data should be applied.
    index_memo is the handle to pass to the data access to actually get the
        data
    next is the build-parent of the version, or None for fulltexts.

    The requested keys are followed through their build-parents, so the
    result also covers every component needed to build them.

    :param keys: The keys to look up.
    :param allow_missing: If True do not raise an error on a missing
        component, just ignore it.
    :raises errors.RevisionNotPresent: If a component is missing from the
        index and allow_missing is False.
    """
    component_data = {}
    pending_components = keys
    while pending_components:
        build_details = self._index.get_build_details(pending_components)
        current_components = set(pending_components)
        pending_components = set()
        for key, details in build_details.iteritems():
            # Only the compression parent is needed here; the unpacking
            # also checks that details has the expected four items.
            (_index_memo, compression_parent, _parents,
                _record_details) = details
            if compression_parent is not None:
                pending_components.add(compression_parent)
            component_data[key] = self._build_details_to_components(details)
        missing = current_components.difference(build_details)
        if missing and not allow_missing:
            raise errors.RevisionNotPresent(missing.pop(), self)
    return component_data

965

966

def _get_content(self, key, parent_texts={}):

967

"""Returns a content object that makes up the specified

968

version."""

969

cached_version = parent_texts.get(key, None)

970

if cached_version is not None:

971

# Ensure the cache dict is valid.

972

if not self.get_parent_map([key]):

973

raise RevisionNotPresent(key, self)

974

return cached_version

975

text_map, contents_map = self._get_content_maps([key])

976

return contents_map[key]

977

978

def _get_content_maps(self, keys, nonlocal_keys=None):
    """Produce maps of text and KnitContents

    :param keys: The keys to produce content maps for.
    :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
        which are known to not be in this knit, but rather in one of the
        fallback knits.
    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        The second item is the final_content dict built below, i.e. only
        the content for the requested keys.
    :raises RevisionNotPresent: if a component needed to rebuild a key is
        not in the record map.
    :raises KnitCorrupt: if the sha-1 of a rebuilt text does not match the
        digest stored with its record.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children than need to apply delta's to it is > 1 or it is part of the
    # final output.
    keys = list(keys)
    multiple_versions = len(keys) != 1
    record_map = self._get_record_map(keys, allow_missing=True)

    text_map = {}
    content_map = {}
    final_content = {}
    if nonlocal_keys is None:
        nonlocal_keys = set()
    else:
        nonlocal_keys = frozenset(nonlocal_keys)
    # Fetch the non-local keys as fulltexts from the fallbacks, in order,
    # stopping as soon as every one of them has been found.
    missing_keys = set(nonlocal_keys)
    for source in self._fallback_vfs:
        if not missing_keys:
            break
        for record in source.get_record_stream(missing_keys,
            'unordered', True):
            if record.storage_kind == 'absent':
                continue
            missing_keys.remove(record.key)
            lines = split_lines(record.get_bytes_as('fulltext'))
            text_map[record.key] = lines
            content_map[record.key] = PlainKnitContent(lines, record.key)
            if record.key in keys:
                final_content[record.key] = content_map[record.key]
    for key in keys:
        if key in nonlocal_keys:
            # already handled
            continue
        # Walk the build-parent chain from key back to a record with no
        # build parent, or to a component whose content is already known.
        components = []
        cursor = key
        while cursor is not None:
            try:
                record, record_details, digest, next = record_map[cursor]
            except KeyError:
                raise RevisionNotPresent(cursor, self)
            components.append((cursor, record, record_details, digest))
            cursor = next
            if cursor in content_map:
                # no need to plan further back
                components.append((cursor, None, None, None))
                break

        # Apply the components oldest-first, each one building on the
        # content produced by the previous one.
        content = None
        for (component_id, record, record_details,
             digest) in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                content, delta = self._factory.parse_record(key[-1],
                    record, record_details, content,
                    copy_base_content=multiple_versions)
                if multiple_versions:
                    content_map[component_id] = content

        final_content[key] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            raise KnitCorrupt(self,
                '\n sha-1 %s'
                '\n of reconstructed text does not match'
                '\n expected %s'
                '\n for version %s' %
                (actual_sha, digest, key))
        text_map[key] = text
    return text_map, final_content

1061

1062

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the merged mapping is wanted; the per-source breakdown that
    # comes with it is ignored.
    lookup = self._get_parent_map_with_sources(keys)
    return lookup[0]

1070

1071

def _get_parent_map_with_sources(self, keys):

1072

"""Get a map of the parents of keys.

1073

1074

:param keys: The keys to look up parents for.

1075

:return: A tuple. The first element is a mapping from keys to parents.

1076

Absent keys are absent from the mapping. The second element is a

1077

list with the locations each key was found in. The first element

1078

is the in-this-knit parents, the second the first fallback source,

1079

and so on.

1080

"""

1081

result = {}

1082

sources = [self._index] + self._fallback_vfs

1083

source_results = []

1084

missing = set(keys)

1085

for source in sources:

1086

if not missing:

1087

break

1088

new_result = source.get_parent_map(missing)

1089

source_results.append(new_result)

1090

result.update(new_result)

1091

missing.difference_update(set(new_result))

1092

return result, source_results

1093

1094

def _get_record_map(self, keys, allow_missing=False):

1095

"""Produce a dictionary of knit records.

1096

1097

:return: {key:(record, record_details, digest, next)}

1098

record

1099

data returned from read_records

1100

record_details

1101

opaque information to pass to parse_record

1102

digest

1103

SHA1 digest of the full text after all steps are done

1104

1105

build-parent of the version, i.e. the leftmost ancestor.

1106

Will be None if the record is not a delta.

1107

:param keys: The keys to build a map for

1108

:param allow_missing: If some records are missing, rather than

1109

error, just return the data that could be generated.

1110

"""

1111

position_map = self._get_components_positions(keys,

1112

allow_missing=allow_missing)

1113

# key = component_id, r = record_details, i_m = index_memo, n = next

1114

records = [(key, i_m) for key, (r, i_m, n)

1115

in position_map.iteritems()]

1116

record_map = {}

1117

for key, record, digest in \

1118

self._read_records_iter(records):

1119

(record_details, index_memo, next) = position_map[key]

1120

record_map[key] = record, record_details, digest, next

1121

return record_map

1122

1123

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children. Forced to 'unordered' when the index has no graph.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data). In
        this implementation that means every present key is yielded as a
        FulltextContentFactory.
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced. Keys found in no source are
        yielded as AbsentContentFactory objects before any present key.
    """
    # keys might be a generator
    keys = set(keys)
    if not keys:
        return
    if not self._index.has_graph:
        # Cannot topological order when no graph has been stored.
        ordering = 'unordered'
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    # NOTE(review): this value is recomputed from global_map further down
    # before it is read.
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                # Not in this knit's index at all.
                needed_from_fallback.add(key)
                continue
            result = True
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    # Reuse the verdict already reached for this ancestor.
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering == 'topological':
        # Global topological sort
        present_keys = topo_sort(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            # Start a new group whenever the source changes, so the
            # global order is preserved across groups.
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        text_map, _ = self._get_content_maps(present_keys,
            needed_from_fallback - absent_keys)
        for key in present_keys:
            yield FulltextContentFactory(key, global_map[key], None,
                ''.join(text_map[key]))
    else:
        # Note: this loop rebinds the name 'keys' to each group's key list.
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this knit's index, so fallback
                # i corresponds to parent_maps[i + 1].
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                    include_delta_closure):
                    yield record

1237

1238

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests are taken from this knit's own records first; keys still
    unresolved are then asked of each fallback in turn.
    """
    missing = set(keys)
    record_map = self._get_record_map(missing, allow_missing=True)
    # record entry 2 is the 'digest'. The record map may also hold keys
    # that were not asked for; those are left out.
    result = dict((key, details[2])
                  for key, details in record_map.iteritems()
                  if key in missing)
    missing -= set(result)
    for source in self._fallback_vfs:
        if not missing:
            break
        from_fallback = source.get_sha1s(missing)
        result.update(from_fallback)
        missing -= set(from_fallback)
    return result

1256

1257

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records whose storage kind is a knit type this knit can take are
    written as raw bytes; plain fulltexts go through add_lines; anything
    else is first adapted to a fulltext.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if the stream contains an 'absent' record,
        or if, once the stream is exhausted, some delta's basis parent was
        never inserted.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are cached per (from_kind, to_kind) key for the
        # duration of this call.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        parents = record.parents
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        if record.storage_kind in knit_types:
            if record.storage_kind not in native_types:
                # Prefer an adapter to a plain delta; fall back to one
                # producing a plain fulltext if that lookup raises KeyError.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind))
            else:
                bytes = record.get_bytes_as(record.storage_kind)
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            buffered = False
            if 'fulltext' not in options:
                # A delta: its basis is taken to be the first parent.
                basis_parent = parents[0]
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                if basis_parent not in self.get_parent_map([basis_parent]):
                    pending = buffered_index_entries.setdefault(
                        basis_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'fulltext':
            self.add_lines(record.key, parents,
                split_lines(record.get_bytes_as('fulltext')))
        else:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            lines = split_lines(adapter.get_bytes(
                record, record.get_bytes_as(record.storage_kind)))
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Deliberately ignored: the text is already stored.
                pass
        # Add any records whose basis parent is now available.
        added_keys = [record.key]
        while added_keys:
            key = added_keys.pop(0)
            if key in buffered_index_entries:
                index_entries = buffered_index_entries[key]
                self._index.add_records(index_entries)
                # Entries just released may in turn be the basis of
                # other buffered entries.
                added_keys.extend(
                    [index_entry[0] for index_entry in index_entries])
                del buffered_index_entries[key]
    # If there were any deltas which had a missing basis parent, error.
    if buffered_index_entries:
        raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
            self)

1369

1370

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \n
       terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose records should be walked.
    :param pb: Optional progress bar; a DummyProgress is used when None.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: after all available lines have been
        yielded, if some keys were found neither in this knit nor in any
        fallback.
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    key_records = []
    build_details = self._index.get_build_details(keys)
    for key, details in build_details.iteritems():
        if key in keys:
            key_records.append((key, details[0]))
            # Whatever is left in keys afterwards must come from a fallback.
            keys.remove(key)
    records_iter = enumerate(self._read_records_iter(key_records))
    for (key_idx, (key, data, sha_value)) in records_iter:
        pb.update('Walking content.', key_idx, total)
        compression_parent = build_details[key][1]
        if compression_parent is None:
            # fulltext
            line_iterator = self._factory.get_fulltext_content(data)
        else:
            # Delta
            line_iterator = self._factory.get_linedelta_content(data)
        # XXX: It might be more efficient to yield (key,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in line_iterator:
            yield line, key
    for source in self._fallback_vfs:
        if not keys:
            break
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        keys.difference_update(source_keys)
    if keys:
        # Pass self as the second argument, as every other
        # RevisionNotPresent raised here does; this used to read
        # self.filename, an attribute nothing visible here sets.
        raise RevisionNotPresent(keys, self)
    pb.update('Walking content.', total, total)

1430

1431

def _make_line_delta(self, delta_seq, new_content):

1432

"""Generate a line delta from delta_seq and new_content."""

1433

diff_hunks = []

1434

for op in delta_seq.get_opcodes():

1435

if op[0] == 'equal':

1436

continue

1437

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1438

return diff_hunks

1439

1440

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content and generate deltas.

    This is done by comparing the annotations based on changes to the text
    and generating a delta on the resulting full texts. If annotations are
    not being created then a simple delta is created.

    :param content: the new content; when annotated is true its _lines are
        modified in place.
    :param parents: the parent keys; parents[0] is the delta basis.
    :param parent_texts: passed through to _get_content as a cache of
        parent contents. It is only read here.
    :param delta: if true, a line delta against parents[0] is returned.
    :param annotated: if true, (origin, text) pairs are copied from each
        parent for the lines that match it.
    :param left_matching_blocks: optional precomputed matching blocks
        between parents[0] and content, used instead of running a diff.
    :return: the line delta when delta is true, otherwise None.
    """
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_key in parents:
            merge_content = self._get_content(parent_key, parent_texts)
            if (parent_key == parents[0] and delta_seq is not None):
                # Reuse the prematched blocks for the first parent.
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this copies (origin, text) pairs across to the new
                # content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
        # XXX: Robert says the following block is a workaround for a
        # now-fixed bug and it can probably be deleted. -- mbp 20080618
        if content._lines and content._lines[-1][1][-1] != '\n':
            # The copied annotation was from a line without a trailing EOL,
            # reinstate one for the content object, to ensure correct
            # serialization.
            line = content._lines[-1][1] + '\n'
            content._lines[-1] = (content._lines[-1][0], line)
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

1484

1485

def _parse_record(self, version_id, data):

1486

"""Parse an original format knit record.

1487

1488

These have the last element of the key only present in the stored data.

1489

"""

1490

rec, record_contents = self._parse_record_unchecked(data)

1491

self._check_header_version(rec, version_id)

1492

return record_contents, rec[3]

1493

1494

def _parse_record_header(self, key, raw_data):
    """Parse a record header for consistency.

    Only the first line of the decompressed record is read.

    :param key: The key the record is expected to be for; passed to
        _check_header together with the header line.
    :param raw_data: The gzip-compressed bytes of one record.
    :return: (stream, header_record): the decompressor stream, positioned
        after the header line and left open for the caller to close, and
        the value returned by _check_header.
    :raises KnitCorrupt: if reading or checking the header line raises any
        Exception.
    """
    df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
    try:
        # Current serialise
        rec = self._check_header(key, df.readline())
    except Exception, e:
        raise KnitCorrupt(self,
                          "While reading {%s} got %s(%s)"
                          % (key, e.__class__.__name__, str(e)))
    return df, rec

1509

1510

def _parse_record_unchecked(self, data):
    """Decompress one record and split it into header and content lines.

    The header is not checked against an expected version id here; only
    the record's internal consistency (line count, end marker) is.

    :param data: The gzip-compressed bytes of one record.
    :return: (rec, record_contents): the four header fields, and the list
        of lines between the header line and the end line.
    :raises KnitCorrupt: if the data cannot be decompressed, the header
        does not have four fields, the line count in the header is wrong,
        or the last line is not the expected end marker.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df = GzipFile(mode='rb', fileobj=StringIO(data))
    try:
        record_contents = df.readlines()
    except Exception, e:
        raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %
            (data, e.__class__.__name__, str(e)))
    # First line is the header, last line is the end marker; what is left
    # in record_contents after both pops is the payload.
    header = record_contents.pop(0)
    rec = self._split_header(header)
    last_line = record_contents.pop()
    # rec[2] is the line count recorded in the header.
    if len(record_contents) != int(rec[2]):
        raise KnitCorrupt(self,
                          'incorrect number of lines %s != %s'
                          ' for version {%s} %s'
                          % (len(record_contents), int(rec[2]),
                             rec[1], record_contents))
    # rec[1] is the version id recorded in the header.
    if last_line != 'end %s\n' % rec[1]:
        raise KnitCorrupt(self,
                          'unexpected version end line %r, wanted %r'
                          % (last_line, rec[1]))
    # NOTE(review): df is only closed on this success path.
    df.close()
    return rec, record_contents

1536

1537

def _read_records_iter(self, records):

1538

"""Read text records from data file and yield result.

1539

1540

The result will be returned in whatever is the fastest to read.

1541

Not by the order requested. Also, multiple requests for the same

1542

record will only yield 1 response.

1543

:param records: A list of (key, access_memo) entries

1544

:return: Yields (key, contents, digest) in the order

1545

read, not the order requested

1546

"""

1547

if not records:

1548

return

1549

1550

# XXX: This smells wrong, IO may not be getting ordered right.

1551

needed_records = sorted(set(records), key=operator.itemgetter(1))

1552

if not needed_records:

1553

return

1554

1555

# The transport optimizes the fetching as well

1556

# (ie, reads continuous ranges.)

1557

raw_data = self._access.get_raw_records(

1558

[index_memo for key, index_memo in needed_records])

1559

1560

for (key, index_memo), data in \

1561

izip(iter(needed_records), raw_data):

1562

content, digest = self._parse_record(key[-1], data)

1563

yield key, content, digest

1564

1565

def _read_records_iter_raw(self, records):

1566

"""Read text records from data file and yield raw data.

1567

1568

This unpacks enough of the text record to validate the id is

1569

as expected but thats all.

1570

1571

Each item the iterator yields is (key, bytes, sha1_of_full_text).

1572

"""

1573

# setup an iterator of the external records:

1574

# uses readv so nice and fast we hope.

1575

if len(records):

1576

# grab the disk data needed.

1577

needed_offsets = [index_memo for key, index_memo

1578

in records]

1579

raw_records = self._access.get_raw_records(needed_offsets)

1580

1581

for key, index_memo in records:

1582

data = raw_records.next()

1583

# validate the header (note that we can only use the suffix in

1584

# current knit records).

1585

df, rec = self._parse_record_header(key, data)

1586

df.close()

1587

yield key, data, rec[3]

1588

1589

def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Convert key, digest, lines into a raw data block.

    :param key: The key of the record. Currently keys are always serialised
        using just the trailing component.
    :param digest: The digest written into the record's header line.
    :param lines: The lines of the record; their count goes in the header.
    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in \n,
        dense_lines may be a list with one line in it, containing all the
        1000's lines and their \n's. Using dense_lines if it is already
        known is a win because the string join to create bytes in this
        function spends less time resizing the final string.
    :return: (len, data) where data is the gzip-compressed record and len
        is its length.
    """
    version_id = key[-1]
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    raw = ''.join(chain([header], dense_lines or lines, [footer]))
    if type(raw) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(raw))
    if lines and lines[-1][-1] != '\n':
        raise ValueError('corrupt lines value %r' % lines)
    compressed = bytes_to_gzip(raw)
    return len(compressed), compressed

1618

1619

def _split_header(self, line):

1620

rec = line.split()

1621

if len(rec) != 4:

1622

raise KnitCorrupt(self,

1623

'unexpected number of elements in record header')

1624

return rec

1625

1626

def keys(self):
    """See VersionedFiles.keys.

    Returns the union of the keys of this knit's index and of every
    fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1635

1636

1637

1638

class _KndxIndex(object):

1639

"""Manages knit index files

1640

1641

The index is kept in memory and read on startup, to enable

1642

fast lookups of revision information. The cursor of the index

1643

file is always pointing to the end, making it easy to append

1644

entries.

1645

1646

_cache is a cache for fast mapping from version id to a Index

1647

object.

1648

1649

_history is a cache for fast mapping from indexes to version ids.

1650

1651

The index data format is dictionary compressed when it comes to

1652

parent references; an index entry may only have parents with a

1653

lower index number. As a result, the index is topologically sorted.

1654

1655

Duplicate entries may be written to the index for a single version id

1656

if this is done then the latter one completely replaces the former:

1657

this allows updates to correct version and parent information.

1658

Note that the two entries may share the delta, and that successive

1659

annotations and references MUST point to the first entry.

1660

1661

The index file on disc contains a header, followed by one line per knit

1662

record. The same revision can be present in an index file more than once.

1663

The first occurrence gets assigned a sequence number starting from 0.

1664

1665

The format of a single line is

1666

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1667

REVISION_ID is a utf8-encoded revision id

1668

FLAGS is a comma separated list of flags about the record. Values include

1669

no-eol, line-delta, fulltext.

1670

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1671

that the compressed data starts at.

1672

LENGTH is the ascii representation of the length of the data file.

1673

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1674

REVISION_ID.

1675

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1676

revision id already in the knit that is a parent of REVISION_ID.

1677

The ' :' marker is the end of record marker.

1678

1679

partial writes:

1680

when a write is interrupted to the index file, it will result in a line

1681

that does not end in ' :'. If the ' :' is not present at the end of a line,

1682

or at the end of the file, then the record that is missing it will be

1683

ignored by the parser.

1684

1685

When writing new records to the index file, the data is preceded by '\n'

1686

to ensure that records always start on new lines even if the last write was

1687

interrupted. As a result it's normal for the last line in the index to be

1688

missing a trailing newline. One can be added with no harmful effects.

1689

1690

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

1691

where prefix is e.g. the (fileid,) for .texts instances or () for

1692

constant-mapped things like .revisions, and the old state is

1693

tuple(cache_dict, history_vector). This is used to prevent having an

1694

ABI change with the C extension that reads .kndx files.

1695

"""

1696

1697

HEADER = "# bzr knit index 8\n"

1698

1699

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    :param transport: Stored as _transport; add_records appends index
        lines through it.
    :param mapper: Stored as _mapper; add_records uses mapper.map(key)
        plus '.kndx' as the path of a key's index file.
    :param get_scope: Callable stored as _get_scope; _check_read and
        _check_write_ok reset the cache when its result differs from the
        cached _scope.
    :param allow_writes: Stored as _allow_writes. Presumably consulted
        when the cache is reset to decide the write mode -- TODO confirm
        against _reset_cache.
    :param is_locked: Callable stored as _is_locked; the _check_* methods
        raise ObjectNotLocked when it returns a false value.
    """
    self._transport = transport
    self._mapper = mapper
    self._get_scope = get_scope
    self._allow_writes = allow_writes
    self._is_locked = is_locked
    # Called only after the attributes above are set.
    self._reset_cache()
    self.has_graph = True

1708

1709

def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file they map to and each file is
    updated with a single write. If anything fails while a file's group
    is being processed, the in-memory cache for that prefix is restored
    to its state from before the group and the error is re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
        This implementation does not read the flag.
    """
    # path -> (prefix, [records for that path])
    paths = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        path_keys = paths.setdefault(path, (prefix, []))
        path_keys[1].append(record)
    for path in sorted(paths):
        prefix, path_keys = paths[path]
        self._load_prefixes([prefix])
        lines = []
        # Snapshot the (cache, history) pair so it can be rolled back.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_keys:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                # Each line starts with '\n' and has no trailing newline.
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            # An empty history before this call is taken to mean the index
            # file has to be initialised rather than appended to.
            if len(orig_history):
                self._transport.append_bytes(path, ''.join(lines))
            else:
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

1752

1753

def _cache_key(self, key, options, pos, size, parent_keys):

1754

"""Cache a version record in the history array and index cache.

1755

1756

This is inlined into _load_data for performance. KEEP IN SYNC.

1757

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1758

indexes).

1759

"""

1760

prefix = key[:-1]

1761

version_id = key[-1]

1762

# last-element only for compatibilty with the C load_data.

1763

parents = tuple(parent[-1] for parent in parent_keys)

1764

for parent in parent_keys:

1765

if parent[:-1] != prefix:

1766

raise ValueError("mismatched prefixes for %r, %r" % (

1767

key, parent_keys))

1768

cache, history = self._kndx_cache[prefix]

1769

# only want the _history index to reference the 1st index entry

1770

# for version_id

1771

if version_id not in cache:

1772

index = len(history)

1773

history.append(version_id)

1774

else:

1775

index = cache[version_id][5]

1776

cache[version_id] = (version_id,

1777

options,

1778

pos,

1779

size,

1780

parents,

1781

index)

1782

1783

def check_header(self, fp):
    """Consume and validate the header line of an index file.

    :param fp: File-like object positioned at the start of the index.
    :raises errors.NoSuchFile: if the file is empty.
    :raises KnitHeaderError: if the first line is not self.HEADER.
    """
    line = fp.readline()
    if line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    if line != self.HEADER:
        raise KnitHeaderError(badline=line, filename=self)

1791

1792

def _check_read(self):

1793

if not self._is_locked():

1794

raise errors.ObjectNotLocked(self)

1795

if self._get_scope() != self._scope:

1796

self._reset_cache()

1797

1798

def _check_write_ok(self):

1799

"""Assert if not writes are permitted."""

1800

if not self._is_locked():

1801

raise errors.ObjectNotLocked(self)

1802

if self._get_scope() != self._scope:

1803

self._reset_cache()

1804

if self._mode != 'w':

1805

raise errors.ReadOnlyObjectDirtiedError(self)

1806

1807

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked more than once below, so materialise it first; a
    # one-shot iterator would otherwise be exhausted by the first pass.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # A delta is built against the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

1843

1844

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: if neither option is recorded
        for key.
    """
    options = self.get_options(key)
    if 'fulltext' in options:
        return 'fulltext'
    elif 'line-delta' in options:
        return 'line-delta'
    else:
        raise errors.KnitIndexUnknownMethod(self, options)

1853

1854

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :raises RevisionNotPresent: if key is not present in the index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        # Element 1 of a cache entry is the options list.
        return self._kndx_cache[prefix][0][suffix][1]
    except KeyError:
        raise RevisionNotPresent(key, self)

1865

1866

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice below (once to collect prefixes, once to build
    # the result), so materialise it in case it is a one-shot iterator.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            # Element 4 of a cache entry holds the parent suffixes.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            pass
        else:
            # Re-attach the prefix so callers get full keys back.
            result[key] = tuple(prefix + (suffix,) for
                suffix in suffix_parents)
    return result

1889

1890

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    :raises KeyError: if key is not in the index (unlike get_options, the
        lookup failure is not translated).
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    entry = self._kndx_cache[prefix][0][suffix]
    # Elements 2 and 3 of a cache entry are the position and size.
    return key, entry[2], entry[3]

1900

1901

def _init_index(self, path, extra_lines=[]):

1902

"""Initialize an index."""

1903

sio = StringIO()

1904

sio.write(self.HEADER)

1905

sio.writelines(extra_lines)

1906

sio.seek(0)

1907

self._transport.put_file_non_atomic(path, sio,

1908

create_parent_dir=True)

1909

# self._create_parent_dir)

1910

# mode=self._file_mode,

1911

# dir_mode=self._dir_mode)

1912

1913

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples.
    """
    result = set()
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        # A constant mapper stores everything under the empty prefix.
        prefixes = [()]
    else:
        # Every file on the transport, minus its extension, names a prefix
        # once unmapped.
        relpaths = set()
        for quoted_relpath in self._transport.iter_files_recursive():
            path = os.path.splitext(quoted_relpath)[0]
            relpaths.add(path)
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    for prefix in prefixes:
        for suffix in self._kndx_cache[prefix][1]:
            result.add(prefix + (suffix,))
    return result

1934

1935

def _load_prefixes(self, prefixes):

1936

"""Load the indices for prefixes."""

1937

self._check_read()

1938

for prefix in prefixes:

1939

if prefix not in self._kndx_cache:

1940

# the load_data interface writes to these variables.

1941

self._cache = {}

1942

self._history = []

1943

self._filename = prefix

1944

try:

1945

path = self._mapper.map(prefix) + '.kndx'

1946

fp = self._transport.get(path)

1947

try:

1948

# _load_data may raise NoSuchFile if the target knit is

1949

# completely empty.

1950

_load_data(self, fp)

1951

finally:

1952

fp.close()

1953

self._kndx_cache[prefix] = (self._cache, self._history)

1954

del self._cache

1955

del self._filename

1956

del self._history

1957

except NoSuchFile:

1958

self._kndx_cache[prefix] = ({}, [])

1959

if type(self._mapper) == ConstantMapper:

1960

# preserve behaviour for revisions.kndx etc.

1961

self._init_index(path)

1962

del self._cache

1963

del self._filename

1964

del self._history

1965

1966

def _partition_keys(self, keys):

1967

"""Turn keys into a dict of prefix:suffix_list."""

1968

result = {}

1969

for key in keys:

1970

prefix_keys = result.setdefault(key[:-1], [])

1971

prefix_keys.append(key[-1])

1972

return result

1973

1974

def _dictionary_compress(self, keys):

1975

"""Dictionary compress keys.

1976

1977

:param keys: The keys to generate references to.

1978

:return: A string representation of keys. keys which are present are

1979

dictionary compressed, and others are emitted as fulltext with a

1980

'.' prefix.

1981

"""

1982

if not keys:

1983

return ''

1984

result_list = []

1985

prefix = keys[0][:-1]

1986

cache = self._kndx_cache[prefix][0]

1987

for key in keys:

1988

if key[:-1] != prefix:

1989

# kndx indices cannot refer across partitioned storage.

1990

raise ValueError("mismatched prefixes for %r" % keys)

1991

if key[-1] in cache:

1992

# -- inlined lookup() --

1993

result_list.append(str(cache[key[-1]][5]))

1994

# -- end lookup () --

1995

else:

1996

result_list.append('.' + key[-1])

1997

return ' '.join(result_list)

1998

1999

def _reset_cache(self):

2000

# Possibly this should be a LRU cache. A dictionary from key_prefix to

2001

# (cache_dict, history_vector) for parsed kndx files.

2002

self._kndx_cache = {}

2003

self._scope = self._get_scope()

2004

allow_writes = self._allow_writes()

2005

if allow_writes:

2006

self._mode = 'w'

2007

else:

2008

self._mode = 'r'

2009

2010

def _split_key(self, key):

2011

"""Split key into a prefix and suffix."""

2012

return key[:-1], key[-1]

2013

2014

2015

class _KnitGraphIndex(object):

2016

"""A KnitVersionedFiles index layered on GraphIndex."""

2017

2018

def __init__(self, graph_index, is_locked, deltas=False, parents=True,

2019

add_callback=None):

2020

"""Construct a KnitGraphIndex on a graph_index.

2021

2022

:param graph_index: An implementation of bzrlib.index.GraphIndex.

2023

:param is_locked: A callback to check whether the object should answer

2024

queries.

2025

:param deltas: Allow delta-compressed records.

2026

:param parents: If True, record knits parents, if not do not record

2027

parents.

2028

:param add_callback: If not None, allow additions to the index and call

2029

this callback with a list of added GraphIndex nodes:

2030

[(node, value, node_refs), ...]

2031

:param is_locked: A callback, returns True if the index is locked and

2032

thus usable.

2033

"""

2034

self._add_callback = add_callback

2035

self._graph_index = graph_index

2036

self._deltas = deltas

2037

self._parents = parents

2038

if deltas and not parents:

2039

# XXX: TODO: Delta tree and parent graph should be conceptually

2040

# separate.

2041

raise KnitCorrupt(self, "Cannot do delta compression without "

2042

"parent tracking.")

2043

self.has_graph = parents

2044

self._is_locked = is_locked

2045

2046

def __repr__(self):

2047

return "%s(%r)" % (self.__class__.__name__, self._graph_index)

2048

2049

def add_records(self, records, random_id=False):

2050

"""Add multiple records to the index.

2051

2052

This function does not insert data into the Immutable GraphIndex

2053

backing the KnitGraphIndex, instead it prepares data for insertion by

2054

the caller and checks that it is safe to insert then calls

2055

self._add_callback with the prepared GraphIndex nodes.

2056

2057

:param records: a list of tuples:

2058

(key, options, access_memo, parents).

2059

:param random_id: If True the ids being added were randomly generated

2060

and no check for existence will be performed.

2061

"""

2062

if not self._add_callback:

2063

raise errors.ReadOnlyError(self)

2064

# we hope there are no repositories with inconsistent parentage

2065

# anymore.

2066

2067

keys = {}

2068

for (key, options, access_memo, parents) in records:

2069

if self._parents:

2070

parents = tuple(parents)

2071

index, pos, size = access_memo

2072

if 'no-eol' in options:

2073

value = 'N'

2074

else:

2075

value = ' '

2076

value += "%d %d" % (pos, size)

2077

if not self._deltas:

2078

if 'line-delta' in options:

2079

raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")

2080

if self._parents:

2081

if self._deltas:

2082

if 'line-delta' in options:

2083

node_refs = (parents, (parents[0],))

2084

else:

2085

node_refs = (parents, ())

2086

else:

2087

node_refs = (parents, )

2088

else:

2089

if parents:

2090

raise KnitCorrupt(self, "attempt to add node with parents "

2091

"in parentless index.")

2092

node_refs = ()

2093

keys[key] = (value, node_refs)

2094

# check for dups

2095

if not random_id:

2096

present_nodes = self._get_entries(keys)

2097

for (index, key, value, node_refs) in present_nodes:

2098

if (value[0] != keys[key][0][0] or

2099

node_refs != keys[key][1]):

2100

raise KnitCorrupt(self, "inconsistent details in add_records"

2101

": %s %s" % ((value, node_refs), keys[key]))

2102

del keys[key]

2103

result = []

2104

if self._parents:

2105

for key, (value, node_refs) in keys.iteritems():

2106

result.append((key, value, node_refs))

2107

else:

2108

for key, (value, node_refs) in keys.iteritems():

2109

result.append((key, value))

2110

self._add_callback(result)

2111

2112

def _check_read(self):

2113

"""raise if reads are not permitted."""

2114

if not self._is_locked():

2115

raise errors.ObjectNotLocked(self)

2116

2117

def _check_write_ok(self):

2118

"""Assert if writes are not permitted."""

2119

if not self._is_locked():

2120

raise errors.ObjectNotLocked(self)

2121

2122

def _compression_parent(self, an_entry):

2123

# return the key that an_entry is compressed against, or None

2124

# Grab the second parent list (as deltas implies parents currently)

2125

compression_parents = an_entry[3][1]

2126

if not compression_parents:

2127

return None

2128

if len(compression_parents) != 1:

2129

raise AssertionError(

2130

"Too many compression parents: %r" % compression_parents)

2131

return compression_parents[0]

2132

2133

def get_build_details(self, keys):

2134

"""Get the method, index_memo and compression parent for version_ids.

2135

2136

Ghosts are omitted from the result.

2137

2138

:param keys: An iterable of keys.

2139

:return: A dict of key:

2140

(index_memo, compression_parent, parents, record_details).

2141

index_memo

2142

opaque structure to pass to read_records to extract the raw

2143

data

2144

compression_parent

2145

Content that this record is built upon, may be None

2146

parents

2147

Logical parents of this node

2148

record_details

2149

extra information about the content which needs to be passed to

2150

Factory.parse_record

2151

"""

2152

self._check_read()

2153

result = {}

2154

entries = self._get_entries(keys, False)

2155

for entry in entries:

2156

key = entry[1]

2157

if not self._parents:

2158

parents = ()

2159

else:

2160

parents = entry[3][0]

2161

if not self._deltas:

2162

compression_parent_key = None

2163

else:

2164

compression_parent_key = self._compression_parent(entry)

2165

noeol = (entry[2][0] == 'N')

2166

if compression_parent_key:

2167

method = 'line-delta'

2168

else:

2169

method = 'fulltext'

2170

result[key] = (self._node_to_position(entry),

2171

compression_parent_key, parents,

2172

(method, noeol))

2173

return result

2174

2175

def _get_entries(self, keys, check_present=False):

2176

"""Get the entries for keys.

2177

2178

:param keys: An iterable of index key tuples.

2179

"""

2180

keys = set(keys)

2181

found_keys = set()

2182

if self._parents:

2183

for node in self._graph_index.iter_entries(keys):

2184

yield node

2185

found_keys.add(node[1])

2186

else:

2187

# adapt parentless index to the rest of the code.

2188

for node in self._graph_index.iter_entries(keys):

2189

yield node[0], node[1], node[2], ()

2190

found_keys.add(node[1])

2191

if check_present:

2192

missing_keys = keys.difference(found_keys)

2193

if missing_keys:

2194

raise RevisionNotPresent(missing_keys.pop(), self)

2195

2196

def get_method(self, key):

2197

"""Return compression method of specified key."""

2198

return self._get_method(self._get_node(key))

2199

2200

def _get_method(self, node):

2201

if not self._deltas:

2202

return 'fulltext'

2203

if self._compression_parent(node):

2204

return 'line-delta'

2205

else:

2206

return 'fulltext'

2207

2208

def _get_node(self, key):

2209

try:

2210

return list(self._get_entries([key]))[0]

2211

except IndexError:

2212

raise RevisionNotPresent(key, self)

2213

2214

def get_options(self, key):

2215

"""Return a list representing options.

2216

2217

e.g. ['foo', 'bar']

2218

"""

2219

node = self._get_node(key)

2220

options = [self._get_method(node)]

2221

if node[2][0] == 'N':

2222

options.append('no-eol')

2223

return options

2224

2225

def get_parent_map(self, keys):

2226

"""Get a map of the parents of keys.

2227

2228

:param keys: The keys to look up parents for.

2229

:return: A mapping from keys to parents. Absent keys are absent from

2230

the mapping.

2231

"""

2232

self._check_read()

2233

nodes = self._get_entries(keys)

2234

result = {}

2235

if self._parents:

2236

for node in nodes:

2237

result[node[1]] = node[3][0]

2238

else:

2239

for node in nodes:

2240

result[node[1]] = None

2241

return result

2242

2243

def get_position(self, key):

2244

"""Return details needed to access the version.

2245

2246

:return: a tuple (index, data position, size) to hand to the access

2247

logic to get the record.

2248

"""

2249

node = self._get_node(key)

2250

return self._node_to_position(node)

2251

2252

def keys(self):

2253

"""Get all the keys in the collection.

2254

2255

The keys are not ordered.

2256

"""

2257

self._check_read()

2258

return [node[1] for node in self._graph_index.iter_all_entries()]

2259

2260

def _node_to_position(self, node):

2261

"""Convert an index value to position details."""

2262

bits = node[2][1:].split(' ')

2263

return node[0], int(bits[0]), int(bits[1])

2264

2265

2266

class _KnitKeyAccess(object):

2267

"""Access to records in .knit files."""

2268

2269

def __init__(self, transport, mapper):

2270

"""Create a _KnitKeyAccess with transport and mapper.

2271

2272

:param transport: The transport the access object is rooted at.

2273

:param mapper: The mapper used to map keys to .knit files.

2274

"""

2275

self._transport = transport

2276

self._mapper = mapper

2277

2278

def add_raw_records(self, key_sizes, raw_data):

2279

"""Add raw knit bytes to a storage area.

2280

2281

The data is spooled to the container writer in one bytes-record per

2282

raw data item.

2283

2284

:param sizes: An iterable of tuples containing the key and size of each

2285

raw data segment.

2286

:param raw_data: A bytestring containing the data.

2287

:return: A list of memos to retrieve the record later. Each memo is an

2288

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

2289

length), where the key is the record key.

2290

"""

2291

if type(raw_data) != str:

2292

raise AssertionError(

2293

'data must be plain bytes was %s' % type(raw_data))

2294

result = []

2295

offset = 0

2296

# TODO: This can be tuned for writing to sftp and other servers where

2297

# append() is relatively expensive by grouping the writes to each key

2298

# prefix.

2299

for key, size in key_sizes:

2300

path = self._mapper.map(key)

2301

try:

2302

base = self._transport.append_bytes(path + '.knit',

2303

raw_data[offset:offset+size])

2304

except errors.NoSuchFile:

2305

self._transport.mkdir(osutils.dirname(path))

2306

base = self._transport.append_bytes(path + '.knit',

2307

raw_data[offset:offset+size])

2308

# if base == 0:

2309

# chmod.

2310

offset += size

2311

result.append((key, base, size))

2312

return result

2313

2314

def get_raw_records(self, memos_for_retrieval):

2315

"""Get the raw bytes for a records.

2316

2317

:param memos_for_retrieval: An iterable containing the access memo for

2318

retrieving the bytes.

2319

:return: An iterator over the bytes of the records.

2320

"""

2321

# first pass, group into same-index request to minimise readv's issued.

2322

request_lists = []

2323

current_prefix = None

2324

for (key, offset, length) in memos_for_retrieval:

2325

if current_prefix == key[:-1]:

2326

current_list.append((offset, length))

2327

else:

2328

if current_prefix is not None:

2329

request_lists.append((current_prefix, current_list))

2330

current_prefix = key[:-1]

2331

current_list = [(offset, length)]

2332

# handle the last entry

2333

if current_prefix is not None:

2334

request_lists.append((current_prefix, current_list))

2335

for prefix, read_vector in request_lists:

2336

path = self._mapper.map(prefix) + '.knit'

2337

for pos, data in self._transport.readv(path, read_vector):

2338

yield data

2339

2340

2341

class _DirectPackAccess(object):

2342

"""Access to data in one or more packs with less translation."""

2343

2344

def __init__(self, index_to_packs):

2345

"""Create a _DirectPackAccess object.

2346

2347

:param index_to_packs: A dict mapping index objects to the transport

2348

and file names for obtaining data.

2349

"""

2350

self._container_writer = None

2351

self._write_index = None

2352

self._indices = index_to_packs

2353

2354

def add_raw_records(self, key_sizes, raw_data):

2355

"""Add raw knit bytes to a storage area.

2356

2357

The data is spooled to the container writer in one bytes-record per

2358

raw data item.

2359

2360

:param sizes: An iterable of tuples containing the key and size of each

2361

raw data segment.

2362

:param raw_data: A bytestring containing the data.

2363

:return: A list of memos to retrieve the record later. Each memo is an

2364

opaque index memo. For _DirectPackAccess the memo is (index, pos,

2365

length), where the index field is the write_index object supplied

2366

to the PackAccess object.

2367

"""

2368

if type(raw_data) != str:

2369

raise AssertionError(

2370

'data must be plain bytes was %s' % type(raw_data))

2371

result = []

2372

offset = 0

2373

for key, size in key_sizes:

2374

p_offset, p_length = self._container_writer.add_bytes_record(

2375

raw_data[offset:offset+size], [])

2376

offset += size

2377

result.append((self._write_index, p_offset, p_length))

2378

return result

2379

2380

def get_raw_records(self, memos_for_retrieval):

2381

"""Get the raw bytes for a records.

2382

2383

:param memos_for_retrieval: An iterable containing the (index, pos,

2384

length) memo for retrieving the bytes. The Pack access method

2385

looks up the pack to use for a given record in its index_to_pack

2386

map.

2387

:return: An iterator over the bytes of the records.

2388

"""

2389

# first pass, group into same-index requests

2390

request_lists = []

2391

current_index = None

2392

for (index, offset, length) in memos_for_retrieval:

2393

if current_index == index:

2394

current_list.append((offset, length))

2395

else:

2396

if current_index is not None:

2397

request_lists.append((current_index, current_list))

2398

current_index = index

2399

current_list = [(offset, length)]

2400

# handle the last entry

2401

if current_index is not None:

2402

request_lists.append((current_index, current_list))

2403

for index, offsets in request_lists:

2404

transport, path = self._indices[index]

2405

reader = pack.make_readv_reader(transport, path, offsets)

2406

for names, read_func in reader.iter_records():

2407

yield read_func(None)

2408

2409

def set_writer(self, writer, index, transport_packname):

2410

"""Set a writer to use for adding data."""

2411

if index is not None:

2412

self._indices[index] = transport_packname

2413

self._container_writer = writer

2414

self._write_index = index

2415

2416

2417

# Deprecated alias: use patiencediff.PatienceSequenceMatcher directly instead.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2419

2420

2421

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to annotate from; handed to _KnitAnnotator.
    :param revision_id: Passed through to _KnitAnnotator.annotate.
    :return: An iterator over whatever _KnitAnnotator.annotate returns.
    """
    annotator = _KnitAnnotator(knit)
    return iter(annotator.annotate(revision_id))

2430

2431

2432

class _KnitAnnotator(object):

2433

"""Build up the annotations for a text."""

2434

2435

def __init__(self, knit):

2436

self._knit = knit

2437

2438

# Content objects, differs from fulltexts because of how final newlines

2439

# are treated by knits. the content objects here will always have a

2440

# final newline

2441

self._fulltext_contents = {}

2442

2443

# Annotated lines of specific revisions

2444

self._annotated_lines = {}

2445

2446

# Track the raw data for nodes that we could not process yet.

2447

# This maps the revision_id of the base to a list of children that will

2448

# annotated from it.

2449

self._pending_children = {}

2450

2451

# Nodes which cannot be extracted

2452

self._ghosts = set()

2453

2454

# Track how many children this node has, so we know if we need to keep

2455

# it

2456

self._annotate_children = {}

2457

self._compression_children = {}

2458

2459

self._all_build_details = {}

2460

# The children => parent revision_id graph

2461

self._revision_id_graph = {}

2462

2463

self._heads_provider = None

2464

2465

self._nodes_to_keep_annotations = set()

2466

self._generations_until_keep = 100

2467

2468

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new generation count.
    """
    self._generations_until_keep = value

2475

2476

def _add_fulltext_content(self, revision_id, content_obj):

2477

self._fulltext_contents[revision_id] = content_obj

2478

# TODO: jam 20080305 It might be good to check the sha1digest here

2479

return content_obj.text()

2480

2481

def _check_parents(self, child, nodes_to_annotate):

2482

"""Check if all parents have been processed.

2483

2484

:param child: A tuple of (rev_id, parents, raw_content)

2485

:param nodes_to_annotate: If child is ready, add it to

2486

nodes_to_annotate, otherwise put it back in self._pending_children

2487

"""

2488

for parent_id in child[1]:

2489

if (parent_id not in self._annotated_lines):

2490

# This parent is present, but another parent is missing

2491

self._pending_children.setdefault(parent_id,

2492

[]).append(child)

2493

break

2494

else:

2495

# This one is ready to be processed

2496

nodes_to_annotate.append(child)

2497

2498

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of revision_id, passed to
        annotate.reannotate.
    :param parent_ids: The already-annotated parents of revision_id.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    a = self._annotated_lines
    annotated_parent_lines = [a[p] for p in parent_ids]
    annotated_lines = list(annotate.reannotate(annotated_parent_lines,
        fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider()))
    self._annotated_lines[revision_id] = annotated_lines
    for p in parent_ids:
        ann_children = self._annotate_children[p]
        ann_children.remove(revision_id)
        if (not ann_children
            and p not in self._nodes_to_keep_annotations):
            # No remaining child needs this parent: drop its cached data.
            del self._annotated_lines[p]
            del self._all_build_details[p]
            if p in self._fulltext_contents:
                del self._fulltext_contents[p]
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    nodes_to_annotate = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, nodes_to_annotate)
    return nodes_to_annotate

2526

2527

def _get_build_graph(self, key):

2528

"""Get the graphs for building texts and annotations.

2529

2530

The data you need for creating a full text may be different than the

2531

data you need to annotate that text. (At a minimum, you need both

2532

parents to create an annotation, but only need 1 parent to generate the

2533

fulltext.)

2534

2535

:return: A list of (key, index_memo) records, suitable for

2536

passing to read_records_iter to start reading in the raw data fro/

2537

the pack file.

2538

"""

2539

if key in self._annotated_lines:

2540

# Nothing to do

2541

return []

2542

pending = set([key])

2543

records = []

2544

generation = 0

2545

kept_generation = 0

2546

while pending:

2547

# get all pending nodes

2548

generation += 1

2549

this_iteration = pending

2550

build_details = self._knit._index.get_build_details(this_iteration)

2551

self._all_build_details.update(build_details)

2552

# new_nodes = self._knit._index._get_entries(this_iteration)

2553

pending = set()

2554

for key, details in build_details.iteritems():

2555

(index_memo, compression_parent, parents,

2556

record_details) = details

2557

self._revision_id_graph[key] = parents

2558

records.append((key, index_memo))

2559

# Do we actually need to check _annotated_lines?

2560

pending.update(p for p in parents

2561

if p not in self._all_build_details)

2562

if compression_parent:

2563

self._compression_children.setdefault(compression_parent,

2564

[]).append(key)

2565

if parents:

2566

for parent in parents:

2567

self._annotate_children.setdefault(parent,

2568

[]).append(key)

2569

num_gens = generation - kept_generation

2570

if ((num_gens >= self._generations_until_keep)

2571

and len(parents) > 1):

2572

kept_generation = generation

2573

self._nodes_to_keep_annotations.add(key)

2574

2575

missing_versions = this_iteration.difference(build_details.keys())

2576

self._ghosts.update(missing_versions)

2577

for missing_version in missing_versions:

2578

# add a key, no parents

2579

self._revision_id_graph[missing_version] = ()

2580

pending.discard(missing_version) # don't look for it

2581

if self._ghosts.intersection(self._compression_children):

2582

raise KnitCorrupt(

2583

"We cannot have nodes which have a ghost compression parent:\n"

2584

"ghosts: %r\n"

2585

"compression children: %r"

2586

% (self._ghosts, self._compression_children))

2587

# Cleanout anything that depends on a ghost so that we don't wait for

2588

# the ghost to show up

2589

for node in self._ghosts:

2590

if node in self._annotate_children:

2591

# We won't be building this node

2592

del self._annotate_children[node]

2593

# Generally we will want to read the records in reverse order, because

2594

# we find the parent nodes after the children

2595

records.reverse()

2596

return records

2597

2598

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    Records are handled in the order ``_read_records_iter`` yields them,
    not in a strict requested order.  A record with no (non-ghost) parents
    is annotated immediately; any other record is handed to
    ``_check_parents`` together with the work stack.  Everything that ends
    up on the work stack is drained before the next record is read.

    :param records: The records to read, passed straight through to
        ``self._knit._read_records_iter``.
    """
    for rev_id, record, _digest in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            # Nothing to do, this revision was annotated earlier.
            continue
        # Ghost parents can never be annotated, so ignore them here.
        present_parents = [parent
                           for parent in self._revision_id_graph[rev_id]
                           if parent not in self._ghosts]
        (_index_memo, _basis_id, _all_parents,
         record_details) = self._all_build_details[rev_id]
        work_stack = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if present_parents:
            # Check if all the parents are present
            # NOTE(review): _check_parents presumably pushes the child onto
            # work_stack once it is ready -- confirm against its definition.
            self._check_parents((rev_id, present_parents, record),
                                work_stack)
        else:
            # There are no parents for this node, so just add it
            # TODO: This probably needs to be decoupled
            content, _delta = self._knit._factory.parse_record(
                rev_id, record, record_details, None)
            text = self._add_fulltext_content(rev_id, content)
            work_stack.extend(self._add_annotation(
                rev_id, text, present_parents, left_matching_blocks=None))

        while work_stack:
            # Should we use a queue here instead of a stack?
            node_id, node_parents, node_record = work_stack.pop()
            (_index_memo, basis_id, _all_parents,
             node_details) = self._all_build_details[node_id]
            if basis_id is None:
                # Stored as a fulltext: no delta, so no matching blocks.
                content = self._knit._factory.parse_fulltext(
                    node_record, node_id)
                text = self._add_fulltext_content(node_id, content)
                matching_blocks = None
            else:
                siblings = self._compression_children[basis_id]
                if node_id not in siblings:
                    raise AssertionError("%r not in compression children %r"
                                         % (node_id, siblings))
                # If there is only 1 child, it is safe to reuse this
                # content
                reuse_content = (
                    len(siblings) == 1
                    and basis_id not in self._nodes_to_keep_annotations)
                if reuse_content:
                    # Remove it from the cache since it will be changing
                    basis_content = self._fulltext_contents.pop(basis_id)
                    # Make sure to copy the fulltext since it might be
                    # modified
                    basis_text = list(basis_content.text())
                else:
                    basis_content = self._fulltext_contents[basis_id]
                    basis_text = basis_content.text()
                siblings.remove(node_id)
                content, delta = self._knit._factory.parse_record(
                    node_id, node_record, node_details, basis_content,
                    copy_base_content=(not reuse_content))
                text = self._add_fulltext_content(node_id, content)
                matching_blocks = KnitContent.get_line_delta_blocks(
                    delta, basis_text, text)
            work_stack.extend(self._add_annotation(
                node_id, text, node_parents,
                left_matching_blocks=matching_blocks))

2671

2672

def _get_heads_provider(self):
    """Create a heads provider for resolving ancestry issues.

    The provider is built from ``self._revision_id_graph`` the first time
    it is requested and stored on ``self._heads_provider``; later calls
    return that stored object.

    :return: The ``FrozenHeadsCache`` stored on ``self._heads_provider``.
    """
    provider = self._heads_provider
    if provider is None:
        parents_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        provider = _mod_graph.FrozenHeadsCache(
            _mod_graph.Graph(parents_provider))
        self._heads_provider = provider
    return provider

2682

2683

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    The fast, delta-based path is currently switched off for every knit
    (note the ``True or``), so this always defers to
    ``_simple_annotate``.  The fast path is kept below for when it can be
    re-enabled.

    :param key: The key to annotate.
    """
    # stacked knits can't use the fast path at present.
    use_slow_path = True or len(self._knit._fallback_vfs) > 0
    if not use_slow_path:
        records = self._get_build_graph(key)
        if key in self._ghosts:
            raise errors.RevisionNotPresent(key, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[key]
    return self._simple_annotate(key)

2696

2697

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The list built from ``annotate.reannotate`` for ``key``.
    :raises errors.RevisionNotPresent: If no annotation was produced for
        ``key`` (for example because it is not present in the knit).
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    keys = set()
    while True:
        try:
            present, _ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name so the requested ``key`` is not clobbered;
        # the final lookup and error below must refer to the caller's key.
        record_key = record.key
        fulltext = split_lines(record.get_bytes_as('fulltext'))
        parents = parent_map[record_key]
        if parents is not None:
            # The stream is topological, so every present parent has
            # already been annotated.  A parent missing from the cache was
            # never streamed (only present keys are requested), i.e. it is
            # a ghost; skip it rather than fail with a KeyError.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent in parent_cache]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None,
                       head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

2733

2734

2735

try:

2736

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2737

except ImportError:

2738

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »