/brz/remove-bazaar : revision 4039.3.6

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2009-02-25 20:23:04 UTC
mto: This revision was merged to the branch mainline in revision 4051.
Revision ID: john@arbash-meinel.com-20090225202304-j52lrdrx8aw101uh

Turn _split_by_prefix into a classmethod, and add direct tests.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

errors,

osutils,

patiencediff,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

SHA1KnitCorrupt,

)

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_string,

sha_strings,

split_lines,

)

100

from bzrlib.versionedfile import (

101

AbsentContentFactory,

102

adapter_registry,

103

ConstantMapper,

104

ContentFactory,

105

ChunkedContentFactory,

106

VersionedFile,

107

VersionedFiles,

108

)

109

110

111

# TODO: Split out code specific to this format into an associated object.

112

113

# TODO: Can we put in some kind of value to check that the index and data

114

# files belong together?

115

116

# TODO: accommodate binaries, perhaps by storing a byte count

117

118

# TODO: function to check whole file

119

120

# TODO: atomically append data, then measure backwards from the cursor

121

# position after writing to work out where it was located. we may need to

122

# bypass python file buffering.

123

124

# Suffix constants for the two kinds of knit file; presumably appended to a
# mapped path by the code that opens them -- confirm against the users.
DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'

# 5 MiB.  Presumably the minimum amount of record data gathered before a
# streaming batch is flushed -- TODO confirm against the code that reads it.
_STREAM_MIN_BUFFER_SIZE = 5 * 1024 * 1024

127

128

129

class KnitAdapter(object):
    """Shared state for the knit record adapters.

    Every adapter carries an annotated and a plain content factory, a
    KnitVersionedFiles created with neither an index nor a data access
    object, and the basis versioned file handed to the constructor.
    """

    def __init__(self, basis_vf):
        """Prepare the helper objects an adapter works with.

        :param basis_vf: A versioned file to access basis texts of deltas
            from.  May be None for adapters that do not need to access basis
            texts.
        """
        self._basis_vf = basis_vf
        self._plain_factory = KnitPlainFactory()
        self._annotate_factory = KnitAnnotateFactory()
        # Both the index and the access object are None here.
        self._data = KnitVersionedFiles(None, None)

142

143

144

class FTAnnotatedToUnannotated(KnitAdapter):
    """Rebuild an annotated fulltext knit record without its annotations."""

    def get_bytes(self, factory):
        """Return the record bytes for factory with annotations removed.

        :param factory: A record object whose _raw_record attribute holds an
            annotated fulltext knit record.
        :return: The bytes item of the (size, bytes) pair produced by
            _record_to_data for the unannotated text.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # header[1] is the version id recorded in the knit record and
        # header[3] is passed on as the digest argument of _record_to_data.
        version_id = header[1]
        parsed = self._annotate_factory.parse_fulltext(annotated_lines,
            version_id)
        _size, record_bytes = self._data._record_to_data(
            (version_id,), header[3], parsed.text())
        return record_bytes

154

155

156

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Rebuild an annotated line-delta knit record without its annotations."""

    def get_bytes(self, factory):
        """Return the record bytes for factory with annotations removed.

        :param factory: A record object whose _raw_record attribute holds an
            annotated line-delta knit record.
        :return: The bytes item of the (size, bytes) pair produced by
            _record_to_data for the plain delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        # plain=True drops the origin from every delta line while parsing.
        hunks = self._annotate_factory.parse_line_delta(annotated_lines,
            version_id, plain=True)
        plain_lines = self._plain_factory.lower_line_delta(hunks)
        _size, record_bytes = self._data._record_to_data(
            (version_id,), header[3], plain_lines)
        return record_bytes

168

169

170

class FTAnnotatedToFullText(KnitAdapter):
    """Turn an annotated fulltext knit record into a single text string."""

    def get_bytes(self, factory):
        """Return the text stored in factory's annotated fulltext record.

        :param factory: A record object providing _raw_record, key and
            _build_details.
        :return: The content lines joined into one string.
        """
        _header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # No base content is supplied (None): the record is parsed as a
        # complete text on its own.
        parsed, _delta = self._annotate_factory.parse_record(
            factory.key[-1], annotated_lines, factory._build_details, None)
        return ''.join(parsed.text())

180

181

182

class DeltaAnnotatedToFullText(KnitAdapter):
    """Turn an annotated line-delta knit record into a single text string."""

    def get_bytes(self, factory):
        """Return the text obtained by applying factory's delta to its basis.

        :param factory: A record object providing _raw_record, parents and
            _build_details.  parents[0] is used as the compression parent.
        :return: The resulting content lines joined into one string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        hunks = self._annotate_factory.parse_line_delta(annotated_lines,
            version_id, plain=True)
        parent_key = factory.parents[0]
        basis_stream = self._basis_vf.get_record_stream(
            [parent_key], 'unordered', True)
        basis_record = basis_stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(parent_key, self._basis_vf)
        basis_lines = osutils.chunks_to_lines(
            basis_record.get_bytes_as('chunked'))
        # The delta was parsed in plain form above, so it is applied by hand
        # to a plain content object built from the basis lines.
        result = PlainKnitContent(basis_lines, parent_key)
        result.apply_delta(hunks, version_id)
        result._should_strip_eol = factory._build_details[1]
        return ''.join(result.text())

204

205

206

class FTPlainToFullText(KnitAdapter):
    """Turn a plain (unannotated) fulltext knit record into a text string."""

    def get_bytes(self, factory):
        """Return the text stored in factory's plain fulltext record.

        :param factory: A record object providing _raw_record, key and
            _build_details.
        :return: The content lines joined into one string.
        """
        _header, record_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # No base content is supplied (None): the record is parsed as a
        # complete text on its own.
        parsed, _delta = self._plain_factory.parse_record(
            factory.key[-1], record_lines, factory._build_details, None)
        return ''.join(parsed.text())

216

217

218

class DeltaPlainToFullText(KnitAdapter):
    """An adapter from plain (unannotated) line-delta records to full texts."""

    def get_bytes(self, factory):
        """Return the text obtained by applying factory's delta to its basis.

        :param factory: A record object providing _raw_record, parents and
            _build_details.  parents[0] is used as the compression parent.
        :return: The resulting content lines joined into one string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        # The delta is parsed (and applied) by parse_record below; it is not
        # parsed a second time here because the result would be thrown away.
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # Both the delta and the basis are plain, so the plain factory can
        # build the new content from a copy of the basis content.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())

240

241

242

class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.  Item 0 is the build method and item 1 the
            no-eol flag.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object used to answer 'chunked' and 'fulltext'
            requests through its get_lines and get_text methods.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # storage_kind is 'knit-[annotated-](delta|ft)-gz'.
        kind = 'ft'
        if build_details[0] == 'line-delta':
            kind = 'delta'
        annotated_kind = ''
        if annotated:
            annotated_kind = 'annotated-'
        self.storage_kind = 'knit-' + annotated_kind + kind + '-gz'
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Create a fully serialised network version for transmission."""
        # Layout: storage_kind, key, parents, Noeol flag, raw_record
        key_bytes = '\x00'.join(self.key)
        if self.parents is not None:
            parent_bytes = '\t'.join(
                '\x00'.join(parent) for parent in self.parents)
        else:
            parent_bytes = 'None:'
        noeol = ' '
        if self._build_details[1]:
            noeol = 'N'
        self._network_bytes = "%s\n%s\n%s\n%s%s" % (
            self.storage_kind, key_bytes, parent_bytes, noeol,
            self._raw_record)

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This object's own storage_kind (network bytes
            are returned, built on first use), or 'chunked' / 'fulltext'
            when a knit was supplied at construction.
        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            if self._network_bytes is None:
                self._create_network_bytes()
            return self._network_bytes
        knit = self._knit
        if knit is not None:
            if storage_kind == 'chunked':
                return knit.get_lines(self.key[0])
            if storage_kind == 'fulltext':
                return knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

308

309

310

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory which can either generate full text or a wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for
            this key.
        :param first: Is this the first content object returned from
            generator?  If it is, its storage kind is knit-delta-closure,
            otherwise it is knit-delta-closure-ref.
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        self._generator = generator
        if first:
            self.storage_kind = "knit-delta-closure"
        else:
            self.storage_kind = "knit-delta-closure" + "-ref"
        self._first = first

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This object's own storage_kind, 'chunked' or
            'fulltext'.
        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                # all the keys etc are contained in the bytes returned in
                # the first record.
                return ''
            return self._generator._wire_bytes()
        if storage_kind in ('chunked', 'fulltext'):
            chunks = self._generator._get_one_work(self.key).text()
            if storage_kind != 'chunked':
                return ''.join(chunks)
            return chunks
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

352

353

354

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to a iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.  It is not examined by this function.
    :param bytes: The bytes of the record on the network.
    :param line_end: Passed, together with bytes, to
        _NetworkContentMapGenerator.
    :return: Whatever the generator's get_record_stream() returns.
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

363

364

365

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    Starting at line_end the bytes are read as: a key line (elements
    separated by NUL), a parents line (parents separated by tabs, elements
    by NUL, or the literal 'None:'), one flag character ('N' meaning
    no-eol) and finally the raw knit record.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A list holding one KnitContentFactory.
    """
    key_end = bytes.find('\n', line_end)
    key = tuple(bytes[line_end:key_end].split('\x00'))

    parents_start = key_end + 1
    parents_end = bytes.find('\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line != 'None:':
        parents = tuple(
            [tuple(segment.split('\x00'))
             for segment in parent_line.split('\t') if segment])
    else:
        parents = None

    flag_pos = parents_end + 1
    noeol = bytes[flag_pos] == 'N'
    method = 'line-delta'
    if 'ft' in storage_kind:
        method = 'fulltext'
    # Everything after the flag character is the raw record.
    raw_record = bytes[flag_pos + 1:]
    annotated = 'annotated' in storage_kind
    record = KnitContentFactory(key, parents, (method, noeol), None,
        raw_record, annotated, network_bytes=bytes)
    return [record]

395

396

397

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    This is always stored in memory as a list of lines with \\n at the end,
    plus a flag saying if the final ending is really there or not, because
    that corresponds to the on-disk knit representation.
    """

    def __init__(self):
        # When True, text() of the subclasses strips the final newline.
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Subclasses must override this; the base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: A content object providing text() and _lines.
        :return: An iterator of (old_start, old_end, new_length, new_data)
            tuples, one for every non-equal opcode of the patience matcher.
        """
        replacement_texts = new_lines.text()
        current_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, current_texts, replacement_texts)
        for opcode in matcher.get_opcodes():
            tag, old_lo, old_hi, new_lo, new_hi = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_lo, old_hi, new_hi - new_lo,
                       new_lines._lines[new_lo:new_hi])

    def line_delta(self, new_lines):
        """Return line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta

        :param knit_delta: Iterable of (start, end, count, lines) hunks.
        :param source: The lines the delta applies to.
        :param target: The lines produced by applying the delta.
        :return: An iterator of (source_pos, target_pos, length) tuples,
            finishing with a zero-length block.
        """
        def trusted(length, source_at, target_at):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the last
            # line of a run is only counted when it really is identical.
            if length > 0:
                last = length - 1
                if source[source_at + last] != target[target_at + last]:
                    return length - 1
            return length

        target_len = len(target)
        source_at = 0
        target_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - source_at
            matched = trusted(gap, source_at, target_at)
            if matched > 0:
                yield source_at, target_at, matched
            target_at += inserted + gap
            source_at = hunk_end
        # Whatever is left of the target after the last hunk.
        tail = target_len - target_at
        matched = trusted(tail, source_at, target_at)
        if matched > 0:
            yield source_at, target_at, matched
        yield source_at + tail, target_len, 0

451

452

453

class AnnotatedKnitContent(KnitContent):
    """Annotated content.

    The content is held in _lines as a list of (origin, text) pairs.
    """

    def __init__(self, lines):
        KnitContent.__init__(self)
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        annotated = self._lines[:]
        if not self._should_strip_eol:
            return annotated
        origin, last_text = annotated[-1]
        annotated[-1] = (origin, last_text.rstrip('\n'))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        content = self._lines
        # Running difference between positions in the original lines and
        # positions in the list as modified so far.
        shift = 0
        for start, end, count, new_lines in delta:
            content[shift + start:shift + end] = new_lines
            shift += (start - end) + count

    def text(self):
        """Return the content lines without their origins.

        :raises KnitCorrupt: If an entry of _lines cannot be unpacked into
            an (origin, text) pair.
        """
        texts = []
        try:
            for _origin, line_text in self._lines:
                texts.append(line_text)
        except ValueError:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            # The exception is fetched through sys.exc_info() so that this
            # clause parses on every Python version.
            problem = sys.exc_info()[1]
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (problem,))
        if self._should_strip_eol:
            texts[-1] = texts[-1].rstrip('\n')
        return texts

    def copy(self):
        """Return a new AnnotatedKnitContent over a copy of the line list."""
        return AnnotatedKnitContent(self._lines[:])

492

493

494

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is
    reported for all lines. Generally, annotate[_iter] is not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        version_id = self._version_id
        return [(version_id, line) for line in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        content = self._lines
        # Running difference between positions in the original lines and
        # positions in the list as modified so far.
        shift = 0
        for start, end, count, new_lines in delta:
            content[shift + start:shift + end] = new_lines
            shift += (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the content lines.

        The stored list itself is returned unless the final newline has to
        be stripped, in which case a modified copy is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

529

530

531

class _KnitFactory(object):
    """Base class for common Factory functions."""

    def parse_record(self, version_id, record, record_details,
                     base_content, copy_base_content=True):
        """Parse a record into a full content object.

        :param version_id: The official version id for this content
        :param record: The data returned by read_records_iter()
        :param record_details: Details about the record returned by
            get_build_details; a (method, noeol) pair.
        :param base_content: If get_build_details returns a
            compression_parent, you must return a base_content here, else
            use None
        :param copy_base_content: When building from the base_content,
            decide you can either copy it and return a new object, or
            modify it in place.
        :return: (content, delta) A Content object and possibly a
            line-delta, delta may be None
        """
        method, noeol = record_details
        if method != 'line-delta':
            # Anything that is not a line delta is parsed as a fulltext.
            content = self.parse_fulltext(record, version_id)
            delta = None
        else:
            content = base_content
            if copy_base_content:
                content = base_content.copy()
            delta = self.parse_line_delta(record, version_id)
            content.apply_delta(delta, version_id)
        content._should_strip_eol = noeol
        return (content, delta)

563

564

565

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: The serialised fulltext lines.
        :param version_id: Not used; each line carries its own origin.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta.

        :param lines: The serialised delta lines.
        :param version_id: Forwarded to parse_line_delta, which does not
            examine it.  It defaults to None so that callers using the
            one-argument form keep working; before this default existed the
            forwarding call omitted the argument and raised TypeError.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Not used; each delta line carries its own origin.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        # Bound once; the content lines of a hunk are pulled from the same
        # iterator that the for loops below take the headers from.
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the annotated lines of key as held by knit.

        :param knit: Object providing _get_content(key).
        :param key: The key to annotate.
        :return: A list of (origin, line) pairs.  For a tuple key every
            origin is expanded to key[:-1] + (origin,).
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key
        # suffixes for this factory.
        if isinstance(key, tuple):
            prefix = key[:-1]
            return [(prefix + (origin,), line)
                    for origin, line in content.annotate()]
        # XXX: This smells a bit.  Why would key ever be a non-tuple here?
        # Aren't keys defined to be tuples?  -- spiv 20080618
        return content.annotate()

687

688

689

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return plain content holding lines, labelled with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, hunk_lines) for each hunk in lines.

        :param lines: The serialised delta; must support len() and slicing.
        :param version_id: Not used.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            position = first + count
            yield start, end, count, lines[first:position]

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        take = line_iter.next
        for header in line_iter:
            # The third header field is the number of content lines.
            remaining = int(header.split(',')[2])
            for _unused in xrange(remaining):
                yield take()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Return delta as a flat list of header lines and content lines."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(hunk_lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator built on knit."""
        return _KnitAnnotator(knit).annotate(key)

749

750

751

752

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles.
    """
    def factory(transport):
        # The index is handed three callbacks that simply return None, True
        # and True respectively.
        kndx_index = _KndxIndex(
            transport, mapper, lambda: None, lambda: True, lambda: True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access,
            annotated=annotated)
    return factory

766

767

768

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles with extra 'stream' and 'writer' attributes.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for each of the two options that is enabled.
        reference_lists = 0
        for enabled in (graph, delta):
            if enabled:
                reference_lists += 1
        # Delta chains are only permitted when delta compression is on.
        chain_limit = 0
        if delta:
            chain_limit = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=reference_lists, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        knit_index = _KnitGraphIndex(graph_index, lambda: True,
            parents=parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(writer, graph_index, (transport, 'newpack'))
        result = KnitVersionedFiles(knit_index, pack_access,
            max_delta_chain=chain_limit)
        # Kept on the result so that the stream and writer can be reached
        # again later from the returned object.
        result.stream = stream
        result.writer = writer
        return result
    return factory

803

804

805

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object with 'stream' and 'writer' attributes.
    """
    # NOTE(review): the stream is closed before writer.end() is called.
    # Whether end() still needs to write to that stream cannot be seen from
    # here -- confirm that this order is intended.
    stream = versioned_files.stream
    stream.close()
    writer = versioned_files.writer
    writer.end()

808

809

810

def _get_total_build_size(self, keys, positions):
    """Determine the total bytes to build these keys.

    (helper function because _KnitGraphIndex and _KndxIndex work the same,
    but don't inherit from a common base.)

    :param keys: Keys that we want to build
    :param positions: dict of {key, (info, index_memo, comp_parent)} (such
        as returned by _get_components_positions)
    :return: Number of bytes to build those keys, i.e. the sum of item 2 of
        the index memo of every key reached.
    """
    memos_by_key = {}
    pending = keys
    while pending:
        parents_to_visit = set()
        for key in pending:
            # This is mostly for the 'stacked' case
            # Where we will be getting the data from a fallback
            if key in positions:
                _info, memo, compression_parent = positions[key]
                memos_by_key[key] = memo
                if compression_parent not in memos_by_key:
                    parents_to_visit.add(compression_parent)
        # Follow the compression parents found in this round.
        pending = parents_to_visit
    sizes = [memo[2] for memo in memos_by_key.values()]
    return sum(sizes)

837

838

839

class KnitVersionedFiles(VersionedFiles):

840

"""Storage for many versioned files using knit compression.

841

842

Backend storage is managed by indices and data objects.

843

844

:ivar _index: A _KnitGraphIndex or similar that can describe the

845

parents, graph, compression and data location of entries in this

846

KnitVersionedFiles. Note that this is only the index for

847

*this* vfs; if there are fallbacks they must be queried separately.

848

"""

849

850

def __init__(self, index, data_access, max_delta_chain=200,

851

annotated=False, reload_func=None):

852

"""Create a KnitVersionedFiles with index and data_access.

853

854

:param index: The index for the knit data.

855

:param data_access: The access object to store and retrieve knit

856

records.

857

:param max_delta_chain: The maximum number of deltas to permit during

858

insertion. Set to 0 to prohibit the use of deltas.

859

:param annotated: Set to True to cause annotations to be calculated and

860

stored during insertion.

861

:param reload_func: An function that can be called if we think we need

862

to reload the pack listing and try again. See

863

'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.

864

"""

865

self._index = index

866

self._access = data_access

867

self._max_delta_chain = max_delta_chain

868

if annotated:

869

self._factory = KnitAnnotateFactory()

870

else:

871

self._factory = KnitPlainFactory()

872

self._fallback_vfs = []

873

self._reload_func = reload_func

874

875

def __repr__(self):

876

return "%s(%r, %r)" % (

877

self.__class__.__name__,

878

self._index,

879

self._access)

880

881

def add_fallback_versioned_files(self, a_versioned_files):

882

"""Add a source of texts for texts not present in this knit.

883

884

:param a_versioned_files: A VersionedFiles object.

885

"""

886

self._fallback_vfs.append(a_versioned_files)

887

888

def add_lines(self, key, parents, lines, parent_texts=None,

889

left_matching_blocks=None, nostore_sha=None, random_id=False,

890

check_content=True):

891

"""See VersionedFiles.add_lines()."""

892

self._index._check_write_ok()

893

self._check_add(key, lines, random_id, check_content)

894

if parents is None:

895

# The caller might pass None if there is no graph data, but kndx

896

# indexes can't directly store that, so we give them

897

# an empty tuple instead.

898

parents = ()

899

return self._add(key, lines, parents,

900

parent_texts, left_matching_blocks, nostore_sha, random_id)

901

902

def _add(self, key, lines, parents, parent_texts,

903

left_matching_blocks, nostore_sha, random_id):

904

"""Add a set of lines on top of version specified by parents.

905

906

Any versions not present will be converted into ghosts.

907

"""

908

# first thing, if the content is something we don't need to store, find

909

# that out.

910

line_bytes = ''.join(lines)

911

digest = sha_string(line_bytes)

912

if nostore_sha == digest:

913

raise errors.ExistingContent

914

915

present_parents = []

916

if parent_texts is None:

917

parent_texts = {}

918

# Do a single query to ascertain parent presence; we only compress

919

# against parents in the same kvf.

920

present_parent_map = self._index.get_parent_map(parents)

921

for parent in parents:

922

if parent in present_parent_map:

923

present_parents.append(parent)

924

925

# Currently we can only compress against the left most present parent.

926

if (len(present_parents) == 0 or

927

present_parents[0] != parents[0]):

928

delta = False

929

else:

930

# To speed the extract of texts the delta chain is limited

931

# to a fixed number of deltas. This should minimize both

932

# I/O and the time spend applying deltas.

933

delta = self._check_should_delta(present_parents[0])

934

935

text_length = len(line_bytes)

936

options = []

937

if lines:

938

if lines[-1][-1] != '\n':

939

# copy the contents of lines.

940

lines = lines[:]

941

options.append('no-eol')

942

lines[-1] = lines[-1] + '\n'

943

line_bytes += '\n'

944

945

for element in key:

946

if type(element) != str:

947

raise TypeError("key contains non-strings: %r" % (key,))

948

# Knit hunks are still last-element only

949

version_id = key[-1]

950

content = self._factory.make(lines, version_id)

951

if 'no-eol' in options:

952

# Hint to the content object that its text() call should strip the

953

# EOL.

954

content._should_strip_eol = True

955

if delta or (self._factory.annotated and len(present_parents) > 0):

956

# Merge annotations from parent texts if needed.

957

delta_hunks = self._merge_annotations(content, present_parents,

958

parent_texts, delta, self._factory.annotated,

959

left_matching_blocks)

960

961

if delta:

962

options.append('line-delta')

963

store_lines = self._factory.lower_line_delta(delta_hunks)

964

size, bytes = self._record_to_data(key, digest,

965

store_lines)

966

else:

967

options.append('fulltext')

968

# isinstance is slower and we have no hierarchy.

969

if self._factory.__class__ == KnitPlainFactory:

970

# Use the already joined bytes saving iteration time in

971

# _record_to_data.

972

size, bytes = self._record_to_data(key, digest,

973

lines, [line_bytes])

974

else:

975

# get mixed annotation + content and feed it into the

976

# serialiser.

977

store_lines = self._factory.lower_fulltext(content)

978

size, bytes = self._record_to_data(key, digest,

979

store_lines)

980

981

access_memo = self._access.add_raw_records([(key, size)], bytes)[0]

982

self._index.add_records(

983

((key, options, access_memo, parents),),

984

random_id=random_id)

985

return digest, text_length, content

986

987

def annotate(self, key):
    """Delegate annotation of ``key`` to this knit's content factory.

    See VersionedFiles.annotate.

    :param key: The key to annotate.
    :return: Whatever ``self._factory.annotate(self, key)`` returns.
    """
    factory = self._factory
    return factory.annotate(self, key)

990

991

def check(self, progress_bar=None):
    """Verify that every delta in the index has its basis parent available.

    See VersionedFiles.check().

    This does not extract every text (that would slow 'bzr check'
    substantially); it only catches the obvious problem of a delta whose
    basis is unknown.  Each fallback versioned file is then asked to check
    itself.

    :param progress_bar: Accepted for interface compatibility; not used by
        this implementation.
    :raises errors.KnitCorrupt: If a non-fulltext key's first parent is not
        in the parent map.
    """
    index = self._index
    all_keys = index.keys()
    known_parents = self.get_parent_map(all_keys)
    for candidate in all_keys:
        if index.get_method(candidate) == 'fulltext':
            # Fulltexts have no basis to verify.
            continue
        basis = known_parents[candidate][0]
        if basis in known_parents:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            basis, candidate))
    for fallback in self._fallback_vfs:
        fallback.check()

1008

1009

def _check_add(self, key, lines, random_id, check_content):
    """Validate that ``key`` and ``lines`` are acceptable to add.

    Only the last element of ``key`` (the version id) is validated.

    :param key: The key tuple being added.
    :param lines: The lines of content for the new text.
    :param random_id: Not consulted here (see the TODO below).
    :param check_content: If true, also run the line-content checks.
    :raises InvalidRevisionId: If the version id contains whitespace.
    """
    revision_id = key[-1]
    if contains_whitespace(revision_id):
        raise InvalidRevisionId(revision_id, self)
    self.check_not_reserved_id(revision_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1022

1023

def _check_header(self, key, line):

1024

rec = self._split_header(line)

1025

self._check_header_version(rec, key[-1])

1026

return rec

1027

1028

def _check_header_version(self, rec, version_id):

1029

"""Checks the header version on original format knit records.

1030

1031

These have the last component of the key embedded in the record.

1032

"""

1033

if rec[1] != version_id:

1034

raise KnitCorrupt(self,

1035

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1036

1037

def _check_should_delta(self, parent):

1038

"""Iterate back through the parent listing, looking for a fulltext.

1039

1040

This is used when we want to decide whether to add a delta or a new

1041

fulltext. It searches for _max_delta_chain parents. When it finds a

1042

fulltext parent, it sees if the total size of the deltas leading up to

1043

it is large enough to indicate that we want a new full text anyway.

1044

1045

Return True if we should create a new delta, False if we should use a

1046

full text.

1047

"""

1048

delta_size = 0

1049

fulltext_size = None

1050

for count in xrange(self._max_delta_chain):

1051

try:

1052

# Note that this only looks in the index of this particular

1053

# KnitVersionedFiles, not in the fallbacks. This ensures that

1054

# we won't store a delta spanning physical repository

1055

# boundaries.

1056

build_details = self._index.get_build_details([parent])

1057

parent_details = build_details[parent]

1058

except (RevisionNotPresent, KeyError), e:

1059

# Some basis is not locally present: always fulltext

1060

return False

1061

index_memo, compression_parent, _, _ = parent_details

1062

_, _, size = index_memo

1063

if compression_parent is None:

1064

fulltext_size = size

1065

break

1066

delta_size += size

1067

# We don't explicitly check for presence because this is in an

1068

# inner loop, and if it's missing it'll fail anyhow.

1069

parent = compression_parent

1070

else:

1071

# We couldn't find a fulltext, so we must create a new one

1072

return False

1073

# Simple heuristic - if the total I/O wold be greater as a delta than

1074

# the originally installed fulltext, we create a new fulltext.

1075

return fulltext_size > delta_size

1076

1077

def _build_details_to_components(self, build_details):

1078

"""Convert a build_details tuple to a position tuple."""

1079

# record_details, access_memo, compression_parent

1080

return build_details[3], build_details[0], build_details[1]

1081

1082

def _get_components_positions(self, keys, allow_missing=False):

1083

"""Produce a map of position data for the components of keys.

1084

1085

This data is intended to be used for retrieving the knit records.

1086

1087

A dict of key to (record_details, index_memo, next, parents) is

1088

returned.

1089

method is the way referenced data should be applied.

1090

index_memo is the handle to pass to the data access to actually get the

1091

data

1092

next is the build-parent of the version, or None for fulltexts.

1093

parents is the version_ids of the parents of this version

1094

1095

:param allow_missing: If True do not raise an error on a missing component,

1096

just ignore it.

1097

"""

1098

component_data = {}

1099

pending_components = keys

1100

while pending_components:

1101

build_details = self._index.get_build_details(pending_components)

1102

current_components = set(pending_components)

1103

pending_components = set()

1104

for key, details in build_details.iteritems():

1105

(index_memo, compression_parent, parents,

1106

record_details) = details

1107

method = record_details[0]

1108

if compression_parent is not None:

1109

pending_components.add(compression_parent)

1110

component_data[key] = self._build_details_to_components(details)

1111

missing = current_components.difference(build_details)

1112

if missing and not allow_missing:

1113

raise errors.RevisionNotPresent(missing.pop(), self)

1114

return component_data

1115

1116

def _get_content(self, key, parent_texts={}):

1117

"""Returns a content object that makes up the specified

1118

version."""

1119

cached_version = parent_texts.get(key, None)

1120

if cached_version is not None:

1121

# Ensure the cache dict is valid.

1122

if not self.get_parent_map([key]):

1123

raise RevisionNotPresent(key, self)

1124

return cached_version

1125

generator = _VFContentMapGenerator(self, [key])

1126

return generator._get_content(key)

1127

1128

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents.  Absent keys are absent from
        the mapping.
    """
    # Only the combined map is wanted; the per-source breakdown is dropped.
    lookup = self._get_parent_map_with_sources(keys)
    return lookup[0]

1136

1137

def _get_parent_map_with_sources(self, keys):

1138

"""Get a map of the parents of keys.

1139

1140

:param keys: The keys to look up parents for.

1141

:return: A tuple. The first element is a mapping from keys to parents.

1142

Absent keys are absent from the mapping. The second element is a

1143

list with the locations each key was found in. The first element

1144

is the in-this-knit parents, the second the first fallback source,

1145

and so on.

1146

"""

1147

result = {}

1148

sources = [self._index] + self._fallback_vfs

1149

source_results = []

1150

missing = set(keys)

1151

for source in sources:

1152

if not missing:

1153

break

1154

new_result = source.get_parent_map(missing)

1155

source_results.append(new_result)

1156

result.update(new_result)

1157

missing.difference_update(set(new_result))

1158

return result, source_results

1159

1160

def _get_record_map(self, keys, allow_missing=False):

1161

"""Produce a dictionary of knit records.

1162

1163

:return: {key:(record, record_details, digest, next)}

1164

record

1165

data returned from read_records (a KnitContentobject)

1166

record_details

1167

opaque information to pass to parse_record

1168

digest

1169

SHA1 digest of the full text after all steps are done

1170

1171

build-parent of the version, i.e. the leftmost ancestor.

1172

Will be None if the record is not a delta.

1173

:param keys: The keys to build a map for

1174

:param allow_missing: If some records are missing, rather than

1175

error, just return the data that could be generated.

1176

"""

1177

raw_map = self._get_record_map_unparsed(keys,

1178

allow_missing=allow_missing)

1179

return self._raw_map_to_record_map(raw_map)

1180

1181

def _raw_map_to_record_map(self, raw_map):

1182

"""Parse the contents of _get_record_map_unparsed.

1183

1184

:return: see _get_record_map.

1185

"""

1186

result = {}

1187

for key in raw_map:

1188

data, record_details, next = raw_map[key]

1189

content, digest = self._parse_record(key[-1], data)

1190

result[key] = content, record_details, digest, next

1191

return result

1192

1193

def _get_record_map_unparsed(self, keys, allow_missing=False):

1194

"""Get the raw data for reconstructing keys without parsing it.

1195

1196

:return: A dict suitable for parsing via _raw_map_to_record_map.

1197

key-> raw_bytes, (method, noeol), compression_parent

1198

"""

1199

# This retries the whole request if anything fails. Potentially we

1200

# could be a bit more selective. We could track the keys whose records

1201

# we have successfully found, and then only request the new records

1202

# from there. However, _get_components_positions grabs the whole build

1203

# chain, which means we'll likely try to grab the same records again

1204

# anyway. Also, can the build chains change as part of a pack

1205

# operation? We wouldn't want to end up with a broken chain.

1206

while True:

1207

try:

1208

position_map = self._get_components_positions(keys,

1209

allow_missing=allow_missing)

1210

# key = component_id, r = record_details, i_m = index_memo,

1211

# n = next

1212

records = [(key, i_m) for key, (r, i_m, n)

1213

in position_map.iteritems()]

1214

# Sort by the index memo, so that we request records from the

1215

# same pack file together, and in forward-sorted order

1216

records.sort(key=operator.itemgetter(1))

1217

raw_record_map = {}

1218

for key, data in self._read_records_iter_unchecked(records):

1219

(record_details, index_memo, next) = position_map[key]

1220

raw_record_map[key] = data, record_details, next

1221

return raw_record_map

1222

except errors.RetryWithNewPacks, e:

1223

self._access.reload_or_raise(e)

1224

1225

@classmethod

1226

def _split_by_prefix(cls, keys):

1227

"""For the given keys, split them up based on their prefix.

1228

1229

To keep memory pressure somewhat under control, split the

1230

requests back into per-file-id requests, otherwise "bzr co"

1231

extracts the full tree into memory before writing it to disk.

1232

This should be revisited if _get_content_maps() can ever cross

1233

file-id boundaries.

1234

1235

The keys for a given file_id are kept in the same relative order.

1236

Ordering between file_ids is not, though prefix_order will return the

1237

order that the key was first seen.

1238

1239

:param keys: An iterable of key tuples

1240

:return: (split_map, prefix_order)

1241

split_map A dictionary mapping prefix => keys

1242

prefix_order The order that we saw the various prefixes

1243

"""

1244

split_by_prefix = {}

1245

prefix_order = []

1246

for key in keys:

1247

if len(key) == 1:

1248

prefix = ''

1249

else:

1250

prefix = key[0]

1251

1252

if prefix in split_by_prefix:

1253

split_by_prefix[prefix].append(key)

1254

else:

1255

split_by_prefix[prefix] = [key]

1256

prefix_order.append(prefix)

1257

return split_by_prefix, prefix_order

1258

1259

def _group_keys_for_io(self, keys, non_local_keys, positions):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass.  Also when
    possible, we should try to group requests to the same pack file
    together.

    Keys are taken one prefix at a time (in first-seen order) and added to
    the current batch; a batch is closed once its total build size exceeds
    _STREAM_MIN_BUFFER_SIZE.

    :param keys: The keys to fetch.
    :param non_local_keys: Keys that are not held locally; they are split
        by prefix and attached to the batch holding that prefix.
    :param positions: Position data passed to the index to size each group.
    :return: A list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors.  We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together.  Ultimately it
    #       probably needs a better global view.
    requested_count = len(keys)
    keys_by_prefix, prefix_order = self._split_by_prefix(keys)
    non_local_by_prefix, _ = self._split_by_prefix(non_local_keys)
    batches = []
    batch_sizes = []
    batch_keys = []
    batch_non_local = set()
    batch_size = 0
    for prefix in prefix_order:
        prefix_keys = keys_by_prefix[prefix]
        batch_size += self._index._get_total_build_size(prefix_keys,
                                                        positions)
        batch_keys.extend(prefix_keys)
        batch_non_local.update(non_local_by_prefix.get(prefix, []))
        if batch_size <= _STREAM_MIN_BUFFER_SIZE:
            continue
        # Batch is big enough: close it and start a fresh one.
        batches.append((batch_keys, batch_non_local))
        batch_sizes.append(batch_size)
        batch_keys = []
        batch_non_local = set()
        batch_size = 0
    if batch_keys:
        batches.append((batch_keys, batch_non_local))
        batch_sizes.append(batch_size)
    trace.mutter('Collapsed %d keys into %d requests w/ %d file_ids'
                 ' w/ sizes: %s', requested_count, len(batches),
                 len(keys_by_prefix), batch_sizes)
    return batches

1303

1304

def get_record_stream(self, keys, ordering, include_delta_closure):

1305

"""Get a stream of records for keys.

1306

1307

:param keys: The keys to include.

1308

:param ordering: Either 'unordered' or 'topological'. A topologically

1309

sorted stream has compression parents strictly before their

1310

children.

1311

:param include_delta_closure: If True then the closure across any

1312

compression parents will be included (in the opaque data).

1313

:return: An iterator of ContentFactory objects, each of which is only

1314

valid until the iterator is advanced.

1315

"""

1316

# keys might be a generator

1317

keys = set(keys)

1318

if not keys:

1319

return

1320

if not self._index.has_graph:

1321

# Cannot topological order when no graph has been stored.

1322

ordering = 'unordered'

1323

1324

remaining_keys = keys

1325

while True:

1326

try:

1327

keys = set(remaining_keys)

1328

for content_factory in self._get_remaining_record_stream(keys,

1329

ordering, include_delta_closure):

1330

remaining_keys.discard(content_factory.key)

1331

yield content_factory

1332

return

1333

except errors.RetryWithNewPacks, e:

1334

self._access.reload_or_raise(e)

1335

1336

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    It is a generator.  It first yields an AbsentContentFactory for every
    requested key that no source (local index or fallback) knows about,
    then yields the records for the remaining keys.

    :param keys: A set of keys to stream (set operations are used on it).
    :param ordering: 'unordered' or 'topological'.
    :param include_delta_closure: If True, records are produced through
        _VFContentMapGenerator in batches chosen by _group_keys_for_io;
        otherwise raw local records are wrapped in KnitContentFactory and
        fallback keys are delegated to the fallback's get_record_stream.
    :raises AssertionError: If ordering is neither accepted value.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    # NOTE: this value is not read before absent_keys is recomputed from
    # global_map further down.
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                # Not in the local positions at all.
                needed_from_fallback.add(key)
                continue
            result = True
            # Walk compression parents until a fulltext (None) or an
            # already-classified key is reached.
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            # Everything on the chain except its last element shares the
            # verdict.
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering == 'topological':
        # Global topological sort
        present_keys = tsort.topo_sort(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            # Start a new (source, keys) run whenever the source changes,
            # so the topological order is kept across sources.
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        # NOTE: the loop below rebinds both ``keys`` and ``non_local_keys``
        # to the per-batch values.
        for keys, non_local_keys in self._group_keys_for_io(present_keys,
                                                            non_local_keys,
                                                            positions):
            generator = _VFContentMapGenerator(self, keys, non_local_keys,
                                               global_map)
            for record in generator.get_record_stream():
                yield record
    else:
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] is the local index, so fallback i lives at
                # parent_maps index i + 1.
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                    yield record

1450

1451

def get_sha1s(self, keys):
    """Return a dict of key -> SHA1 digest for the keys that can be found.

    See VersionedFiles.get_sha1s().

    Local records are consulted first; keys still unanswered are then
    offered to each fallback in turn until none remain.

    :param keys: An iterable of keys.
    :return: A dict mapping each found key to its digest.
    """
    wanted = set(keys)
    local_records = self._get_record_map(wanted, allow_missing=True)
    digests = {}
    for record_key in local_records:
        # The record map may also hold build-chain components that were
        # not asked for; skip those.
        if record_key in wanted:
            # record entry 2 is the 'digest'.
            digests[record_key] = local_records[record_key][2]
    wanted.difference_update(digests)
    for fallback in self._fallback_vfs:
        if not wanted:
            break
        fallback_digests = fallback.get_sha1s(wanted)
        digests.update(fallback_digests)
        wanted.difference_update(fallback_digests)
    return digests

1469

1470

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Knit records that can be stored as-is (or after a cheap annotation
    strip) are written directly; 'chunked' records and anything else are
    expanded and added through add_lines.  The index entry of a delta
    whose compression parent is not yet in this index is buffered until
    that parent has been added.  Entries still buffered when the stream
    ends are handed to the index with missing_compression_parents=True.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: If the stream contains an 'absent' record.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created on first use and cached for the rest of
        # this call.  A KeyError from the registry lookup propagates.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing.  We don't buffer all because generating
    # annotations may require access to some of the new records.  However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        # Reset per record so the flush below never sees a stale value
        # from an earlier record or an unset name.
        buffered = False
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                record_bytes = adapter.get_bytes(record)
            else:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                record_bytes = record._raw_record
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys.  As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable.  It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(record_bytes))], record_bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level.  If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            try:
                # Try getting a fulltext directly from the record.
                fulltext_bytes = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                fulltext_bytes = adapter.get_bytes(record)
            lines = split_lines(fulltext_bytes)
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Deliberate best-effort: the text is already stored.
                pass
        # Add any records whose basis parent is now available.  If this
        # record was itself buffered it is NOT in the index yet, so entries
        # waiting on it must stay buffered as well; releasing them here
        # would index deltas ahead of their basis.
        if not buffered:
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_records(index_entries)
                    # The entries just added may in turn unblock others.
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # So these need to be added (if the index supports holding such
        # entries for later insertion)
        for key in buffered_index_entries:
            index_entries = buffered_index_entries[key]
            self._index.add_records(index_entries,
                missing_compression_parents=True)

1628

1629

def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Check this after calling insert_record_stream to find out if there are
    any missing compression parents.  If there are, the records that
    depend on them are not able to be inserted safely.  For atomic
    KnitVersionedFiles built on packs, the transaction should be aborted or
    suspended - commit will fail at this point.  Nonatomic knits will error
    earlier because they have no staging area to put pending entries into.

    :return: Whatever the index's get_missing_compression_parents()
        returns.
    """
    index = self._index
    return index.get_missing_compression_parents()

1640

1641

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: An iterable of keys; it is copied into a set.
    :param pb: Optional progress bar; a DummyProgress is used when None.
    :raises RevisionNotPresent: If keys remain unfound locally and there
        are no fallback versioned files to consult.
    :return: An iterator over (line, key).
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    done = False
    # Retry loop: on RetryWithNewPacks the access object reloads (or
    # re-raises) and the remaining keys are read again.
    while not done:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            key_records = []
            build_details = self._index.get_build_details(keys)
            for key, details in build_details.iteritems():
                if key in keys:
                    # details[0] is the handle used to read the record.
                    key_records.append((key, details[0]))
            records_iter = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in records_iter:
                pb.update('Walking content.', key_idx, total)
                compression_parent = build_details[key][1]
                if compression_parent is None:
                    # fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            done = True
        except errors.RetryWithNewPacks, e:
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._fallback_vfs:
        if not keys:
            break
        # Track which keys this fallback produced lines for, so later
        # fallbacks are only asked about the rest.
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        keys.difference_update(source_keys)
    pb.update('Walking content.', total, total)

1718

1719

def _make_line_delta(self, delta_seq, new_content):

1720

"""Generate a line delta from delta_seq and new_content."""

1721

diff_hunks = []

1722

for op in delta_seq.get_opcodes():

1723

if op[0] == 'equal':

1724

continue

1725

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1726

return diff_hunks

1727

1728

def _merge_annotations(self, content, parents, parent_texts={},

1729

delta=None, annotated=None,

1730

left_matching_blocks=None):

1731

"""Merge annotations for content and generate deltas.

1732

1733

This is done by comparing the annotations based on changes to the text

1734

and generating a delta on the resulting full texts. If annotations are

1735

not being created then a simple delta is created.

1736

"""

1737

if left_matching_blocks is not None:

1738

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1739

else:

1740

delta_seq = None

1741

if annotated:

1742

for parent_key in parents:

1743

merge_content = self._get_content(parent_key, parent_texts)

1744

if (parent_key == parents[0] and delta_seq is not None):

1745

seq = delta_seq

1746

else:

1747

seq = patiencediff.PatienceSequenceMatcher(

1748

None, merge_content.text(), content.text())

1749

for i, j, n in seq.get_matching_blocks():

1750

if n == 0:

1751

continue

1752

# this copies (origin, text) pairs across to the new

1753

# content for any line that matches the last-checked

1754

# parent.

1755

content._lines[j:j+n] = merge_content._lines[i:i+n]

1756

# XXX: Robert says the following block is a workaround for a

1757

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1758

if content._lines and content._lines[-1][1][-1] != '\n':

1759

# The copied annotation was from a line without a trailing EOL,

1760

# reinstate one for the content object, to ensure correct

1761

# serialization.

1762

line = content._lines[-1][1] + '\n'

1763

content._lines[-1] = (content._lines[-1][0], line)

1764

if delta:

1765

if delta_seq is None:

1766

reference_content = self._get_content(parents[0], parent_texts)

1767

new_texts = content.text()

1768

old_texts = reference_content.text()

1769

delta_seq = patiencediff.PatienceSequenceMatcher(

1770

None, old_texts, new_texts)

1771

return self._make_line_delta(delta_seq, content)

1772

1773

def _parse_record(self, version_id, data):

1774

"""Parse an original format knit record.

1775

1776

These have the last element of the key only present in the stored data.

1777

"""

1778

rec, record_contents = self._parse_record_unchecked(data)

1779

self._check_header_version(rec, version_id)

1780

return record_contents, rec[3]

1781

1782

def _parse_record_header(self, key, raw_data):

1783

"""Parse a record header for consistency.

1784

1785

:return: the header and the decompressor stream.

1786

as (stream, header_record)

1787

"""

1788

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1789

try:

1790

# Current serialise

1791

rec = self._check_header(key, df.readline())

1792

except Exception, e:

1793

raise KnitCorrupt(self,

1794

"While reading {%s} got %s(%s)"

1795

% (key, e.__class__.__name__, str(e)))

1796

return df, rec

1797

1798

def _parse_record_unchecked(self, data):

1799

# profiling notes:

1800

# 4168 calls in 2880 217 internal

1801

# 4168 calls to _parse_record_header in 2121

1802

# 4168 calls to readlines in 330

1803

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1804

try:

1805

record_contents = df.readlines()

1806

except Exception, e:

1807

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1808

(data, e.__class__.__name__, str(e)))

1809

header = record_contents.pop(0)

1810

rec = self._split_header(header)

1811

last_line = record_contents.pop()

1812

if len(record_contents) != int(rec[2]):

1813

raise KnitCorrupt(self,

1814

'incorrect number of lines %s != %s'

1815

' for version {%s} %s'

1816

% (len(record_contents), int(rec[2]),

1817

rec[1], record_contents))

1818

if last_line != 'end %s\n' % rec[1]:

1819

raise KnitCorrupt(self,

1820

'unexpected version end line %r, wanted %r'

1821

% (last_line, rec[1]))

1822

df.close()

1823

return rec, record_contents

1824

1825

def _read_records_iter(self, records):
    """Read and parse text records from the data file.

    Records come back in whatever order is fastest to read, not the order
    requested, and a record requested several times is only yielded once.

    :param records: A list of (key, access_memo) entries
    :return: Yields (key, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    # XXX: This smells wrong, IO may not be getting ordered right.
    unique_records = set(records)
    needed_records = sorted(unique_records, key=operator.itemgetter(1))
    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for key, index_memo in needed_records]
    raw_data = self._access.get_raw_records(memos)

    for (key, index_memo), data in izip(iter(needed_records), raw_data):
        content, digest = self._parse_record(key[-1], data)
        yield key, content, digest

1852

1853

def _read_records_iter_raw(self, records):

1854

"""Read text records from data file and yield raw data.

1855

1856

This unpacks enough of the text record to validate the id is

1857

as expected but thats all.

1858

1859

Each item the iterator yields is (key, bytes,

1860

expected_sha1_of_full_text).

1861

"""

1862

for key, data in self._read_records_iter_unchecked(records):

1863

# validate the header (note that we can only use the suffix in

1864

# current knit records).

1865

df, rec = self._parse_record_header(key, data)

1866

df.close()

1867

yield key, data, rec[3]

1868

1869

def _read_records_iter_unchecked(self, records):

1870

"""Read text records from data file and yield raw data.

1871

1872

No validation is done.

1873

1874

Yields tuples of (key, data).

1875

"""

1876

# setup an iterator of the external records:

1877

# uses readv so nice and fast we hope.

1878

if len(records):

1879

# grab the disk data needed.

1880

needed_offsets = [index_memo for key, index_memo

1881

in records]

1882

raw_records = self._access.get_raw_records(needed_offsets)

1883

1884

for key, index_memo in records:

1885

data = raw_records.next()

1886

yield key, data

1887

1888

def _record_to_data(self, key, digest, lines, dense_lines=None):

1889

"""Convert key, digest, lines into a raw data block.

1890

1891

:param key: The key of the record. Currently keys are always serialised

1892

using just the trailing component.

1893

:param dense_lines: The bytes of lines but in a denser form. For

1894

instance, if lines is a list of 1000 bytestrings each ending in \n,

1895

dense_lines may be a list with one line in it, containing all the

1896

1000's lines and their \n's. Using dense_lines if it is already

1897

known is a win because the string join to create bytes in this

1898

function spends less time resizing the final string.

1899

:return: (len, a StringIO instance with the raw data ready to read.)

1900

"""

1901

# Note: using a string copy here increases memory pressure with e.g.

1902

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

1903

# when doing the initial commit of a mozilla tree. RBC 20070921

1904

bytes = ''.join(chain(

1905

["version %s %d %s\n" % (key[-1],

1906

len(lines),

1907

digest)],

1908

dense_lines or lines,

1909

["end %s\n" % key[-1]]))

1910

if type(bytes) != str:

1911

raise AssertionError(

1912

'data must be plain bytes was %s' % type(bytes))

1913

if lines and lines[-1][-1] != '\n':

1914

raise ValueError('corrupt lines value %r' % lines)

1915

compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)

1916

return len(compressed_bytes), compressed_bytes

1917

1918

def _split_header(self, line):

1919

rec = line.split()

1920

if len(rec) != 4:

1921

raise KnitCorrupt(self,

1922

'unexpected number of elements in record header')

1923

return rec

1924

1925

def keys(self):
    """See VersionedFiles.keys."""
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    # Union of the keys in our own index and in every fallback.
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1934

1935

1936

class _ContentMapGenerator(object):
    """Generate texts or expose raw deltas for a set of texts.

    Subclasses are expected to provide the attributes read here: vf, keys,
    nonlocal_keys, global_map, _raw_record_map, _record_map, _contents_map
    and _factory.
    """

    def _get_content(self, key):
        """Get the content object for key."""
        # Note that _get_content is only called when the _ContentMapGenerator
        # has been constructed with just one key requested for reconstruction.
        if key in self.nonlocal_keys:
            record = self.get_record_stream().next()
            # Create a content object on the fly
            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            return PlainKnitContent(lines, record.key)
        else:
            # local keys we can ask for directly
            return self._get_one_work(key)

    def get_record_stream(self):
        """Get a record stream for the keys requested during __init__."""
        for record in self._work():
            yield record

    def _work(self):
        """Yield a record for each requested key.

        Records for the nonlocal keys are taken from the fallback versioned
        files first (absent records are skipped); the remaining keys are
        then yielded as LazyKnitContentFactory objects backed by this
        generator.
        """
        # NB: By definition we never need to read remote sources unless texts
        # are requested from them: we don't delta across stores - and we
        # explicitly do not want to to prevent data loss situations.
        if self.global_map is None:
            self.global_map = self.vf.get_parent_map(self.keys)
        nonlocal_keys = self.nonlocal_keys

        missing_keys = set(nonlocal_keys)
        # Read from remote versioned file instances and provide to our caller.
        for source in self.vf._fallback_vfs:
            if not missing_keys:
                break
            # Loop over fallback repositories asking them for texts - ignore
            # any missing from a particular fallback.
            for record in source.get_record_stream(missing_keys,
                'unordered', True):
                if record.storage_kind == 'absent':
                    # Not in this particular stream, may be in one of the
                    # other fallback vfs objects.
                    continue
                missing_keys.remove(record.key)
                yield record

        # Refresh the raw record map for all requested keys before handing
        # out the lazy factories that read from it.
        self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,
            allow_missing=True)
        first = True
        for key in self.keys:
            if key in self.nonlocal_keys:
                continue
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _get_one_work(self, requested_key):
        """Return the content object for requested_key, building it if needed.

        All local keys in self.keys are reconstructed in one pass; each
        reconstructed text is checked against the digest stored with its
        record.

        :raises RevisionNotPresent: If a record needed to build a text is
            not in the record map.
        :raises SHA1KnitCorrupt: If a reconstructed text does not match its
            recorded digest.
        """
        # Now, if we have calculated everything already, just return the
        # desired text.
        if requested_key in self._contents_map:
            return self._contents_map[requested_key]
        # To simplify things, parse everything at once - code that wants one text
        # probably wants them all.
        # FUTURE: This function could be improved for the 'extract many' case
        # by tracking each component and only doing the copy when the number of
        # children than need to apply delta's to it is > 1 or it is part of the
        # final output.
        multiple_versions = len(self.keys) != 1
        if self._record_map is None:
            self._record_map = self.vf._raw_map_to_record_map(
                self._raw_record_map)
        record_map = self._record_map
        # raw_record_map is key:
        # Have read and parsed records at this point.
        for key in self.keys:
            if key in self.nonlocal_keys:
                # already handled
                continue
            # Walk the compression chain back from key, collecting the
            # records that must be applied.
            components = []
            cursor = key
            while cursor is not None:
                try:
                    record, record_details, digest, next = record_map[cursor]
                except KeyError:
                    raise RevisionNotPresent(cursor, self)
                components.append((cursor, record, record_details, digest))
                cursor = next
                if cursor in self._contents_map:
                    # no need to plan further back
                    components.append((cursor, None, None, None))
                    break

            # Apply the components oldest first.
            content = None
            for (component_id, record, record_details,
                 digest) in reversed(components):
                if component_id in self._contents_map:
                    content = self._contents_map[component_id]
                else:
                    content, delta = self._factory.parse_record(key[-1],
                        record, record_details, content,
                        copy_base_content=multiple_versions)
                    if multiple_versions:
                        self._contents_map[component_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            actual_sha = sha_strings(text)
            if actual_sha != digest:
                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
        if multiple_versions:
            return self._contents_map[requested_key]
        else:
            # With a single key nothing was cached; content is the text
            # built by the one pass of the loop above.
            return content

    def _wire_bytes(self):
        """Get the bytes to put on the wire for this generator.

        The result starts with the 'knit-delta-closure' kind marker, then
        a line saying whether the texts are annotated, then the list of
        local keys, followed by the serialised _raw_record_map.

        :return: Bytes to put on the wire.
        """
        lines = []
        # kind marker for dispatch on the far side,
        lines.append('knit-delta-closure')
        # Annotated or not
        if self.vf._factory.annotated:
            lines.append('annotated')
        else:
            lines.append('')
        # then the list of keys
        lines.append('\t'.join(['\x00'.join(key) for key in self.keys
            if key not in self.nonlocal_keys]))
        # then the _raw_record_map in serialised form:
        map_byte_list = []
        # for each item in the map:
        # 1 line with key
        # 1 line with parents if the key is to be yielded (None: for None, '' for ())
        # one line with method
        # one line with noeol
        # one line with next ('' for None)
        # one line with byte count of the record bytes
        # the record bytes
        for key, (record_bytes, (method, noeol), next) in \
            self._raw_record_map.iteritems():
            key_bytes = '\x00'.join(key)
            parents = self.global_map.get(key, None)
            if parents is None:
                parent_bytes = 'None:'
            else:
                parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
            method_bytes = method
            if noeol:
                noeol_bytes = "T"
            else:
                noeol_bytes = "F"
            if next:
                next_bytes = '\x00'.join(next)
            else:
                next_bytes = ''
            map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
                key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
                len(record_bytes), record_bytes))
        map_bytes = ''.join(map_byte_list)
        lines.append(map_bytes)
        bytes = '\n'.join(lines)
        return bytes

2110

2111

2112

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
        global_map=None, raw_record_map=None):
        """Create a _VFContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for.  Any iterable is
            accepted; it is copied into a list exactly once.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: A unparsed raw record map to use for answering
            contents.
        """
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list: the keys argument may be a one-shot
            # iterator that list(keys) above has already exhausted.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2155

2156

2157

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised form to parse.
        :param line_end: The offset in bytes at which parsing starts.
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)
        pos = line_end
        # Annotated or not
        annotated_flag, pos = self._take_line(bytes, pos)
        if annotated_flag == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        key_line, pos = self._take_line(bytes, pos)
        self.keys = [
            tuple(segment.split('\x00')) for segment in key_line.split('\t')
            if segment]
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives, but
        # there is a decent sized gap stopping that at the moment.
        total = len(bytes)
        while pos < total:
            # 1 line with key
            key_text, pos = self._take_line(bytes, pos)
            key = tuple(key_text.split('\x00'))
            # 1 line with parents (None: for None, '' for ())
            parent_line, pos = self._take_line(bytes, pos)
            if parent_line == 'None:':
                parents = None
            else:
                parents = tuple(
                    [tuple(segment.split('\x00'))
                     for segment in parent_line.split('\t')
                     if segment])
            self.global_map[key] = parents
            # one line with method
            method, pos = self._take_line(bytes, pos)
            # one line with noeol
            noeol_flag, pos = self._take_line(bytes, pos)
            noeol = noeol_flag == "T"
            # one line with next ('' for None)
            next_text, pos = self._take_line(bytes, pos)
            if next_text:
                next_key = tuple(next_text.split('\x00'))
            else:
                next_key = None
            # one line with byte count of the record bytes
            count_text, pos = self._take_line(bytes, pos)
            count = int(count_text)
            # the record bytes
            record_bytes = bytes[pos:pos+count]
            pos = pos + count
            # put it in the map
            self._raw_record_map[key] = (record_bytes, (method, noeol),
                                         next_key)

    @staticmethod
    def _take_line(text, pos):
        """Return (line, next_pos) for the line of text starting at pos.

        The line excludes its terminating newline; next_pos is the offset
        just past that newline.
        """
        end = text.find('\n', pos)
        return text[pos:end], end + 1

    def get_record_stream(self):
        """Get a record stream for for keys requested by the bytestream."""
        first = True
        for key in self.keys:
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _wire_bytes(self):
        """Return the bytes this generator was constructed from."""
        return self._bytes

2244

2245

2246

class _KndxIndex(object):

2247

"""Manages knit index files

2248

2249

The index is kept in memory and read on startup, to enable

2250

fast lookups of revision information. The cursor of the index

2251

file is always pointing to the end, making it easy to append

2252

entries.

2253

2254

_cache is a cache for fast mapping from version id to a Index

2255

object.

2256

2257

_history is a cache for fast mapping from indexes to version ids.

2258

2259

The index data format is dictionary compressed when it comes to

2260

parent references; an index entry may only have parents with a

2261

lower index number. As a result, the index is topologically sorted.

2262

2263

Duplicate entries may be written to the index for a single version id

2264

if this is done then the latter one completely replaces the former:

2265

this allows updates to correct version and parent information.

2266

Note that the two entries may share the delta, and that successive

2267

annotations and references MUST point to the first entry.

2268

2269

The index file on disc contains a header, followed by one line per knit

2270

record. The same revision can be present in an index file more than once.

2271

The first occurrence gets assigned a sequence number starting from 0.

2272

2273

The format of a single line is

2274

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

2275

REVISION_ID is a utf8-encoded revision id

2276

FLAGS is a comma separated list of flags about the record. Values include

2277

no-eol, line-delta, fulltext.

2278

BYTE_OFFSET is the ascii representation of the byte offset in the data file

2279

that the compressed data starts at.

2280

LENGTH is the ascii representation of the length of the data file.

2281

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

2282

REVISION_ID.

2283

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

2284

revision id already in the knit that is a parent of REVISION_ID.

2285

The ' :' marker is the end of record marker.

2286

2287

partial writes:

2288

when a write is interrupted to the index file, it will result in a line

2289

that does not end in ' :'. If the ' :' is not present at the end of a line,

2290

or at the end of the file, then the record that is missing it will be

2291

ignored by the parser.

2292

2293

When writing new records to the index file, the data is preceded by '\n'

2294

to ensure that records always start on new lines even if the last write was

2295

interrupted. As a result its normal for the last line in the index to be

2296

missing a trailing newline. One can be added with no harmful effects.

2297

2298

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2299

where prefix is e.g. the (fileid,) for .texts instances or () for

2300

constant-mapped things like .revisions, and the old state is

2301

tuple(history_vector, cache_dict). This is used to prevent having an

2302

ABI change with the C extension that reads .kndx files.

2303

"""

2304

2305

HEADER = "# bzr knit index 8\n"

2306

2307

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    :param transport: The transport the .kndx files are read from and
        written to.
    :param mapper: Maps keys and prefixes to the paths of the .kndx files
        (a '.kndx' suffix is appended to what it returns).
    :param get_scope: A callable; whenever its result differs from the
        remembered scope the cached index data is reset.
    :param allow_writes: Stored as _allow_writes.
    :param is_locked: A callable returning true while the index may be
        used; an ObjectNotLocked error is raised otherwise.
    """
    self._transport = transport
    self._mapper = mapper
    self._get_scope = get_scope
    self._allow_writes = allow_writes
    self._is_locked = is_locked
    # Starts the index with an empty cache; must run after the attributes
    # above are in place.  NOTE(review): presumably _reset_cache also
    # records the current scope and write mode - confirm against its
    # definition.
    self._reset_cache()
    self.has_graph = True

2316

2317

def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file they belong to, and each file is
    updated with a single append (or created, if the prefix had no history
    yet).  If anything fails while a file is being updated, the in-memory
    cache for that prefix is restored to its previous state before the
    error is re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.  (Not consulted by
        this implementation.)
    :param missing_compression_parents: Whether the records may refer to
        compression parents that are not available.  This index does not
        support that: when True, RevisionNotPresent is raised and nothing
        is added.
    :raises RevisionNotPresent: If missing_compression_parents is True.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # path of the .kndx file -> (prefix, [records destined for that file])
    paths = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        path_keys = paths.setdefault(path, (prefix, []))
        path_keys[1].append(record)
    for path in sorted(paths):
        prefix, path_keys = paths[path]
        self._load_prefixes([prefix])
        lines = []
        # Snapshot the (cache, history) pair so it can be restored if the
        # update below fails part way through.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_keys:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                # Each line starts with a newline so that it begins on a
                # fresh line of the file.
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(orig_history):
                # The prefix already had entries: append to its file.
                self._transport.append_bytes(path, ''.join(lines))
            else:
                # No previous entries: write a new file with the header.
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

2369

2370

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: Always; kndx indices do not support this.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    raise NotImplementedError(self.scan_unvalidated_index)

2375

2376

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    :raises NotImplementedError: Always; kndx indices do not support this.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    raise NotImplementedError(self.get_missing_compression_parents)

2381

2382

def _cache_key(self, key, options, pos, size, parent_keys):

2383

"""Cache a version record in the history array and index cache.

2384

2385

This is inlined into _load_data for performance. KEEP IN SYNC.

2386

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

2387

indexes).

2388

"""

2389

prefix = key[:-1]

2390

version_id = key[-1]

2391

# last-element only for compatibilty with the C load_data.

2392

parents = tuple(parent[-1] for parent in parent_keys)

2393

for parent in parent_keys:

2394

if parent[:-1] != prefix:

2395

raise ValueError("mismatched prefixes for %r, %r" % (

2396

key, parent_keys))

2397

cache, history = self._kndx_cache[prefix]

2398

# only want the _history index to reference the 1st index entry

2399

# for version_id

2400

if version_id not in cache:

2401

index = len(history)

2402

history.append(version_id)

2403

else:

2404

index = cache[version_id][5]

2405

cache[version_id] = (version_id,

2406

options,

2407

pos,

2408

size,

2409

parents,

2410

index)

2411

2412

def check_header(self, fp):
    """Read the first line of fp and check that it is the index header.

    :raises errors.NoSuchFile: If fp is empty.
    :raises KnitHeaderError: If the first line is not HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    elif first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)

2420

2421

def _check_read(self):

2422

if not self._is_locked():

2423

raise errors.ObjectNotLocked(self)

2424

if self._get_scope() != self._scope:

2425

self._reset_cache()

2426

2427

def _check_write_ok(self):

2428

"""Assert if not writes are permitted."""

2429

if not self._is_locked():

2430

raise errors.ObjectNotLocked(self)

2431

if self._get_scope() != self._scope:

2432

self._reset_cache()

2433

if self._mode != 'w':

2434

raise errors.ReadOnlyObjectDirtiedError(self)

2435

2436

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.  It may be a one-shot iterator; it
        is copied into a list before use.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked twice (parent lookup, then the loop below), so it has
    # to be materialised or an iterator would come up empty the second
    # time and every key would silently be dropped.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Deltas are built against the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

2471

2472

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: If neither option is present.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

2481

2482

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :raises RevisionNotPresent: If key is not in the index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        cache = self._kndx_cache[prefix][0]
        return cache[suffix][1]
    except KeyError:
        raise RevisionNotPresent(key, self)

2493

2494

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.  Any iterable is
        accepted, including a one-shot iterator; it is copied into a list
        before use.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice (once to collect the prefixes, once to look
    # each key up), so it has to be materialised or an iterator would
    # come up empty on the second pass.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Not present in the index: leave it out of the result.
            pass
        else:
            # The cache stores only the last element of each parent key;
            # put the prefix back to form full keys.
            result[key] = tuple(prefix + (suffix,) for
                suffix in suffix_parents)
    return result

2517

2518

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    cache = self._kndx_cache[prefix][0]
    entry = cache[suffix]
    position, size = entry[2], entry[3]
    return key, position, size

2528

2529

has_key = _mod_index._has_key_from_parent_map

2530

2531

def _init_index(self, path, extra_lines=[]):
    """Write a new index file at path.

    The file holds HEADER followed by extra_lines; missing parent
    directories are created.  extra_lines is only read, never modified.
    """
    contents = StringIO()
    contents.write(self.HEADER)
    contents.writelines(extra_lines)
    # Rewind so the transport reads the buffer from the beginning.
    contents.seek(0)
    self._transport.put_file_non_atomic(
        path, contents, create_parent_dir=True)
    # self._create_parent_dir)
    # mode=self._file_mode,
    # dir_mode=self._dir_mode)

2542

2543

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples, one per entry in the history list of every
        prefix that was found.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        # A constant mapper has exactly one index, under the empty prefix.
        prefixes = [()]
    else:
        # Strip the extension from every file on the transport so that each
        # index/data pair is only considered once.
        relpaths = set(os.path.splitext(quoted_relpath)[0]
                       for quoted_relpath
                       in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    all_keys = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        all_keys.update(prefix + (suffix,) for suffix in history)
    return all_keys

2564

2565

def _load_prefixes(self, prefixes):

2566

"""Load the indices for prefixes."""

2567

self._check_read()

2568

for prefix in prefixes:

2569

if prefix not in self._kndx_cache:

2570

# the load_data interface writes to these variables.

2571

self._cache = {}

2572

self._history = []

2573

self._filename = prefix

2574

try:

2575

path = self._mapper.map(prefix) + '.kndx'

2576

fp = self._transport.get(path)

2577

try:

2578

# _load_data may raise NoSuchFile if the target knit is

2579

# completely empty.

2580

_load_data(self, fp)

2581

finally:

2582

fp.close()

2583

self._kndx_cache[prefix] = (self._cache, self._history)

2584

del self._cache

2585

del self._filename

2586

del self._history

2587

except NoSuchFile:

2588

self._kndx_cache[prefix] = ({}, [])

2589

if type(self._mapper) == ConstantMapper:

2590

# preserve behaviour for revisions.kndx etc.

2591

self._init_index(path)

2592

del self._cache

2593

del self._filename

2594

del self._history

2595

2596

# missing_keys is likewise bound to the shared helper from _mod_index rather
# than being implemented in this class.
# NOTE(review): by its name the helper is built on get_parent_map(); confirm
# in _mod_index.
missing_keys = _mod_index._missing_keys_from_parent_map

2597

2598

def _partition_keys(self, keys):

2599

"""Turn keys into a dict of prefix:suffix_list."""

2600

result = {}

2601

for key in keys:

2602

prefix_keys = result.setdefault(key[:-1], [])

2603

prefix_keys.append(key[-1])

2604

return result

2605

2606

def _dictionary_compress(self, keys):

2607

"""Dictionary compress keys.

2608

2609

:param keys: The keys to generate references to.

2610

:return: A string representation of keys. keys which are present are

2611

dictionary compressed, and others are emitted as fulltext with a

2612

'.' prefix.

2613

"""

2614

if not keys:

2615

return ''

2616

result_list = []

2617

prefix = keys[0][:-1]

2618

cache = self._kndx_cache[prefix][0]

2619

for key in keys:

2620

if key[:-1] != prefix:

2621

# kndx indices cannot refer across partitioned storage.

2622

raise ValueError("mismatched prefixes for %r" % keys)

2623

if key[-1] in cache:

2624

# -- inlined lookup() --

2625

result_list.append(str(cache[key[-1]][5]))

2626

# -- end lookup () --

2627

else:

2628

result_list.append('.' + key[-1])

2629

return ' '.join(result_list)

2630

2631

def _reset_cache(self):

2632

# Possibly this should be a LRU cache. A dictionary from key_prefix to

2633

# (cache_dict, history_vector) for parsed kndx files.

2634

self._kndx_cache = {}

2635

self._scope = self._get_scope()

2636

allow_writes = self._allow_writes()

2637

if allow_writes:

2638

self._mode = 'w'

2639

else:

2640

self._mode = 'r'

2641

2642

def _sort_keys_by_io(self, keys, positions):

2643

"""Figure out an optimal order to read the records for the given keys.

2644

2645

Sort keys, grouped by index and sorted by position.

2646

2647

:param keys: A list of keys whose records we want to read. This will be

2648

sorted 'in-place'.

2649

:param positions: A dict, such as the one returned by

2650

_get_components_positions()

2651

:return: None

2652

"""

2653

def get_sort_key(key):

2654

index_memo = positions[key][1]

2655

# Group by prefix and position. index_memo[0] is the key, so it is

2656

# (file_id, revision_id) and we don't want to sort on revision_id,

2657

# index_memo[1] is the position, and index_memo[2] is the size,

2658

# which doesn't matter for the sort

2659

return index_memo[0][:-1], index_memo[1]

2660

return keys.sort(key=get_sort_key)

2661

2662

# Expose the module-level _get_total_build_size (defined outside this
# section) as an attribute of the class. The right-hand side resolves to the
# module-level name because the class namespace has no such name yet.
_get_total_build_size = _get_total_build_size

2663

2664

def _split_key(self, key):

2665

"""Split key into a prefix and suffix."""

2666

return key[:-1], key[-1]

2667

2668

2669

class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex."""

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
                 add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked and
            thus usable; it is consulted before queries are answered.
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: If deltas is true while parents is false.
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked
        self._missing_compression_parents = set()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._graph_index)

    def add_records(self, records, random_id=False,
                    missing_compression_parents=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
            (key, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :param missing_compression_parents: If True the line-delta records
            being added may be compressed against texts that are not
            available yet: their compression parents are remembered (unless
            they are among the records added by this call) and reported by
            get_missing_compression_parents(). If False no compression
            parents are recorded as missing.
        :raises errors.ReadOnlyError: If the index has no add_callback.
        :raises KnitCorrupt: If a record does not fit this index (line-delta
            in a non-delta index, parents in a parentless index) or
            conflicts with an entry that is already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        compression_parents = set()
        for (key, options, access_memo, parents) in records:
            if self._parents:
                parents = tuple(parents)
            index, pos, size = access_memo
            # The value is a one character no-eol flag followed by
            # "<position> <size>".
            value = ' '
            if 'no-eol' in options:
                value = 'N'
            value = value + "%d %d" % (pos, size)
            if not self._deltas and 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if not self._parents:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            elif not self._deltas:
                node_refs = (parents, )
            elif 'line-delta' in options:
                # A delta is always made against the first parent.
                basis = parents[0]
                node_refs = (parents, (basis,))
                if missing_compression_parents:
                    compression_parents.add(basis)
            else:
                node_refs = (parents, ())
            keys[key] = (value, node_refs)
        # check for dups
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value[0] != keys[key][0][0] or
                    node_refs[:1] != keys[key][1][:1]):
                    raise KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                # Already present and consistent: do not add it again.
                del keys[key]
        if self._parents:
            result = [(key, value, node_refs)
                      for key, (value, node_refs) in keys.iteritems()]
        else:
            result = [(key, value)
                      for key, (value, node_refs) in keys.iteritems()]
        self._add_callback(result)
        if missing_compression_parents:
            # This may appear to be incorrect (it does not check for
            # compression parents that are in the existing graph index),
            # but such records won't have been buffered, so this is
            # actually correct: every entry when
            # missing_compression_parents==True either has a missing parent, or
            # a parent that is one of the keys in records.
            compression_parents.difference_update(keys)
            self._missing_compression_parents.update(compression_parents)
        # Adding records may have satisfied missing compression parents.
        self._missing_compression_parents.difference_update(keys)

    def scan_unvalidated_index(self, graph_index):
        """Inform this _KnitGraphIndex that there is an unvalidated index.

        This allows this _KnitGraphIndex to keep track of any missing
        compression parents we may want to have filled in to make those
        indices valid.

        :param graph_index: A GraphIndex
        """
        if not self._deltas:
            # Without deltas there are no compression parents to track.
            return
        new_missing = graph_index.external_references(ref_list_num=1)
        # Anything this index can already answer for is not missing.
        new_missing.difference_update(self.get_parent_map(new_missing))
        self._missing_compression_parents.update(new_missing)

    def get_missing_compression_parents(self):
        """Return the keys of missing compression parents.

        Missing compression parents occur when a record stream was missing
        basis texts, or a index was scanned that had missing basis texts.
        """
        return frozenset(self._missing_compression_parents)

    def _check_read(self):
        """raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Assert if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _compression_parent(self, an_entry):
        """Return the key that an_entry is compressed against, or None.

        :param an_entry: An index node whose element 3 holds the reference
            lists; the second list holds the compression parent.
        :raises AssertionError: If more than one compression parent is
            recorded.
        """
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        if len(compression_parents) != 1:
            # The value is wrapped in a 1-tuple so that a tuple of parents is
            # formatted as one value; unwrapped it would be taken as the
            # argument list of % and raise TypeError instead.
            raise AssertionError(
                "Too many compression parents: %r" % (compression_parents,))
        return compression_parents[0]

    def get_build_details(self, keys):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        for entry in self._get_entries(keys, False):
            key = entry[1]
            if self._parents:
                parents = entry[3][0]
            else:
                parents = ()
            if self._deltas:
                compression_parent_key = self._compression_parent(entry)
            else:
                compression_parent_key = None
            # The first character of the value is the no-eol flag.
            noeol = (entry[2][0] == 'N')
            if compression_parent_key:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[key] = (self._node_to_position(entry),
                           compression_parent_key, parents,
                           (method, noeol))
        return result

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator; nodes from a parentless index are padded with an
        empty reference tuple so that every node has four elements.

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise RevisionNotPresent once all
            found nodes have been yielded if any requested key was not found.
        """
        keys = set(keys)
        found_keys = set()
        with_parents = self._parents
        for node in self._graph_index.iter_entries(keys):
            if with_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def get_method(self, key):
        """Return compression method of specified key."""
        return self._get_method(self._get_node(key))

    def _get_method(self, node):
        """Return 'line-delta' if node has a compression parent, else 'fulltext'."""
        if self._deltas and self._compression_parent(node):
            return 'line-delta'
        return 'fulltext'

    def _get_node(self, key):
        """Return the index node for key.

        :raises RevisionNotPresent: If no node is found for key.
        """
        try:
            matches = list(self._get_entries([key]))
            return matches[0]
        except IndexError:
            raise RevisionNotPresent(key, self)

    def get_options(self, key):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(key)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping. When the index does not record parents every
            present key maps to None.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        if self._parents:
            return dict((node[1], node[3][0]) for node in nodes)
        return dict((node[1], None) for node in nodes)

    def get_position(self, key):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        return self._node_to_position(self._get_node(key))

    has_key = _mod_index._has_key_from_parent_map

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    missing_keys = _mod_index._missing_keys_from_parent_map

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        # Skip the leading flag character; the rest is "<position> <size>".
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def _sort_keys_by_io(self, keys, positions):
        """Figure out an optimal order to read the records for the given keys.

        Sort keys, grouped by index and sorted by position.

        :param keys: A list of keys whose records we want to read. This will be
            sorted 'in-place'.
        :param positions: A dict, such as the one returned by
            _get_components_positions()
        :return: None
        """
        # index_memo is at offset [1]. It is made up of (GraphIndex,
        # position, size). GI is an object, which will be unique for each
        # pack file. This causes us to group by pack file, then sort by
        # position. Size doesn't matter, but it isn't worth breaking up the
        # tuple.
        keys.sort(key=lambda key: positions[key][1])

    _get_total_build_size = _get_total_build_size

2986

2987

2988

class _KnitKeyAccess(object):

2989

"""Access to records in .knit files."""

2990

2991

def __init__(self, transport, mapper):

2992

"""Create a _KnitKeyAccess with transport and mapper.

2993

2994

:param transport: The transport the access object is rooted at.

2995

:param mapper: The mapper used to map keys to .knit files.

2996

"""

2997

self._transport = transport

2998

self._mapper = mapper

2999

3000

def add_raw_records(self, key_sizes, raw_data):

3001

"""Add raw knit bytes to a storage area.

3002

3003

The data is spooled to the container writer in one bytes-record per

3004

raw data item.

3005

3006

:param sizes: An iterable of tuples containing the key and size of each

3007

raw data segment.

3008

:param raw_data: A bytestring containing the data.

3009

:return: A list of memos to retrieve the record later. Each memo is an

3010

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

3011

length), where the key is the record key.

3012

"""

3013

if type(raw_data) != str:

3014

raise AssertionError(

3015

'data must be plain bytes was %s' % type(raw_data))

3016

result = []

3017

offset = 0

3018

# TODO: This can be tuned for writing to sftp and other servers where

3019

# append() is relatively expensive by grouping the writes to each key

3020

# prefix.

3021

for key, size in key_sizes:

3022

path = self._mapper.map(key)

3023

try:

3024

base = self._transport.append_bytes(path + '.knit',

3025

raw_data[offset:offset+size])

3026

except errors.NoSuchFile:

3027

self._transport.mkdir(osutils.dirname(path))

3028

base = self._transport.append_bytes(path + '.knit',

3029

raw_data[offset:offset+size])

3030

# if base == 0:

3031

# chmod.

3032

offset += size

3033

result.append((key, base, size))

3034

return result

3035

3036

def get_raw_records(self, memos_for_retrieval):

3037

"""Get the raw bytes for a records.

3038

3039

:param memos_for_retrieval: An iterable containing the access memo for

3040

retrieving the bytes.

3041

:return: An iterator over the bytes of the records.

3042

"""

3043

# first pass, group into same-index request to minimise readv's issued.

3044

request_lists = []

3045

current_prefix = None

3046

for (key, offset, length) in memos_for_retrieval:

3047

if current_prefix == key[:-1]:

3048

current_list.append((offset, length))

3049

else:

3050

if current_prefix is not None:

3051

request_lists.append((current_prefix, current_list))

3052

current_prefix = key[:-1]

3053

current_list = [(offset, length)]

3054

# handle the last entry

3055

if current_prefix is not None:

3056

request_lists.append((current_prefix, current_list))

3057

for prefix, read_vector in request_lists:

3058

path = self._mapper.map(prefix) + '.knit'

3059

for pos, data in self._transport.readv(path, read_vector):

3060

yield data

3061

3062

3063

class _DirectPackAccess(object):

3064

"""Access to data in one or more packs with less translation."""

3065

3066

def __init__(self, index_to_packs, reload_func=None):

3067

"""Create a _DirectPackAccess object.

3068

3069

:param index_to_packs: A dict mapping index objects to the transport

3070

and file names for obtaining data.

3071

:param reload_func: A function to call if we determine that the pack

3072

files have moved and we need to reload our caches. See

3073

bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.

3074

"""

3075

self._container_writer = None

3076

self._write_index = None

3077

self._indices = index_to_packs

3078

self._reload_func = reload_func

3079

3080

def add_raw_records(self, key_sizes, raw_data):

3081

"""Add raw knit bytes to a storage area.

3082

3083

The data is spooled to the container writer in one bytes-record per

3084

raw data item.

3085

3086

:param sizes: An iterable of tuples containing the key and size of each

3087

raw data segment.

3088

:param raw_data: A bytestring containing the data.

3089

:return: A list of memos to retrieve the record later. Each memo is an

3090

opaque index memo. For _DirectPackAccess the memo is (index, pos,

3091

length), where the index field is the write_index object supplied

3092

to the PackAccess object.

3093

"""

3094

if type(raw_data) != str:

3095

raise AssertionError(

3096

'data must be plain bytes was %s' % type(raw_data))

3097

result = []

3098

offset = 0

3099

for key, size in key_sizes:

3100

p_offset, p_length = self._container_writer.add_bytes_record(

3101

raw_data[offset:offset+size], [])

3102

offset += size

3103

result.append((self._write_index, p_offset, p_length))

3104

return result

3105

3106

def get_raw_records(self, memos_for_retrieval):

3107

"""Get the raw bytes for a records.

3108

3109

:param memos_for_retrieval: An iterable containing the (index, pos,

3110

length) memo for retrieving the bytes. The Pack access method

3111

looks up the pack to use for a given record in its index_to_pack

3112

map.

3113

:return: An iterator over the bytes of the records.

3114

"""

3115

# first pass, group into same-index requests

3116

request_lists = []

3117

current_index = None

3118

for (index, offset, length) in memos_for_retrieval:

3119

if current_index == index:

3120

current_list.append((offset, length))

3121

else:

3122

if current_index is not None:

3123

request_lists.append((current_index, current_list))

3124

current_index = index

3125

current_list = [(offset, length)]

3126

# handle the last entry

3127

if current_index is not None:

3128

request_lists.append((current_index, current_list))

3129

for index, offsets in request_lists:

3130

try:

3131

transport, path = self._indices[index]

3132

except KeyError:

3133

# A KeyError here indicates that someone has triggered an index

3134

# reload, and this index has gone missing, we need to start

3135

# over.

3136

if self._reload_func is None:

3137

# If we don't have a _reload_func there is nothing that can

3138

# be done

3139

raise

3140

raise errors.RetryWithNewPacks(index,

3141

reload_occurred=True,

3142

exc_info=sys.exc_info())

3143

try:

3144

reader = pack.make_readv_reader(transport, path, offsets)

3145

for names, read_func in reader.iter_records():

3146

yield read_func(None)

3147

except errors.NoSuchFile:

3148

# A NoSuchFile error indicates that a pack file has gone

3149

# missing on disk, we need to trigger a reload, and start over.

3150

if self._reload_func is None:

3151

raise

3152

raise errors.RetryWithNewPacks(transport.abspath(path),

3153

reload_occurred=False,

3154

exc_info=sys.exc_info())

3155

3156

def set_writer(self, writer, index, transport_packname):

3157

"""Set a writer to use for adding data."""

3158

if index is not None:

3159

self._indices[index] = transport_packname

3160

self._container_writer = writer

3161

self._write_index = index

3162

3163

def reload_or_raise(self, retry_exc):

3164

"""Try calling the reload function, or re-raise the original exception.

3165

3166

This should be called after _DirectPackAccess raises a

3167

RetryWithNewPacks exception. This function will handle the common logic

3168

of determining when the error is fatal versus being temporary.

3169

It will also make sure that the original exception is raised, rather

3170

than the RetryWithNewPacks exception.

3171

3172

If this function returns, then the calling function should retry

3173

whatever operation was being performed. Otherwise an exception will

3174

be raised.

3175

3176

:param retry_exc: A RetryWithNewPacks exception.

3177

"""

3178

is_error = False

3179

if self._reload_func is None:

3180

is_error = True

3181

elif not self._reload_func():

3182

# The reload claimed that nothing changed

3183

if not retry_exc.reload_occurred:

3184

# If there wasn't an earlier reload, then we really were

3185

# expecting to find changes. We didn't find them, so this is a

3186

# hard error

3187

is_error = True

3188

if is_error:

3189

exc_class, exc_value, exc_traceback = retry_exc.exc_info

3190

raise exc_class, exc_value, exc_traceback

3191

3192

3193

# Deprecated, use PatienceSequenceMatcher instead

3194

# Alias only: the old name refers to the very same class object as
# patiencediff.PatienceSequenceMatcher.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

3195

3196

3197

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit handed to a fresh _KnitAnnotator.
    :param revision_id: The revision to annotate.
    :return: An iterator over the result of _KnitAnnotator.annotate().
    """
    return iter(_KnitAnnotator(knit).annotate(revision_id))

3206

3207

3208

class _KnitAnnotator(object):

3209

"""Build up the annotations for a text."""

3210

3211

def __init__(self, knit):

3212

self._knit = knit

3213

3214

# Content objects, differs from fulltexts because of how final newlines

3215

# are treated by knits. the content objects here will always have a

3216

# final newline

3217

self._fulltext_contents = {}

3218

3219

# Annotated lines of specific revisions

3220

self._annotated_lines = {}

3221

3222

# Track the raw data for nodes that we could not process yet.

3223

# This maps the revision_id of the base to a list of children that will

3224

# annotated from it.

3225

self._pending_children = {}

3226

3227

# Nodes which cannot be extracted

3228

self._ghosts = set()

3229

3230

# Track how many children this node has, so we know if we need to keep

3231

# it

3232

self._annotate_children = {}

3233

self._compression_children = {}

3234

3235

self._all_build_details = {}

3236

# The children => parent revision_id graph

3237

self._revision_id_graph = {}

3238

3239

self._heads_provider = None

3240

3241

self._nodes_to_keep_annotations = set()

3242

self._generations_until_keep = 100

3243

3244

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new number of generations.
    """
    self._generations_until_keep = value

3251

3252

def _add_fulltext_content(self, revision_id, content_obj):

3253

self._fulltext_contents[revision_id] = content_obj

3254

# TODO: jam 20080305 It might be good to check the sha1digest here

3255

return content_obj.text()

3256

3257

def _check_parents(self, child, nodes_to_annotate):

3258

"""Check if all parents have been processed.

3259

3260

:param child: A tuple of (rev_id, parents, raw_content)

3261

:param nodes_to_annotate: If child is ready, add it to

3262

nodes_to_annotate, otherwise put it back in self._pending_children

3263

"""

3264

for parent_id in child[1]:

3265

if (parent_id not in self._annotated_lines):

3266

# This parent is present, but another parent is missing

3267

self._pending_children.setdefault(parent_id,

3268

[]).append(child)

3269

break

3270

else:

3271

# This one is ready to be processed

3272

nodes_to_annotate.append(child)

3273

3274

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of the revision, passed to
        annotate.reannotate().
    :param parent_ids: The parents whose annotations are used.
    :param left_matching_blocks: Passed through to annotate.reannotate().
    :return: A list of children that now have their parents satisfied.
    """
    parent_annotations = [self._annotated_lines[p] for p in parent_ids]
    self._annotated_lines[revision_id] = list(annotate.reannotate(
        parent_annotations, fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider()))
    for p in parent_ids:
        remaining = self._annotate_children[p]
        remaining.remove(revision_id)
        if remaining or p in self._nodes_to_keep_annotations:
            continue
        # No child still needs this parent and it is not marked to be kept,
        # so drop everything held for it.
        del self._annotated_lines[p]
        del self._all_build_details[p]
        self._fulltext_contents.pop(p, None)
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, ready)
    return ready

3302

3303

def _get_build_graph(self, key):

3304

"""Get the graphs for building texts and annotations.

3305

3306

The data you need for creating a full text may be different than the

3307

data you need to annotate that text. (At a minimum, you need both

3308

parents to create an annotation, but only need 1 parent to generate the

3309

fulltext.)

3310

3311

:return: A list of (key, index_memo) records, suitable for

3312

passing to read_records_iter to start reading in the raw data fro/

3313

the pack file.

3314

"""

3315

if key in self._annotated_lines:

3316

# Nothing to do

3317

return []

3318

pending = set([key])

3319

records = []

3320

generation = 0

3321

kept_generation = 0

3322

while pending:

3323

# get all pending nodes

3324

generation += 1

3325

this_iteration = pending

3326

build_details = self._knit._index.get_build_details(this_iteration)

3327

self._all_build_details.update(build_details)

3328

# new_nodes = self._knit._index._get_entries(this_iteration)

3329

pending = set()

3330

for key, details in build_details.iteritems():

3331

(index_memo, compression_parent, parents,

3332

record_details) = details

3333

self._revision_id_graph[key] = parents

3334

records.append((key, index_memo))

3335

# Do we actually need to check _annotated_lines?

3336

pending.update(p for p in parents

3337

if p not in self._all_build_details)

3338

if compression_parent:

3339

self._compression_children.setdefault(compression_parent,

3340

[]).append(key)

3341

if parents:

3342

for parent in parents:

3343

self._annotate_children.setdefault(parent,

3344

[]).append(key)

3345

num_gens = generation - kept_generation

3346

if ((num_gens >= self._generations_until_keep)

3347

and len(parents) > 1):

3348

kept_generation = generation

3349

self._nodes_to_keep_annotations.add(key)

3350

3351

missing_versions = this_iteration.difference(build_details.keys())

3352

self._ghosts.update(missing_versions)

3353

for missing_version in missing_versions:

3354

# add a key, no parents

3355

self._revision_id_graph[missing_version] = ()

3356

pending.discard(missing_version) # don't look for it

3357

if self._ghosts.intersection(self._compression_children):

3358

raise KnitCorrupt(

3359

"We cannot have nodes which have a ghost compression parent:\n"

3360

"ghosts: %r\n"

3361

"compression children: %r"

3362

% (self._ghosts, self._compression_children))

3363

# Cleanout anything that depends on a ghost so that we don't wait for

3364

# the ghost to show up

3365

for node in self._ghosts:

3366

if node in self._annotate_children:

3367

# We won't be building this node

3368

del self._annotate_children[node]

3369

# Generally we will want to read the records in reverse order, because

3370

# we find the parent nodes after the children

3371

records.reverse()

3372

return records

3373

3374

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    Records are consumed in the order ``self._knit._read_records_iter``
    yields them, not in the order they were requested.  A record with no
    non-ghost parents is annotated straight away; any other record is
    handed to ``self._check_parents`` together with the work list
    (presumably it queues the record once its parents are available --
    that helper is not visible here, confirm against its definition).

    :param records: Passed unchanged to ``self._knit._read_records_iter``.
    """
    for rev_id, record, _digest in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            # This revision has already been annotated.
            continue
        # Ghost parents never show up, so they must not be waited for.
        parent_ids = [parent for parent in self._revision_id_graph[rev_id]
                      if parent not in self._ghosts]
        (_index_memo, _compression_parent, _parents,
         record_details) = self._all_build_details[rev_id]
        # Stack of (rev_id, parent_ids, record) tuples ready to annotate.
        ready = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if parent_ids:
            # Check if all the parents are present
            self._check_parents((rev_id, parent_ids, record), ready)
        else:
            # There are no parents for this node, so just add it
            # TODO: This probably needs to be decoupled
            fulltext_content, delta = self._knit._factory.parse_record(
                rev_id, record, record_details, None)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content)
            ready.extend(self._add_annotation(
                rev_id, fulltext, parent_ids, left_matching_blocks=None))
        while ready:
            # Should we use a queue here instead of a stack?
            rev_id, parent_ids, record = ready.pop()
            (_index_memo, compression_parent, _parents,
             record_details) = self._all_build_details[rev_id]
            blocks = None
            if compression_parent is None:
                # Stored as a fulltext: no delta to apply or reuse.
                fulltext_content = self._knit._factory.parse_fulltext(
                    record, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
            else:
                siblings = self._compression_children[compression_parent]
                if rev_id not in siblings:
                    raise AssertionError("%r not in compression children %r"
                                         % (rev_id, siblings))
                # If there is only 1 child, it is safe to reuse this
                # content
                only_child = len(siblings) == 1
                reuse_content = (
                    only_child
                    and compression_parent
                    not in self._nodes_to_keep_annotations)
                if reuse_content:
                    # Remove it from the cache since it will be changing
                    parent_fulltext_content = self._fulltext_contents.pop(
                        compression_parent)
                    # Make sure to copy the fulltext since it might be
                    # modified
                    parent_fulltext = list(parent_fulltext_content.text())
                else:
                    parent_fulltext_content = self._fulltext_contents[
                        compression_parent]
                    parent_fulltext = parent_fulltext_content.text()
                siblings.remove(rev_id)
                fulltext_content, delta = self._knit._factory.parse_record(
                    rev_id, record, record_details,
                    parent_fulltext_content,
                    copy_base_content=(not reuse_content))
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
                if compression_parent == parent_ids[0]:
                    # the compression_parent is the left parent, so we can
                    # re-use the delta
                    blocks = KnitContent.get_line_delta_blocks(
                        delta, parent_fulltext, fulltext)
            # Whatever _add_annotation hands back goes onto the work list.
            ready.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))

3450

3451

def _get_heads_provider(self):
    """Create a heads provider for resolving ancestry issues.

    The provider is built on first use from ``self._revision_id_graph``
    and stored in ``self._heads_provider``; later calls return the stored
    object.

    :return: The ``_mod_graph.FrozenHeadsCache`` held in
        ``self._heads_provider``.
    """
    provider = self._heads_provider
    if provider is None:
        graph_obj = _mod_graph.Graph(
            _mod_graph.DictParentsProvider(self._revision_id_graph))
        provider = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = provider
    return provider

3461

3462

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    When the knit has fallback versioned files the work is delegated to
    ``self._simple_annotate``.  Otherwise the build graph is collected,
    the records are annotated and the result is read from
    ``self._annotated_lines``.  If ``errors.RetryWithNewPacks`` is raised
    along the way, ``self._knit._access.reload_or_raise`` is called with
    it and, if that returns, the whole attempt is repeated.

    :param key: The key to annotate.
    :return: ``self._annotated_lines[key]``, or the result of
        ``self._simple_annotate(key)`` for stacked knits.
    :raises errors.RevisionNotPresent: If ``key`` is in ``self._ghosts``
        after the build graph has been collected.
    """
    if self._knit._fallback_vfs:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    while True:
        try:
            records = self._get_build_graph(key)
            if key in self._ghosts:
                raise errors.RevisionNotPresent(key, self._knit)
            self._annotate_records(records)
            return self._annotated_lines[key]
        except errors.RetryWithNewPacks as e:
            # "except X as e" parses on Python 2.6+ and Python 3, unlike
            # the old comma form.
            self._knit._access.reload_or_raise(e)
            # The cached build_details are no longer valid
            self._all_build_details.clear()

3481

3482

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    The ancestry of ``key`` is walked breadth-first, keeping only the keys
    the searcher reports as present.  Those texts are then streamed in
    'topological' order and each one is annotated against the annotations
    already computed for its parents.

    :param key: The key to annotate.
    :return: The list built from ``annotate.reannotate`` for ``key``.
    :raises errors.RevisionNotPresent: If no annotation was produced for
        ``key`` (for example because the search found nothing present).
    """
    # TODO: this code generates a parent maps of present ancestors; it
    #       could be split out into a separate method, and probably should
    #       use iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    keys = set()
    while True:
        try:
            present, ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        # Only present keys are collected; the ghosts are dropped.
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name so the requested ``key`` is not overwritten;
        # it is needed for the final lookup and the error below.
        record_key = record.key
        fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parents = parent_map[record_key]
        if parents is not None:
            # Parents that were never collected into ``keys`` (presumably
            # ghosts) have no annotation to contribute; indexing
            # parent_cache with them would raise KeyError.  The fast path
            # in _annotate_records filters ghost parents as well.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent in keys]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None,
                       head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

3518

3519

3520

try:

3521

from bzrlib._knit_load_data_c import _load_data_c as _load_data

3522

except ImportError:

3523

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »