/brz/remove-bazaar : revision 4095.1.3

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2009-03-09 12:58:02 UTC
mto: This revision was merged to the branch mainline in revision 4099.
Revision ID: mbp@sourcefrog.net-20090309125802-guvsapvb980yt85n

Add test for failures inside pyrex readdir

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2-windows.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

errors,

osutils,

patiencediff,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

SHA1KnitCorrupt,

)

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_string,

sha_strings,

split_lines,

)

100

from bzrlib.versionedfile import (

101

AbsentContentFactory,

102

adapter_registry,

103

ConstantMapper,

104

ContentFactory,

105

ChunkedContentFactory,

106

VersionedFile,

107

VersionedFiles,

108

)

109

110

111

# TODO: Split out code specific to this format into an associated object.

112

113

# TODO: Can we put in some kind of value to check that the index and data

114

# files belong together?

115

116

# TODO: accommodate binaries, perhaps by storing a byte count

117

118

# TODO: function to check whole file

119

120

# TODO: atomically append data, then measure backwards from the cursor

121

# position after writing to work out where it was located. we may need to

122

# bypass python file buffering.

123

124

DATA_SUFFIX = '.knit'

125

INDEX_SUFFIX = '.kndx'

126

_STREAM_MIN_BUFFER_SIZE = 5*1024*1024

127

128

129

class KnitAdapter(object):
    """Common base for the knit record adapters defined in this module.

    Subclasses implement ``get_bytes(factory)`` on top of the helper
    objects prepared by the constructor.
    """

    def __init__(self, basis_vf):
        """Prepare the helpers shared by every adapter.

        :param basis_vf: A versioned file from which basis texts of deltas
            can be read. May be None for adapters that never need to
            access basis texts.
        """
        self._basis_vf = basis_vf
        self._plain_factory = KnitPlainFactory()
        self._annotate_factory = KnitAnnotateFactory()
        # Built without an index or data access object; the adapters in this
        # module only use it for its record parsing/serialising helpers.
        self._data = KnitVersionedFiles(None, None)

142

143

144

class FTAnnotatedToUnannotated(KnitAdapter):
    """An adapter from FT annotated knits to unannotated ones."""

    def get_bytes(self, factory):
        """Re-serialise factory's annotated fulltext record without annotations.

        :param factory: A content factory whose ``_raw_record`` holds the
            annotated record bytes.
        :return: The bytes produced by ``_record_to_data`` for the plain text.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        fulltext = self._annotate_factory.parse_fulltext(
            annotated_lines, version_id)
        # _record_to_data returns a (size, bytes) pair; only the bytes are
        # wanted here.
        _size, plain_bytes = self._data._record_to_data(
            (version_id,), header[3], fulltext.text())
        return plain_bytes

154

155

156

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """An adapter for deltas from annotated to unannotated."""

    def get_bytes(self, factory):
        """Re-serialise factory's annotated delta record without annotations.

        :param factory: A content factory whose ``_raw_record`` holds the
            annotated delta record bytes.
        :return: The bytes produced by ``_record_to_data`` for the plain
            line delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        # plain=True drops the origin of every delta line while parsing.
        plain_delta = self._annotate_factory.parse_line_delta(
            annotated_lines, version_id, plain=True)
        plain_lines = self._plain_factory.lower_line_delta(plain_delta)
        _size, plain_bytes = self._data._record_to_data(
            (version_id,), header[3], plain_lines)
        return plain_bytes

168

169

170

class FTAnnotatedToFullText(KnitAdapter):
    """An adapter from annotated fulltext knit records to plain full texts."""

    def get_bytes(self, factory):
        """Return the text of factory's record as one string.

        :param factory: A content factory providing ``_raw_record``, ``key``
            and ``_build_details``.
        :return: The lines of the parsed content joined into a single string.
        """
        annotated_compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        # No basis content is supplied (None): this adapter assumes the
        # record is a fulltext, for which parse_record returns no delta.
        content, _ = self._annotate_factory.parse_record(factory.key[-1],
            contents, factory._build_details, None)
        return ''.join(content.text())

180

181

182

class DeltaAnnotatedToFullText(KnitAdapter):
    """An adapter from annotated knit deltas to plain full texts."""

    def get_bytes(self, factory):
        """Return the full text obtained by applying factory's delta.

        The basis text is read from the ``basis_vf`` given to the
        constructor, using ``factory.parents[0]`` as the compression parent.

        :param factory: A content factory providing ``_raw_record``,
            ``parents`` and ``_build_details``.
        :return: The reconstructed text as a single string.
        :raises RevisionNotPresent: If the basis versioned file reports the
            compression parent as absent.
        """
        annotated_compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        # plain=True: annotations are discarded while the delta is parsed.
        delta = self._annotate_factory.parse_line_delta(contents, rec[1],
            plain=True)
        compression_parent = factory.parents[0]
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        # Manually apply the delta because the record is annotated while the
        # basis content is plain.
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        basis_content.apply_delta(delta, rec[1])
        # The second build detail is the no-eol flag.
        basis_content._should_strip_eol = factory._build_details[1]
        return ''.join(basis_content.text())

204

205

206

class FTPlainToFullText(KnitAdapter):
    """An adapter from plain (unannotated) fulltext knit records to full texts."""

    def get_bytes(self, factory):
        """Return the text of factory's record as one string.

        :param factory: A content factory providing ``_raw_record``, ``key``
            and ``_build_details``.
        :return: The lines of the parsed content joined into a single string.
        """
        compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        # No basis content is supplied (None): this adapter assumes the
        # record is a fulltext, for which parse_record returns no delta.
        content, _ = self._plain_factory.parse_record(factory.key[-1],
            contents, factory._build_details, None)
        return ''.join(content.text())

216

217

218

class DeltaPlainToFullText(KnitAdapter):
    """An adapter from plain (unannotated) knit deltas to full texts."""

    def get_bytes(self, factory):
        """Return the full text obtained by applying factory's delta.

        The basis text is read from the ``basis_vf`` given to the
        constructor, using ``factory.parents[0]`` as the compression parent.

        :param factory: A content factory providing ``_raw_record``,
            ``parents`` and ``_build_details``.
        :return: The reconstructed text as a single string.
        :raises RevisionNotPresent: If the basis versioned file reports the
            compression parent as absent.
        """
        compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # Both the delta and the basis are plain, so parse_record can parse
        # the delta and apply it to a copy of the basis content itself.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())

240

241

242

class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object used by get_bytes_as to serve the
            'chunked' and 'fulltext' representations.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # The storage kind encodes both the compression method and whether
        # the record carries annotations.
        kind = 'ft'
        if build_details[0] == 'line-delta':
            kind = 'delta'
        annotated_kind = ''
        if annotated:
            annotated_kind = 'annotated-'
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Create a fully serialised network version for transmission."""
        # Layout: storage_kind, key, parents, Noeol, raw_record
        if self.parents is None:
            parent_bytes = 'None:'
        else:
            parent_bytes = '\t'.join('\x00'.join(key) for key in self.parents)
        noeol = ' '
        if self._build_details[1]:
            noeol = 'N'
        key_bytes = '\x00'.join(self.key)
        self._network_bytes = "%s\n%s\n%s\n%s%s" % (self.storage_kind,
            key_bytes, parent_bytes, noeol, self._raw_record)

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        The record's own storage kind yields the (lazily built) network
        bytes. 'chunked' and 'fulltext' are only available when a knit was
        given to the constructor.

        :raises UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            if self._network_bytes is None:
                self._create_network_bytes()
            return self._network_bytes
        knit = self._knit
        if knit is not None:
            if storage_kind == 'chunked':
                return knit.get_lines(self.key[0])
            if storage_kind == 'fulltext':
                return knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

308

309

310

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory which can either generate full text or a wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for this
            key.
        :param first: Is this the first content object returned from generator?
            if it is, its storage kind is knit-delta-closure, otherwise it is
            knit-delta-closure-ref
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        self._generator = generator
        kind = "knit-delta-closure"
        if not first:
            kind = kind + "-ref"
        self.storage_kind = kind
        self._first = first

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        For the record's own storage kind, the first record of a generator
        returns the generator's wire bytes and later records return ''.
        'chunked' and 'fulltext' are built from the generator's content for
        this key.

        :raises UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                # all the keys etc are contained in the bytes returned in the
                # first record.
                return ''
            return self._generator._wire_bytes()
        if storage_kind in ('chunked', 'fulltext'):
            chunks = self._generator._get_one_work(self.key).text()
            if storage_kind != 'chunked':
                return ''.join(chunks)
            return chunks
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

352

353

354

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to a iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.
    :param bytes: The bytes of the record on the network.
    :param line_end: Passed through to _NetworkContentMapGenerator together
        with bytes.
    :return: The record stream of the generator built from bytes.
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

363

364

365

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A one-element list holding the KnitContentFactory for the record.
    """
    # Key line: key elements separated by NUL.
    key_start = line_end
    key_end = bytes.find('\n', key_start)
    key = tuple(bytes[key_start:key_end].split('\x00'))
    # Parents line: either the marker 'None:' or tab separated keys.
    parents_start = key_end + 1
    parents_end = bytes.find('\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line == 'None:':
        parents = None
    else:
        parent_keys = []
        for segment in parent_line.split('\t'):
            if segment:
                parent_keys.append(tuple(segment.split('\x00')))
        parents = tuple(parent_keys)
    # A single flag character, then the raw record up to the end.
    flag_pos = parents_end + 1
    noeol = bytes[flag_pos] == 'N'
    if 'ft' in storage_kind:
        method = 'fulltext'
    else:
        method = 'line-delta'
    build_details = (method, noeol)
    raw_record = bytes[flag_pos + 1:]
    annotated = 'annotated' in storage_kind
    return [KnitContentFactory(key, parents, build_details, None, raw_record,
        annotated, network_bytes=bytes)]

395

396

397

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The content is held in memory as a list of lines, each with a trailing
    newline, plus a flag saying whether the final line ending is really
    there or not, because that corresponds to the on-disk knit
    representation.
    """

    def __init__(self):
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: A content object; its text() is diffed against
            this content's text() and its _lines supply the delta data.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(None, old_texts,
            new_texts)
        for tag, old_start, old_end, new_start, new_end in \
                matcher.get_opcodes():
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the result of line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
        target_len = len(target)
        src_pos = 0
        tgt_pos = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_pos
            matched = gap
            if matched > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[src_pos + matched - 1] != target[tgt_pos + matched - 1]:
                    matched -= 1
                if matched > 0:
                    yield src_pos, tgt_pos, matched
            tgt_pos += inserted + gap
            src_pos = hunk_end
        # Whatever remains of the target after the last hunk.
        matched = target_len - tgt_pos
        if matched > 0:
            if source[src_pos + matched - 1] != target[tgt_pos + matched - 1]:
                matched -= 1
            if matched > 0:
                yield src_pos, tgt_pos, matched
        # Zero-length block terminating the sequence.
        yield src_pos + (target_len - tgt_pos), target_len, 0

451

452

453

class AnnotatedKnitContent(KnitContent):

454

"""Annotated content."""

455

456

def __init__(self, lines):

457

KnitContent.__init__(self)

458

self._lines = lines

459

460

def annotate(self):

461

"""Return a list of (origin, text) for each content line."""

462

lines = self._lines[:]

463

if self._should_strip_eol:

464

origin, last_line = lines[-1]

465

lines[-1] = (origin, last_line.rstrip('\n'))

466

return lines

467

468

def apply_delta(self, delta, new_version_id):

469

"""Apply delta to this object to become new_version_id."""

470

offset = 0

471

lines = self._lines

472

for start, end, count, delta_lines in delta:

473

lines[offset+start:offset+end] = delta_lines

474

offset = offset + (start - end) + count

475

476

def text(self):

477

try:

478

lines = [text for origin, text in self._lines]

479

except ValueError, e:

480

# most commonly (only?) caused by the internal form of the knit

481

# missing annotation information because of a bug - see thread

482

# around 20071015

483

raise KnitCorrupt(self,

484

"line in annotated knit missing annotation information: %s"

485

% (e,))

486

if self._should_strip_eol:

487

lines[-1] = lines[-1].rstrip('\n')

488

return lines

489

490

def copy(self):

491

return AnnotatedKnitContent(self._lines[:])

492

493

494

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is reported
    for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
    objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, line) for line in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        lines = self._lines
        # Running shift caused by the hunks applied so far.
        shift = 0
        for start, end, count, delta_lines in delta:
            lines[shift + start:shift + end] = delta_lines
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the content lines.

        The stored list itself is returned unless the final newline must be
        stripped, in which case a modified copy is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

529

530

531

class _KnitFactory(object):

532

"""Base class for common Factory functions."""

533

534

def parse_record(self, version_id, record, record_details,

535

base_content, copy_base_content=True):

536

"""Parse a record into a full content object.

537

538

:param version_id: The official version id for this content

539

:param record: The data returned by read_records_iter()

540

:param record_details: Details about the record returned by

541

get_build_details

542

:param base_content: If get_build_details returns a compression_parent,

543

you must return a base_content here, else use None

544

:param copy_base_content: When building from the base_content, decide

545

you can either copy it and return a new object, or modify it in

546

place.

547

:return: (content, delta) A Content object and possibly a line-delta,

548

delta may be None

549

"""

550

method, noeol = record_details

551

if method == 'line-delta':

552

if copy_base_content:

553

content = base_content.copy()

554

else:

555

content = base_content

556

delta = self.parse_line_delta(record, version_id)

557

content.apply_delta(delta, version_id)

558

else:

559

content = self.parse_fulltext(record, version_id)

560

delta = None

561

content._should_strip_eol = noeol

562

return (content, delta)

563

564

565

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext, newline terminated
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta.

        :param lines: The serialised line delta.
        :param version_id: Passed through to parse_line_delta. Optional so
            that existing single-argument callers keep working; previously
            the argument was not forwarded at all, which made every call
            fail with a TypeError.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline, newline terminated
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content) tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        # The same iterator feeds the header loop and the content reads, so
        # each header is followed by consuming its `count` content lines.
        next = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next().split(' ', 1)[1] for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the (origin, line) annotation of key's content in knit.

        For tuple keys each origin is expanded to a full key by prepending
        the prefix of key (all elements but the last).
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key suffixes
        # for this factory.
        if isinstance(key, tuple):
            prefix = key[:-1]
            origins = content.annotate()
            result = []
            for origin, line in origins:
                result.append((prefix + (origin,), line))
            return result
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple here?
            # Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()

687

688

689

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return plain content for lines, labelled with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        The lines are wrapped in a content object that records version_id
        as the single version for all of them.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, new_lines) for each hunk in lines.

        :param lines: The serialised delta; it must support len() and
            slicing.
        :param version_id: Not used by this implementation.
        """
        position = 0
        total = len(lines)
        while position < total:
            start, end, count = [int(n) for n in lines[position].split(',')]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        # Headers and content lines are pulled from the same iterator.
        fetch = line_iter.next
        for header in line_iter:
            count = int(header.split(',')[2])
            for _unused in xrange(count):
                yield fetch()

    def lower_fulltext(self, content):
        """Return the serialisable form of a fulltext: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Return the serialisable form of a delta.

        Each hunk becomes a 'start,end,count' header line followed by its
        new lines.
        """
        serialised = []
        for start, end, count, new_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(new_lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator built on knit."""
        return _KnitAnnotator(knit).annotate(key)

749

750

751

752

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles backed by a _KndxIndex and a _KnitKeyAccess.
    """
    def factory(transport):
        kndx_index = _KndxIndex(transport, mapper, lambda:None, lambda:True,
            lambda:True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access, annotated=annotated)
    return factory

766

767

768

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles whose ``stream`` and ``writer`` attributes hold
        the open write stream and the container writer.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for the graph and one more for deltas.
        ref_length = 0
        for wanted in (graph, delta):
            if wanted:
                ref_length += 1
        max_delta_chain = 0
        if delta:
            max_delta_chain = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        pack_stream = transport.open_write_stream('newpack')
        pack_writer = pack.ContainerWriter(pack_stream.write)
        pack_writer.begin()
        knit_index = _KnitGraphIndex(graph_index, lambda:True,
            parents=parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(pack_writer, graph_index,
            (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(knit_index, pack_access,
            max_delta_chain=max_delta_chain)
        # Kept on the result so that cleanup_pack_knit can close them.
        versioned_files.stream = pack_stream
        versioned_files.writer = pack_writer
        return versioned_files
    return factory

803

804

805

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object carrying ``stream`` and ``writer``
        attributes, such as the result of a make_pack_factory factory.
    """
    # NOTE(review): the stream is closed before the writer is ended, while
    # make_pack_factory builds the writer on stream.write - confirm that
    # end() does not need to write to an open stream.
    pack_stream = versioned_files.stream
    pack_stream.close()
    pack_writer = versioned_files.writer
    pack_writer.end()

808

809

810

def _get_total_build_size(self, keys, positions):

811

"""Determine the total bytes to build these keys.

812

813

(helper function because _KnitGraphIndex and _KndxIndex work the same, but

814

don't inherit from a common base.)

815

816

:param keys: Keys that we want to build

817

:param positions: dict of {key, (info, index_memo, comp_parent)} (such

818

as returned by _get_components_positions)

819

:return: Number of bytes to build those keys

820

"""

821

all_build_index_memos = {}

822

build_keys = keys

823

while build_keys:

824

next_keys = set()

825

for key in build_keys:

826

# This is mostly for the 'stacked' case

827

# Where we will be getting the data from a fallback

828

if key not in positions:

829

continue

830

_, index_memo, compression_parent = positions[key]

831

all_build_index_memos[key] = index_memo

832

if compression_parent not in all_build_index_memos:

833

next_keys.add(compression_parent)

834

build_keys = next_keys

835

return sum([index_memo[2] for index_memo

836

in all_build_index_memos.itervalues()])

837

838

839

class KnitVersionedFiles(VersionedFiles):

840

"""Storage for many versioned files using knit compression.

841

842

Backend storage is managed by indices and data objects.

843

844

:ivar _index: A _KnitGraphIndex or similar that can describe the

845

parents, graph, compression and data location of entries in this

846

KnitVersionedFiles. Note that this is only the index for

847

*this* vfs; if there are fallbacks they must be queried separately.

848

"""

849

850

def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False, reload_func=None):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated and
        stored during insertion.
    :param reload_func: A function that can be called if we think we need
        to reload the pack listing and try again. See
        'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
    """
    # Plain content unless the caller asked for annotations.
    factory_class = KnitPlainFactory
    if annotated:
        factory_class = KnitAnnotateFactory
    self._index = index
    self._access = data_access
    self._max_delta_chain = max_delta_chain
    self._factory = factory_class()
    self._fallback_vfs = []
    self._reload_func = reload_func

874

875

def __repr__(self):

876

return "%s(%r, %r)" % (

877

self.__class__.__name__,

878

self._index,

879

self._access)

880

881

def add_fallback_versioned_files(self, a_versioned_files):

882

"""Add a source of texts for texts not present in this knit.

883

884

:param a_versioned_files: A VersionedFiles object.

885

"""

886

self._fallback_vfs.append(a_versioned_files)

887

888

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines()."""
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    effective_parents = parents
    if effective_parents is None:
        # Callers may pass None when there is no graph data; kndx indexes
        # cannot store that directly, so hand them an empty tuple.
        effective_parents = ()
    added = self._add(key, lines, effective_parents, parent_texts,
                      left_matching_blocks, nostore_sha, random_id)
    return added

901

902

def _add(self, key, lines, parents, parent_texts,
    left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    The text is stored as a line-delta against the leftmost parent when
    that parent is present in this knit's own index and
    _check_should_delta() approves; otherwise it is stored as a fulltext.

    :param key: The key tuple for the new text; every element must be a
        str.  Only the last element is handed to the content factory.
    :param lines: The lines of the text.  If the final line lacks a
        trailing newline the list is copied, a newline is appended to the
        copy and the record is flagged 'no-eol'.
    :param parents: The parent keys of the new text.
    :param parent_texts: Optional dict passed through to
        _merge_annotations(); None is treated as an empty dict.
    :param left_matching_blocks: Passed through to _merge_annotations().
    :param nostore_sha: If equal to the sha of the joined lines, nothing is
        stored and errors.ExistingContent is raised.
    :param random_id: Passed through to the index's add_records().
    :return: A tuple (digest, text_length, content) where text_length is
        the length of the joined lines before any newline was appended.
    :raises errors.ExistingContent: when nostore_sha matches the text.
    :raises TypeError: when key contains a non-str element.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence; we only compress
    # against parents in the same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    for parent in parents:
        if parent in present_parent_map:
            present_parents.append(parent)

    # Currently we can only compress against the left most present parent.
    if (len(present_parents) == 0 or
        present_parents[0] != parents[0]):
        delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents[0])

    # Measured before the optional newline is appended below, so this is
    # the length of the text as the caller supplied it.
    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    for element in key:
        if type(element) != str:
            raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if 'no-eol' in options:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        # delta_hunks is always bound here: the block above runs whenever
        # delta is true.
        options.append('line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, bytes = self._record_to_data(key, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ is KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = self._record_to_data(key, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self._factory.lower_fulltext(content)
            size, bytes = self._record_to_data(key, digest,
                store_lines)

    # Write the record data first, then index it using the memo that the
    # access object handed back.
    access_memo = self._access.add_raw_records([(key, size)], bytes)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

986

987

def annotate(self, key):

988

"""See VersionedFiles.annotate."""

989

return self._factory.annotate(self, key)

990

991

def check(self, progress_bar=None):

992

"""See VersionedFiles.check()."""

993

# This doesn't actually test extraction of everything, but that will

994

# impact 'bzr check' substantially, and needs to be integrated with

995

# care. However, it does check for the obvious problem of a delta with

996

# no basis.

997

keys = self._index.keys()

998

parent_map = self.get_parent_map(keys)

999

for key in keys:

1000

if self._index.get_method(key) != 'fulltext':

1001

compression_parent = parent_map[key][0]

1002

if compression_parent not in parent_map:

1003

raise errors.KnitCorrupt(self,

1004

"Missing basis parent %s for %s" % (

1005

compression_parent, key))

1006

for fallback_vfs in self._fallback_vfs:

1007

fallback_vfs.check()

1008

1009

def _check_add(self, key, lines, random_id, check_content):
    """Validate that a key and its lines are safe to add.

    Only the last element of key is examined: it must contain no
    whitespace and must not be a reserved id.  The lines themselves are
    validated only when check_content is true.
    """
    version_id = key[-1]
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1022

1023

def _check_header(self, key, line):
    """Split a record header line and verify it names key's last element.

    :return: The split header, as produced by _split_header().
    """
    header_fields = self._split_header(line)
    expected_version = key[-1]
    self._check_header_version(header_fields, expected_version)
    return header_fields

1027

1028

def _check_header_version(self, rec, version_id):

1029

"""Checks the header version on original format knit records.

1030

1031

These have the last component of the key embedded in the record.

1032

"""

1033

if rec[1] != version_id:

1034

raise KnitCorrupt(self,

1035

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1036

1037

def _check_should_delta(self, parent):

1038

"""Iterate back through the parent listing, looking for a fulltext.

1039

1040

This is used when we want to decide whether to add a delta or a new

1041

fulltext. It searches for _max_delta_chain parents. When it finds a

1042

fulltext parent, it sees if the total size of the deltas leading up to

1043

it is large enough to indicate that we want a new full text anyway.

1044

1045

Return True if we should create a new delta, False if we should use a

1046

full text.

1047

"""

1048

delta_size = 0

1049

fulltext_size = None

1050

for count in xrange(self._max_delta_chain):

1051

try:

1052

# Note that this only looks in the index of this particular

1053

# KnitVersionedFiles, not in the fallbacks. This ensures that

1054

# we won't store a delta spanning physical repository

1055

# boundaries.

1056

build_details = self._index.get_build_details([parent])

1057

parent_details = build_details[parent]

1058

except (RevisionNotPresent, KeyError), e:

1059

# Some basis is not locally present: always fulltext

1060

return False

1061

index_memo, compression_parent, _, _ = parent_details

1062

_, _, size = index_memo

1063

if compression_parent is None:

1064

fulltext_size = size

1065

break

1066

delta_size += size

1067

# We don't explicitly check for presence because this is in an

1068

# inner loop, and if it's missing it'll fail anyhow.

1069

parent = compression_parent

1070

else:

1071

# We couldn't find a fulltext, so we must create a new one

1072

return False

1073

# Simple heuristic - if the total I/O wold be greater as a delta than

1074

# the originally installed fulltext, we create a new fulltext.

1075

return fulltext_size > delta_size

1076

1077

def _build_details_to_components(self, build_details):

1078

"""Convert a build_details tuple to a position tuple."""

1079

# record_details, access_memo, compression_parent

1080

return build_details[3], build_details[0], build_details[1]

1081

1082

def _get_components_positions(self, keys, allow_missing=False):

1083

"""Produce a map of position data for the components of keys.

1084

1085

This data is intended to be used for retrieving the knit records.

1086

1087

A dict of key to (record_details, index_memo, next, parents) is

1088

returned.

1089

method is the way referenced data should be applied.

1090

index_memo is the handle to pass to the data access to actually get the

1091

data

1092

next is the build-parent of the version, or None for fulltexts.

1093

parents is the version_ids of the parents of this version

1094

1095

:param allow_missing: If True do not raise an error on a missing component,

1096

just ignore it.

1097

"""

1098

component_data = {}

1099

pending_components = keys

1100

while pending_components:

1101

build_details = self._index.get_build_details(pending_components)

1102

current_components = set(pending_components)

1103

pending_components = set()

1104

for key, details in build_details.iteritems():

1105

(index_memo, compression_parent, parents,

1106

record_details) = details

1107

method = record_details[0]

1108

if compression_parent is not None:

1109

pending_components.add(compression_parent)

1110

component_data[key] = self._build_details_to_components(details)

1111

missing = current_components.difference(build_details)

1112

if missing and not allow_missing:

1113

raise errors.RevisionNotPresent(missing.pop(), self)

1114

return component_data

1115

1116

def _get_content(self, key, parent_texts={}):

1117

"""Returns a content object that makes up the specified

1118

version."""

1119

cached_version = parent_texts.get(key, None)

1120

if cached_version is not None:

1121

# Ensure the cache dict is valid.

1122

if not self.get_parent_map([key]):

1123

raise RevisionNotPresent(key, self)

1124

return cached_version

1125

generator = _VFContentMapGenerator(self, [key])

1126

return generator._get_content(key)

1127

1128

def get_parent_map(self, keys):

1129

"""Get a map of the graph parents of keys.

1130

1131

:param keys: The keys to look up parents for.

1132

:return: A mapping from keys to parents. Absent keys are absent from

1133

the mapping.

1134

"""

1135

return self._get_parent_map_with_sources(keys)[0]

1136

1137

def _get_parent_map_with_sources(self, keys):

1138

"""Get a map of the parents of keys.

1139

1140

:param keys: The keys to look up parents for.

1141

:return: A tuple. The first element is a mapping from keys to parents.

1142

Absent keys are absent from the mapping. The second element is a

1143

list with the locations each key was found in. The first element

1144

is the in-this-knit parents, the second the first fallback source,

1145

and so on.

1146

"""

1147

result = {}

1148

sources = [self._index] + self._fallback_vfs

1149

source_results = []

1150

missing = set(keys)

1151

for source in sources:

1152

if not missing:

1153

break

1154

new_result = source.get_parent_map(missing)

1155

source_results.append(new_result)

1156

result.update(new_result)

1157

missing.difference_update(set(new_result))

1158

return result, source_results

1159

1160

def _get_record_map(self, keys, allow_missing=False):
    """Build a dictionary of parsed knit records.

    :return: {key:(record, record_details, digest, next)}
        record
            the parsed content produced by _parse_record for the raw
            record data
        record_details
            opaque information to pass to parse_record
        digest
            SHA1 digest of the full text after all steps are done
        next
            build-parent of the version, i.e. the leftmost ancestor.
            Will be None if the record is not a delta.
    :param keys: The keys to build a map for
    :param allow_missing: If some records are missing, rather than
        error, just return the data that could be generated.
    """
    unparsed = self._get_record_map_unparsed(keys,
        allow_missing=allow_missing)
    record_map = self._raw_map_to_record_map(unparsed)
    return record_map

1180

1181

def _raw_map_to_record_map(self, raw_map):

1182

"""Parse the contents of _get_record_map_unparsed.

1183

1184

:return: see _get_record_map.

1185

"""

1186

result = {}

1187

for key in raw_map:

1188

data, record_details, next = raw_map[key]

1189

content, digest = self._parse_record(key[-1], data)

1190

result[key] = content, record_details, digest, next

1191

return result

1192

1193

def _get_record_map_unparsed(self, keys, allow_missing=False):

1194

"""Get the raw data for reconstructing keys without parsing it.

1195

1196

:return: A dict suitable for parsing via _raw_map_to_record_map.

1197

key-> raw_bytes, (method, noeol), compression_parent

1198

"""

1199

# This retries the whole request if anything fails. Potentially we

1200

# could be a bit more selective. We could track the keys whose records

1201

# we have successfully found, and then only request the new records

1202

# from there. However, _get_components_positions grabs the whole build

1203

# chain, which means we'll likely try to grab the same records again

1204

# anyway. Also, can the build chains change as part of a pack

1205

# operation? We wouldn't want to end up with a broken chain.

1206

while True:

1207

try:

1208

position_map = self._get_components_positions(keys,

1209

allow_missing=allow_missing)

1210

# key = component_id, r = record_details, i_m = index_memo,

1211

# n = next

1212

records = [(key, i_m) for key, (r, i_m, n)

1213

in position_map.iteritems()]

1214

# Sort by the index memo, so that we request records from the

1215

# same pack file together, and in forward-sorted order

1216

records.sort(key=operator.itemgetter(1))

1217

raw_record_map = {}

1218

for key, data in self._read_records_iter_unchecked(records):

1219

(record_details, index_memo, next) = position_map[key]

1220

raw_record_map[key] = data, record_details, next

1221

return raw_record_map

1222

except errors.RetryWithNewPacks, e:

1223

self._access.reload_or_raise(e)

1224

1225

@classmethod

1226

def _split_by_prefix(cls, keys):

1227

"""For the given keys, split them up based on their prefix.

1228

1229

To keep memory pressure somewhat under control, split the

1230

requests back into per-file-id requests, otherwise "bzr co"

1231

extracts the full tree into memory before writing it to disk.

1232

This should be revisited if _get_content_maps() can ever cross

1233

file-id boundaries.

1234

1235

The keys for a given file_id are kept in the same relative order.

1236

Ordering between file_ids is not, though prefix_order will return the

1237

order that the key was first seen.

1238

1239

:param keys: An iterable of key tuples

1240

:return: (split_map, prefix_order)

1241

split_map A dictionary mapping prefix => keys

1242

prefix_order The order that we saw the various prefixes

1243

"""

1244

split_by_prefix = {}

1245

prefix_order = []

1246

for key in keys:

1247

if len(key) == 1:

1248

prefix = ''

1249

else:

1250

prefix = key[0]

1251

1252

if prefix in split_by_prefix:

1253

split_by_prefix[prefix].append(key)

1254

else:

1255

split_by_prefix[prefix] = [key]

1256

prefix_order.append(prefix)

1257

return split_by_prefix, prefix_order

1258

1259

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    Keys are taken one prefix at a time, in first-seen order, and a group
    is closed as soon as its accumulated build size exceeds
    _min_buffer_size.  Keys of one prefix are never split across groups.

    :param keys: The keys to group.
    :param non_local_keys: Keys whose prefix decides which group's
        non_local set they are added to.
    :param positions: Position data handed to the index's
        _get_total_build_size().
    :param _min_buffer_size: Size threshold at which a group is closed.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together. Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys,
                                                      positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    if cur_keys:
        # Whatever is left never reached the threshold; emit it as the
        # final group.
        result.append((cur_keys, cur_non_local))
    return result

1301

1302

def get_record_stream(self, keys, ordering, include_delta_closure):

1303

"""Get a stream of records for keys.

1304

1305

:param keys: The keys to include.

1306

:param ordering: Either 'unordered' or 'topological'. A topologically

1307

sorted stream has compression parents strictly before their

1308

children.

1309

:param include_delta_closure: If True then the closure across any

1310

compression parents will be included (in the opaque data).

1311

:return: An iterator of ContentFactory objects, each of which is only

1312

valid until the iterator is advanced.

1313

"""

1314

# keys might be a generator

1315

keys = set(keys)

1316

if not keys:

1317

return

1318

if not self._index.has_graph:

1319

# Cannot topological order when no graph has been stored.

1320

ordering = 'unordered'

1321

1322

remaining_keys = keys

1323

while True:

1324

try:

1325

keys = set(remaining_keys)

1326

for content_factory in self._get_remaining_record_stream(keys,

1327

ordering, include_delta_closure):

1328

remaining_keys.discard(content_factory.key)

1329

yield content_factory

1330

return

1331

except errors.RetryWithNewPacks, e:

1332

self._access.reload_or_raise(e)

1333

1334

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    :param keys: A set of keys still to be yielded (set operations are
        applied to it directly).
    :param ordering: 'unordered' or 'topological'; anything else raises
        AssertionError.
    :param include_delta_closure: If True, records are produced through
        _VFContentMapGenerator, batched by _group_keys_for_io.  If False,
        raw records are read from this knit and the rest is delegated to
        the fallback versioned files.
    :return: A generator of content factory objects.  An
        AbsentContentFactory is yielded first for every key that neither
        this knit nor any fallback knows about.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    # NOTE(review): this value is recomputed from global_map further down
    # before it is ever read.
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                # Not in this knit's positions at all.
                needed_from_fallback.add(key)
                continue
            result = True
            # Walk compression parents until a fulltext (None parent), an
            # already classified key, or a missing basis is reached.
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            # Every key on the walked chain shares the verdict; the last
            # chain element is the terminator and is not recorded.
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering == 'topological':
        # Global topological sort
        present_keys = tsort.topo_sort(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            # Start a new (source, keys) run whenever the source changes,
            # so the global order is preserved across sources.
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        # Note: keys and non_local_keys are rebound by this loop.
        for keys, non_local_keys in self._group_keys_for_io(present_keys,
                                                            non_local_keys,
                                                            positions):
            generator = _VFContentMapGenerator(self, keys, non_local_keys,
                                               global_map)
            for record in generator.get_record_stream():
                yield record
    else:
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this knit, so fallback i sits
                # at parent_maps[i + 1].
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                    yield record

1448

1449

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s()."""
    wanted = set(keys)
    local_records = self._get_record_map(wanted, allow_missing=True)
    # Entry 2 of each record is the digest.  The record map may also hold
    # other keys (compression parents), which are filtered out.
    sha1s = dict((key, details[2])
                 for key, details in local_records.iteritems()
                 if key in wanted)
    wanted.difference_update(sha1s)
    for fallback in self._fallback_vfs:
        if not wanted:
            break
        fallback_sha1s = fallback.get_sha1s(wanted)
        sha1s.update(fallback_sha1s)
        wanted.difference_update(fallback_sha1s)
    return sha1s

1467

1468

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records whose storage kind is a knit type are copied across without
    re-encoding when it is safe to do so; 'chunked' records and everything
    else are expanded and added through add_lines().  Index entries for
    deltas whose compression parent is not yet in this knit's index are
    held back until that parent arrives, or until the end of the stream.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if the stream contains an 'absent' record.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are built on first use and cached for the rest of this
        # call.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        buffered = False
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                # Prefer an adapter to a plain delta and fall back to a
                # plain fulltext adapter when none is registered.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            else:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                bytes = record._raw_record
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            try:
                # Try getting a fulltext directly from the record.
                bytes = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            lines = split_lines(bytes)
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Deliberately ignored: the text is already stored.
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            # Flushing one batch of held-back entries can in turn unblock
            # entries waiting on those keys, so work through a queue.
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_records(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # So these need to be added (if the index supports holding such
        # entries for later insertion)
        for key in buffered_index_entries:
            index_entries = buffered_index_entries[key]
            self._index.add_records(index_entries,
                missing_compression_parents=True)

1627

1628

def get_missing_compression_parent_keys(self):
    """Return an iterable of the keys of missing compression parents.

    Call this once insert_record_stream has finished to learn whether any
    compression parents are still absent.  When some are, the records that
    depend on them cannot be inserted safely.  For atomic
    KnitVersionedFiles built on packs the transaction should then be
    aborted or suspended - commit will fail at this point.  Nonatomic
    knits will error earlier because they have no staging area to put
    pending entries into.
    """
    index = self._index
    return index.get_missing_compression_parents()

1639

1640

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

1641

"""Iterate over the lines in the versioned files from keys.

1642

1643

This may return lines from other keys. Each item the returned

1644

iterator yields is a tuple of a line and a text version that that line

1645

is present in (not introduced in).

1646

1647

Ordering of results is in whatever order is most suitable for the

1648

underlying storage format.

1649

1650

If a progress bar is supplied, it may be used to indicate progress.

1651

The caller is responsible for cleaning up progress bars (because this

1652

is an iterator).

1653

1654

NOTES:

1655

* Lines are normalised by the underlying store: they will all have \\n

1656

terminators.

1657

* Lines are returned in arbitrary order.

1658

* If a requested key did not change any lines (or didn't have any

1659

lines), it may not be mentioned at all in the result.

1660

1661

:return: An iterator over (line, key).

1662

"""

1663

if pb is None:

1664

pb = progress.DummyProgress()

1665

keys = set(keys)

1666

total = len(keys)

1667

done = False

1668

while not done:

1669

try:

1670

# we don't care about inclusions, the caller cares.

1671

# but we need to setup a list of records to visit.

1672

# we need key, position, length

1673

key_records = []

1674

build_details = self._index.get_build_details(keys)

1675

for key, details in build_details.iteritems():

1676

if key in keys:

1677

key_records.append((key, details[0]))

1678

records_iter = enumerate(self._read_records_iter(key_records))

1679

for (key_idx, (key, data, sha_value)) in records_iter:

1680

pb.update('Walking content.', key_idx, total)

1681

compression_parent = build_details[key][1]

1682

if compression_parent is None:

1683

# fulltext

1684

line_iterator = self._factory.get_fulltext_content(data)

1685

else:

1686

# Delta

1687

line_iterator = self._factory.get_linedelta_content(data)

1688

# Now that we are yielding the data for this key, remove it

1689

# from the list

1690

keys.remove(key)

1691

# XXX: It might be more efficient to yield (key,

1692

# line_iterator) in the future. However for now, this is a

1693

# simpler change to integrate into the rest of the

1694

# codebase. RBC 20071110

1695

for line in line_iterator:

1696

yield line, key

1697

done = True

1698

except errors.RetryWithNewPacks, e:

1699

self._access.reload_or_raise(e)

1700

# If there are still keys we've not yet found, we look in the fallback

1701

# vfs, and hope to find them there. Note that if the keys are found

1702

# but had no changes or no content, the fallback may not return

1703

# anything.

1704

if keys and not self._fallback_vfs:

1705

# XXX: strictly the second parameter is meant to be the file id

1706

# but it's not easily accessible here.

1707

raise RevisionNotPresent(keys, repr(self))

1708

for source in self._fallback_vfs:

1709

if not keys:

1710

break

1711

source_keys = set()

1712

for line, key in source.iter_lines_added_or_present_in_keys(keys):

1713

source_keys.add(key)

1714

yield line, key

1715

keys.difference_update(source_keys)

1716

pb.update('Walking content.', total, total)

1717

1718

def _make_line_delta(self, delta_seq, new_content):

1719

"""Generate a line delta from delta_seq and new_content."""

1720

diff_hunks = []

1721

for op in delta_seq.get_opcodes():

1722

if op[0] == 'equal':

1723

continue

1724

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1725

return diff_hunks

1726

1727

def _merge_annotations(self, content, parents, parent_texts={},

1728

delta=None, annotated=None,

1729

left_matching_blocks=None):

1730

"""Merge annotations for content and generate deltas.

1731

1732

This is done by comparing the annotations based on changes to the text

1733

and generating a delta on the resulting full texts. If annotations are

1734

not being created then a simple delta is created.

1735

"""

1736

if left_matching_blocks is not None:

1737

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1738

else:

1739

delta_seq = None

1740

if annotated:

1741

for parent_key in parents:

1742

merge_content = self._get_content(parent_key, parent_texts)

1743

if (parent_key == parents[0] and delta_seq is not None):

1744

seq = delta_seq

1745

else:

1746

seq = patiencediff.PatienceSequenceMatcher(

1747

None, merge_content.text(), content.text())

1748

for i, j, n in seq.get_matching_blocks():

1749

if n == 0:

1750

continue

1751

# this copies (origin, text) pairs across to the new

1752

# content for any line that matches the last-checked

1753

# parent.

1754

content._lines[j:j+n] = merge_content._lines[i:i+n]

1755

# XXX: Robert says the following block is a workaround for a

1756

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1757

if content._lines and content._lines[-1][1][-1] != '\n':

1758

# The copied annotation was from a line without a trailing EOL,

1759

# reinstate one for the content object, to ensure correct

1760

# serialization.

1761

line = content._lines[-1][1] + '\n'

1762

content._lines[-1] = (content._lines[-1][0], line)

1763

if delta:

1764

if delta_seq is None:

1765

reference_content = self._get_content(parents[0], parent_texts)

1766

new_texts = content.text()

1767

old_texts = reference_content.text()

1768

delta_seq = patiencediff.PatienceSequenceMatcher(

1769

None, old_texts, new_texts)

1770

return self._make_line_delta(delta_seq, content)

1771

1772

def _parse_record(self, version_id, data):

1773

"""Parse an original format knit record.

1774

1775

These have the last element of the key only present in the stored data.

1776

"""

1777

rec, record_contents = self._parse_record_unchecked(data)

1778

self._check_header_version(rec, version_id)

1779

return record_contents, rec[3]

1780

1781

def _parse_record_header(self, key, raw_data):

1782

"""Parse a record header for consistency.

1783

1784

:return: the header and the decompressor stream.

1785

as (stream, header_record)

1786

"""

1787

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1788

try:

1789

# Current serialise

1790

rec = self._check_header(key, df.readline())

1791

except Exception, e:

1792

raise KnitCorrupt(self,

1793

"While reading {%s} got %s(%s)"

1794

% (key, e.__class__.__name__, str(e)))

1795

return df, rec

1796

1797

def _parse_record_unchecked(self, data):

1798

# profiling notes:

1799

# 4168 calls in 2880 217 internal

1800

# 4168 calls to _parse_record_header in 2121

1801

# 4168 calls to readlines in 330

1802

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1803

try:

1804

record_contents = df.readlines()

1805

except Exception, e:

1806

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1807

(data, e.__class__.__name__, str(e)))

1808

header = record_contents.pop(0)

1809

rec = self._split_header(header)

1810

last_line = record_contents.pop()

1811

if len(record_contents) != int(rec[2]):

1812

raise KnitCorrupt(self,

1813

'incorrect number of lines %s != %s'

1814

' for version {%s} %s'

1815

% (len(record_contents), int(rec[2]),

1816

rec[1], record_contents))

1817

if last_line != 'end %s\n' % rec[1]:

1818

raise KnitCorrupt(self,

1819

'unexpected version end line %r, wanted %r'

1820

% (last_line, rec[1]))

1821

df.close()

1822

return rec, record_contents

1823

1824

def _read_records_iter(self, records):
    """Read text records from data file and yield result.

    The result will be returned in whatever is the fastest to read.
    Not by the order requested. Also, multiple requests for the same
    record will only yield 1 response.

    :param records: A list of (key, access_memo) entries
    :return: Yields (key, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    # Drop duplicate requests and order what is left by access memo.
    # XXX: This smells wrong, IO may not be getting ordered right.
    unique_records = set(records)
    needed_records = sorted(unique_records, key=operator.itemgetter(1))
    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for key, index_memo in needed_records]
    raw_data = self._access.get_raw_records(memos)

    for record, data in izip(iter(needed_records), raw_data):
        key = record[0]
        content, digest = self._parse_record(key[-1], data)
        yield key, content, digest

1851

1852

def _read_records_iter_raw(self, records):

1853

"""Read text records from data file and yield raw data.

1854

1855

This unpacks enough of the text record to validate the id is

1856

as expected but thats all.

1857

1858

Each item the iterator yields is (key, bytes,

1859

expected_sha1_of_full_text).

1860

"""

1861

for key, data in self._read_records_iter_unchecked(records):

1862

# validate the header (note that we can only use the suffix in

1863

# current knit records).

1864

df, rec = self._parse_record_header(key, data)

1865

df.close()

1866

yield key, data, rec[3]

1867

1868

def _read_records_iter_unchecked(self, records):

1869

"""Read text records from data file and yield raw data.

1870

1871

No validation is done.

1872

1873

Yields tuples of (key, data).

1874

"""

1875

# setup an iterator of the external records:

1876

# uses readv so nice and fast we hope.

1877

if len(records):

1878

# grab the disk data needed.

1879

needed_offsets = [index_memo for key, index_memo

1880

in records]

1881

raw_records = self._access.get_raw_records(needed_offsets)

1882

1883

for key, index_memo in records:

1884

data = raw_records.next()

1885

yield key, data

1886

1887

def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Convert key, digest, lines into a raw data block.

    :param key: The key of the record. Currently keys are always serialised
        using just the trailing component.
    :param digest: The digest written into the 'version' header line.
    :param lines: The content lines; the last one must end in \\n.
    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in \\n,
        dense_lines may be a list with one line in it, containing all the
        1000's lines and their \\n's. Using dense_lines if it is already
        known is a win because the string join to create bytes in this
        function spends less time resizing the final string.
    :return: (len, data) - the gzip-compressed record produced by
        tuned_gzip.bytes_to_gzip and its length.
    :raises AssertionError: if the joined record is not a plain str.
    :raises ValueError: if lines is non-empty and its last line does not
        end with a newline.
    """
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    version_id = key[-1]
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    payload = ''.join(chain([header], dense_lines or lines, [footer]))
    if type(payload) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(payload))
    if lines and lines[-1][-1] != '\n':
        raise ValueError('corrupt lines value %r' % lines)
    compressed_bytes = tuned_gzip.bytes_to_gzip(payload)
    return len(compressed_bytes), compressed_bytes

1916

1917

def _split_header(self, line):

1918

rec = line.split()

1919

if len(rec) != 4:

1920

raise KnitCorrupt(self,

1921

'unexpected number of elements in record header')

1922

return rec

1923

1924

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the union of the keys of the local index and of
        every fallback versioned files object.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1933

1934

1935

class _ContentMapGenerator(object):

1936

"""Generate texts or expose raw deltas for a set of texts."""

1937

1938

def _get_content(self, key):

1939

"""Get the content object for key."""

1940

# Note that _get_content is only called when the _ContentMapGenerator

1941

# has been constructed with just one key requested for reconstruction.

1942

if key in self.nonlocal_keys:

1943

record = self.get_record_stream().next()

1944

# Create a content object on the fly

1945

lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

1946

return PlainKnitContent(lines, record.key)

1947

else:

1948

# local keys we can ask for directly

1949

return self._get_one_work(key)

1950

1951

def get_record_stream(self):

1952

"""Get a record stream for the keys requested during __init__."""

1953

for record in self._work():

1954

yield record

1955

1956

def _work(self):

1957

"""Produce maps of text and KnitContents as dicts.

1958

1959

:return: (text_map, content_map) where text_map contains the texts for

1960

the requested versions and content_map contains the KnitContents.

1961

"""

1962

# NB: By definition we never need to read remote sources unless texts

1963

# are requested from them: we don't delta across stores - and we

1964

# explicitly do not want to to prevent data loss situations.

1965

if self.global_map is None:

1966

self.global_map = self.vf.get_parent_map(self.keys)

1967

nonlocal_keys = self.nonlocal_keys

1968

1969

missing_keys = set(nonlocal_keys)

1970

# Read from remote versioned file instances and provide to our caller.

1971

for source in self.vf._fallback_vfs:

1972

if not missing_keys:

1973

break

1974

# Loop over fallback repositories asking them for texts - ignore

1975

# any missing from a particular fallback.

1976

for record in source.get_record_stream(missing_keys,

1977

'unordered', True):

1978

if record.storage_kind == 'absent':

1979

# Not in thie particular stream, may be in one of the

1980

# other fallback vfs objects.

1981

continue

1982

missing_keys.remove(record.key)

1983

yield record

1984

1985

self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,

1986

allow_missing=True)

1987

first = True

1988

for key in self.keys:

1989

if key in self.nonlocal_keys:

1990

continue

1991

yield LazyKnitContentFactory(key, self.global_map[key], self, first)

1992

first = False

1993

1994

def _get_one_work(self, requested_key):

1995

# Now, if we have calculated everything already, just return the

1996

# desired text.

1997

if requested_key in self._contents_map:

1998

return self._contents_map[requested_key]

1999

# To simplify things, parse everything at once - code that wants one text

2000

# probably wants them all.

2001

# FUTURE: This function could be improved for the 'extract many' case

2002

# by tracking each component and only doing the copy when the number of

2003

# children than need to apply delta's to it is > 1 or it is part of the

2004

# final output.

2005

multiple_versions = len(self.keys) != 1

2006

if self._record_map is None:

2007

self._record_map = self.vf._raw_map_to_record_map(

2008

self._raw_record_map)

2009

record_map = self._record_map

2010

# raw_record_map is key:

2011

# Have read and parsed records at this point.

2012

for key in self.keys:

2013

if key in self.nonlocal_keys:

2014

# already handled

2015

continue

2016

components = []

2017

cursor = key

2018

while cursor is not None:

2019

try:

2020

record, record_details, digest, next = record_map[cursor]

2021

except KeyError:

2022

raise RevisionNotPresent(cursor, self)

2023

components.append((cursor, record, record_details, digest))

2024

cursor = next

2025

if cursor in self._contents_map:

2026

# no need to plan further back

2027

components.append((cursor, None, None, None))

2028

break

2029

2030

content = None

2031

for (component_id, record, record_details,

2032

digest) in reversed(components):

2033

if component_id in self._contents_map:

2034

content = self._contents_map[component_id]

2035

else:

2036

content, delta = self._factory.parse_record(key[-1],

2037

record, record_details, content,

2038

copy_base_content=multiple_versions)

2039

if multiple_versions:

2040

self._contents_map[component_id] = content

2041

2042

# digest here is the digest from the last applied component.

2043

text = content.text()

2044

actual_sha = sha_strings(text)

2045

if actual_sha != digest:

2046

raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)

2047

if multiple_versions:

2048

return self._contents_map[requested_key]

2049

else:

2050

return content

2051

2052

def _wire_bytes(self):

2053

"""Get the bytes to put on the wire for 'key'.

2054

2055

The first collection of bytes asked for returns the serialised

2056

raw_record_map and the additional details (key, parent) for key.

2057

Subsequent calls return just the additional details (key, parent).

2058

The wire storage_kind given for the first key is 'knit-delta-closure',

2059

For subsequent keys it is 'knit-delta-closure-ref'.

2060

2061

:param key: A key from the content generator.

2062

:return: Bytes to put on the wire.

2063

"""

2064

lines = []

2065

# kind marker for dispatch on the far side,

2066

lines.append('knit-delta-closure')

2067

# Annotated or not

2068

if self.vf._factory.annotated:

2069

lines.append('annotated')

2070

else:

2071

lines.append('')

2072

# then the list of keys

2073

lines.append('\t'.join(['\x00'.join(key) for key in self.keys

2074

if key not in self.nonlocal_keys]))

2075

# then the _raw_record_map in serialised form:

2076

map_byte_list = []

2077

# for each item in the map:

2078

# 1 line with key

2079

# 1 line with parents if the key is to be yielded (None: for None, '' for ())

2080

# one line with method

2081

# one line with noeol

2082

# one line with next ('' for None)

2083

# one line with byte count of the record bytes

2084

# the record bytes

2085

for key, (record_bytes, (method, noeol), next) in \

2086

self._raw_record_map.iteritems():

2087

key_bytes = '\x00'.join(key)

2088

parents = self.global_map.get(key, None)

2089

if parents is None:

2090

parent_bytes = 'None:'

2091

else:

2092

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

2093

method_bytes = method

2094

if noeol:

2095

noeol_bytes = "T"

2096

else:

2097

noeol_bytes = "F"

2098

if next:

2099

next_bytes = '\x00'.join(next)

2100

else:

2101

next_bytes = ''

2102

map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (

2103

key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,

2104

len(record_bytes), record_bytes))

2105

map_bytes = ''.join(map_byte_list)

2106

lines.append(map_bytes)

2107

bytes = '\n'.join(lines)

2108

return bytes

2109

2110

2111

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
        global_map=None, raw_record_map=None):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for.  Any iterable is
            accepted; it is copied into a list exactly once.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: A unparsed raw record map to use for answering
            contents.
        """
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list rather than the keys argument: if
            # the caller handed us a one-shot iterator, list(keys) above has
            # already exhausted it.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2154

2155

2156

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised content map.  From offset line_end on
            it holds one line that is 'annotated' or something else, one
            line with the tab separated keys to emit, and then for each
            record: key, parents, method, noeol, next and byte count lines
            followed by that many record bytes.
        :param line_end: Offset in bytes at which parsing starts.
        :raises ValueError: if the data is truncated (a line terminator is
            missing or a record is shorter than its declared byte count) or
            a byte count is not an integer.
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)
        start = line_end
        # Annotated or not
        line, start = self._read_line(bytes, start)
        if line == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        line, start = self._read_line(bytes, start)
        self.keys = self._split_keys(line)
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives, but
        # there is a decent sized gap stopping that at the moment.
        end = len(bytes)
        while start < end:
            # 1 line with key
            line, start = self._read_line(bytes, start)
            key = tuple(line.split('\x00'))
            # 1 line with parents (None: for None, '' for ())
            line, start = self._read_line(bytes, start)
            if line == 'None:':
                parents = None
            else:
                parents = tuple(self._split_keys(line))
            self.global_map[key] = parents
            # one line with method
            method, start = self._read_line(bytes, start)
            # one line with noeol
            line, start = self._read_line(bytes, start)
            noeol = line == "T"
            # one line with next ('' for None)
            line, start = self._read_line(bytes, start)
            if not line:
                next = None
            else:
                next = tuple(line.split('\x00'))
            # one line with byte count of the record bytes
            line, start = self._read_line(bytes, start)
            count = int(line)
            # the record bytes
            record_bytes = bytes[start:start+count]
            if len(record_bytes) != count:
                raise ValueError(
                    'truncated knit-delta-closure record for %r:'
                    ' expected %d bytes, got %d'
                    % (key, count, len(record_bytes)))
            start = start + count
            # put it in the map
            self._raw_record_map[key] = (record_bytes, (method, noeol), next)

    @staticmethod
    def _read_line(text, start):
        """Return (line, next_start) for the '\\n' terminated line at start.

        The terminator is not part of the returned line.  A missing
        terminator raises ValueError rather than returning a clipped slice
        and a next_start of 0, which is what str.find's -1 would lead to.
        """
        line_end = text.find('\n', start)
        if line_end < 0:
            raise ValueError(
                'truncated knit-delta-closure data: no line terminator'
                ' after offset %d' % start)
        return text[start:line_end], line_end + 1

    @staticmethod
    def _split_keys(line):
        """Parse a tab separated line of NUL-joined keys into key tuples."""
        return [tuple(segment.split('\x00')) for segment in line.split('\t')
                if segment]

    def get_record_stream(self):
        """Get a record stream for for keys requested by the bytestream."""
        first = True
        for key in self.keys:
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _wire_bytes(self):
        """Return the bytes this generator was constructed from, unchanged."""
        return self._bytes

2243

2244

2245

class _KndxIndex(object):

2246

"""Manages knit index files

2247

2248

The index is kept in memory and read on startup, to enable

2249

fast lookups of revision information. The cursor of the index

2250

file is always pointing to the end, making it easy to append

2251

entries.

2252

2253

_cache is a cache for fast mapping from version id to a Index

2254

object.

2255

2256

_history is a cache for fast mapping from indexes to version ids.

2257

2258

The index data format is dictionary compressed when it comes to

2259

parent references; an index entry may only have parents with a

2260

lower index number. As a result, the index is topologically sorted.

2261

2262

Duplicate entries may be written to the index for a single version id

2263

if this is done then the latter one completely replaces the former:

2264

this allows updates to correct version and parent information.

2265

Note that the two entries may share the delta, and that successive

2266

annotations and references MUST point to the first entry.

2267

2268

The index file on disc contains a header, followed by one line per knit

2269

record. The same revision can be present in an index file more than once.

2270

The first occurrence gets assigned a sequence number starting from 0.

2271

2272

The format of a single line is

2273

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

2274

REVISION_ID is a utf8-encoded revision id

2275

FLAGS is a comma separated list of flags about the record. Values include

2276

no-eol, line-delta, fulltext.

2277

BYTE_OFFSET is the ascii representation of the byte offset in the data file

2278

that the compressed data starts at.

2279

LENGTH is the ascii representation of the length of the data file.

2280

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

2281

REVISION_ID.

2282

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

2283

revision id already in the knit that is a parent of REVISION_ID.

2284

The ' :' marker is the end of record marker.

2285

2286

partial writes:

2287

when a write is interrupted to the index file, it will result in a line

2288

that does not end in ' :'. If the ' :' is not present at the end of a line,

2289

or at the end of the file, then the record that is missing it will be

2290

ignored by the parser.

2291

2292

When writing new records to the index file, the data is preceded by '\n'

2293

to ensure that records always start on new lines even if the last write was

2294

interrupted. As a result its normal for the last line in the index to be

2295

missing a trailing newline. One can be added with no harmful effects.

2296

2297

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2298

where prefix is e.g. the (fileid,) for .texts instances or () for

2299

constant-mapped things like .revisions, and the old state is

2300

tuple(history_vector, cache_dict). This is used to prevent having an

2301

ABI change with the C extension that reads .kndx files.

2302

"""

2303

2304

HEADER = "# bzr knit index 8\n"

2305

2306

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):

2307

"""Create a _KndxIndex on transport using mapper."""

2308

self._transport = transport

2309

self._mapper = mapper

2310

self._get_scope = get_scope

2311

self._allow_writes = allow_writes

2312

self._is_locked = is_locked

2313

self._reset_cache()

2314

self.has_graph = True

2315

2316

def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :param missing_compression_parents: If true, nothing is added and
        RevisionNotPresent is raised with the sorted keys of the records:
        this index cannot accept such records.
    :raises errors.RevisionNotPresent: when missing_compression_parents is
        true.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # Group the records by the .kndx file they belong to.
    by_path = {}
    for record in records:
        key = record[0]
        path = self._mapper.map(key) + '.kndx'
        group = by_path.setdefault(path, (key[:-1], []))
        group[1].append(record)
    for path in sorted(by_path):
        prefix, path_records = by_path[path]
        self._load_prefixes([prefix])
        # Snapshot the cached state so it can be put back on failure.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()
        lines = []

        try:
            for key, options, (_, pos, size), parents in path_records:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                compressed_parents = self._dictionary_compress(parents)
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    compressed_parents)
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(orig_history) != 0:
                # There were entries before: append to the existing file.
                self._transport.append_bytes(path, ''.join(lines))
            else:
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

2368

2369

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: always.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)

2374

2375

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    :raises NotImplementedError: always.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)

2380

2381

def _cache_key(self, key, options, pos, size, parent_keys):

2382

"""Cache a version record in the history array and index cache.

2383

2384

This is inlined into _load_data for performance. KEEP IN SYNC.

2385

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

2386

indexes).

2387

"""

2388

prefix = key[:-1]

2389

version_id = key[-1]

2390

# last-element only for compatibilty with the C load_data.

2391

parents = tuple(parent[-1] for parent in parent_keys)

2392

for parent in parent_keys:

2393

if parent[:-1] != prefix:

2394

raise ValueError("mismatched prefixes for %r, %r" % (

2395

key, parent_keys))

2396

cache, history = self._kndx_cache[prefix]

2397

# only want the _history index to reference the 1st index entry

2398

# for version_id

2399

if version_id not in cache:

2400

index = len(history)

2401

history.append(version_id)

2402

else:

2403

index = cache[version_id][5]

2404

cache[version_id] = (version_id,

2405

options,

2406

pos,

2407

size,

2408

parents,

2409

index)

2410

2411

def check_header(self, fp):
    """Check that the first line read from fp is the index header.

    :param fp: A file-like object positioned at the start of the index.
    :raises errors.NoSuchFile: if nothing can be read (empty file).
    :raises KnitHeaderError: if the first line differs from self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    elif first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)

2419

2420

def _check_read(self):
    """Raise unless locked; drop cached data if the scope has changed.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        self._reset_cache()

2425

2426

def _check_write_ok(self):
    """Raise unless the index is locked and in write mode.

    As with _check_read, the cache is reset first when the scope has
    changed, so self._mode is re-evaluated before it is tested.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: If the mode is not 'w'.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        self._reset_cache()
    writable = self._mode == 'w'
    if not writable:
        raise errors.ReadOnlyObjectDirtiedError(self)

2434

2435

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys. Any iterable is accepted, including
        a one-shot generator.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked more than once (by get_parent_map and by the loop
    # below); materialise it so a generator is not silently exhausted.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Anything that is not a fulltext is built on its first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

2470

2471

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: If neither option is present.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

2480

2481

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :raises RevisionNotPresent: If key is not in the loaded index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        cache = self._kndx_cache[prefix][0]
        # Element 1 of a cache entry is the options value.
        return cache[suffix][1]
    except KeyError:
        raise RevisionNotPresent(key, self)

2492

2493

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for. Any iterable is
        accepted, including a one-shot generator.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice below (once to collect prefixes, once to build
    # the result); materialise it so a generator is not silently exhausted
    # by the first pass.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            # Element 4 of a cache entry holds the parents' last elements.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Absent keys are simply left out of the mapping.
            pass
        else:
            result[key] = tuple(prefix + (suffix,) for
                                suffix in suffix_parents)
    return result

2516

2517

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    record = self._kndx_cache[prefix][0][suffix]
    # Elements 2 and 3 of a cache entry are the position and the size.
    data_pos = record[2]
    data_size = record[3]
    return (key, data_pos, data_size)

2527

2528

# has_key is not implemented in this class: the shared helper from
# _mod_index is bound as the method.
# NOTE(review): the helper's name suggests it answers via get_parent_map;
# confirm against its definition.
has_key = _mod_index._has_key_from_parent_map

2529

2530

def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes self.HEADER followed by extra_lines to path through the
    transport, asking it to create the parent directory.

    :param path: Path of the index file, as understood by self._transport.
    :param extra_lines: Optional iterable of strings written verbatim after
        the header. Defaults to no extra lines.
    """
    sio = StringIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    # NOTE: only create_parent_dir is passed; no file mode or directory mode
    # is applied to the new index.
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)

2541

2542

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples, one per entry in the history list of
        every prefix found.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension so that files sharing a base name collapse
        # to a single relpath.
        relpaths = set(
            os.path.splitext(quoted_relpath)[0]
            for quoted_relpath in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    all_keys = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        all_keys.update(prefix + (suffix,) for suffix in history)
    return all_keys

2563

2564

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already present in self._kndx_cache are skipped. For every
    other prefix the matching '.kndx' file is fetched from the transport
    and parsed by _load_data; when that raises NoSuchFile the prefix is
    cached as an empty index instead.

    :param prefixes: An iterable of key prefixes.
    """
    self._check_read()
    for prefix in prefixes:
        if prefix not in self._kndx_cache:
            # the load_data interface writes to these variables.
            self._cache = {}
            self._history = []
            self._filename = prefix
            try:
                path = self._mapper.map(prefix) + '.kndx'
                fp = self._transport.get(path)
                try:
                    # _load_data may raise NoSuchFile if the target knit is
                    # completely empty.
                    _load_data(self, fp)
                finally:
                    fp.close()
                self._kndx_cache[prefix] = (self._cache, self._history)
                # The temporary attributes are only needed while parsing.
                del self._cache
                del self._filename
                del self._history
            except NoSuchFile:
                # Remember the prefix as an empty index so that it is not
                # looked up again.
                self._kndx_cache[prefix] = ({}, [])
                if type(self._mapper) == ConstantMapper:
                    # preserve behaviour for revisions.kndx etc.
                    self._init_index(path)
                # NOTE(review): the temporaries are removed on this path and
                # on success only; any other exception leaves them set.
                del self._cache
                del self._filename
                del self._history

2594

2595

# missing_keys is not implemented in this class: the shared helper from
# _mod_index is bound as the method.
# NOTE(review): the helper's name suggests it answers via get_parent_map;
# confirm against its definition.
missing_keys = _mod_index._missing_keys_from_parent_map

2596

2597

def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    :param keys: An iterable of key tuples.
    :return: A dict mapping each key[:-1] to the list of key[-1] values
        seen for it, in input order.
    """
    partitioned = {}
    for key in keys:
        prefix, suffix = key[:-1], key[-1]
        if prefix not in partitioned:
            partitioned[prefix] = []
        partitioned[prefix].append(suffix)
    return partitioned

2604

2605

def _dictionary_compress(self, keys):
    """Dictionary compress keys.

    :param keys: The keys to generate references to. All keys must share
        the prefix of the first key, and that prefix must already be
        loaded in self._kndx_cache.
    :return: A string representation of keys. keys which are present are
        dictionary compressed, and others are emitted as fulltext with a
        '.' prefix.
    :raises ValueError: If the keys do not all share the same prefix.
    """
    if not keys:
        return ''
    result_list = []
    prefix = keys[0][:-1]
    cache = self._kndx_cache[prefix][0]
    for key in keys:
        if key[:-1] != prefix:
            # kndx indices cannot refer across partitioned storage.
            # keys is wrapped in a 1-tuple: when keys is itself a tuple,
            # "%r" % keys would treat it as the argument list and raise
            # TypeError instead of the intended ValueError.
            raise ValueError("mismatched prefixes for %r" % (keys,))
        if key[-1] in cache:
            # -- inlined lookup() --
            # Element 5 of a cache entry is its offset in the history list.
            result_list.append(str(cache[key[-1]][5]))
            # -- end lookup () --
        else:
            result_list.append('.' + key[-1])
    return ' '.join(result_list)

2629

2630

def _reset_cache(self):
    """Discard all parsed kndx data and re-read the scope and mode.

    Sets self._scope from self._get_scope() and self._mode to 'w' or 'r'
    depending on self._allow_writes().
    """
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    if self._allow_writes():
        mode = 'w'
    else:
        mode = 'r'
    self._mode = mode

2640

2641

def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def prefix_then_offset(key):
        index_memo = positions[key][1]
        # index_memo[0] is the key, i.e. (file_id, revision_id); only its
        # prefix takes part in the sort, never the revision_id.
        memo_prefix = index_memo[0][:-1]
        # index_memo[1] is the position. index_memo[2] is the size, which
        # is irrelevant to the ordering.
        offset = index_memo[1]
        return memo_prefix, offset
    keys.sort(key=prefix_then_offset)
    return None

2660

2661

# Bind the _get_total_build_size function defined outside this class as a
# method of the same name.
_get_total_build_size = _get_total_build_size

2662

2663

def _split_key(self, key):
    """Split key into a prefix and suffix.

    :return: A (prefix, suffix) pair: everything but the last element of
        key, and the last element itself.
    """
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix

2666

2667

2668

class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex.

    Entries handled by this class are tuples of
    (index, key, value, node_refs). The value string starts with 'N' when
    the record has the 'no-eol' option (a space otherwise), followed by
    "<pos> <size>".
    """

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
        add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked and
            thus usable (i.e. whether the object should answer queries).
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: If deltas is requested without parents.
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked
        # Keys of compression parents known to be referenced but absent.
        self._missing_compression_parents = set()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._graph_index)

    def add_records(self, records, random_id=False,
        missing_compression_parents=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
                         (key, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :param missing_compression_parents: If True the records being added
            may refer to texts that are not in the index as compression
            parents; the compression parent of every line-delta record that
            is not itself one of the added keys is remembered and later
            reported by get_missing_compression_parents. If False no
            compression parents are recorded as missing for these records.
        :raises errors.ReadOnlyError: If no add_callback was supplied.
        :raises KnitCorrupt: If a record does not fit this index (a
            line-delta in a non-delta index, parents in a parentless index)
            or conflicts with an entry that is already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        # Maps key -> (value, node_refs) for every record to be added.
        keys = {}
        compression_parents = set()
        for (key, options, access_memo, parents) in records:
            if self._parents:
                parents = tuple(parents)
            index, pos, size = access_memo
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # The compression parent of a delta is its first
                        # parent.
                        node_refs = (parents, (parents[0],))
                        if missing_compression_parents:
                            compression_parents.add(parents[0])
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        # check for dups
        if not random_id:
            # _get_entries copies keys into a set before iterating, so
            # deleting from keys inside this loop is safe.
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                # Only the no-eol flag and the first reference list are
                # compared with the entry already present.
                if (value[0] != keys[key][0][0] or
                    node_refs[:1] != keys[key][1][:1]):
                    raise KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            # A parentless index receives nodes without reference lists.
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)
        if missing_compression_parents:
            # This may appear to be incorrect (it does not check for
            # compression parents that are in the existing graph index),
            # but such records won't have been buffered, so this is
            # actually correct: every entry when
            # missing_compression_parents==True either has a missing parent, or
            # a parent that is one of the keys in records.
            compression_parents.difference_update(keys)
            self._missing_compression_parents.update(compression_parents)
        # Adding records may have satisfied missing compression parents.
        self._missing_compression_parents.difference_update(keys)

    def scan_unvalidated_index(self, graph_index):
        """Inform this _KnitGraphIndex that there is an unvalidated index.

        This allows this _KnitGraphIndex to keep track of any missing
        compression parents we may want to have filled in to make those
        indices valid.

        Nothing is recorded when this index does not allow deltas.

        :param graph_index: A GraphIndex
        """
        if self._deltas:
            new_missing = graph_index.external_references(ref_list_num=1)
            # Parents this index can already resolve are not missing.
            new_missing.difference_update(self.get_parent_map(new_missing))
            self._missing_compression_parents.update(new_missing)

    def get_missing_compression_parents(self):
        """Return the keys of missing compression parents.

        Missing compression parents occur when a record stream was missing
        basis texts, or a index was scanned that had missing basis texts.

        :return: A frozenset snapshot of the missing keys.
        """
        return frozenset(self._missing_compression_parents)

    def _check_read(self):
        """raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Assert if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _compression_parent(self, an_entry):
        """Return the key that an_entry is compressed against, or None.

        :param an_entry: An entry tuple whose element 3 holds the reference
            lists; the second list holds the compression parent.
        :raises AssertionError: If more than one compression parent is
            recorded.
        """
        # return the key that an_entry is compressed against, or None
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        if len(compression_parents) != 1:
            raise AssertionError(
                "Too many compression parents: %r" % compression_parents)
        return compression_parents[0]

    def get_build_details(self, keys):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        # check_present is False: keys without an entry are simply skipped.
        entries = self._get_entries(keys, False)
        for entry in entries:
            key = entry[1]
            if not self._parents:
                parents = ()
            else:
                parents = entry[3][0]
            if not self._deltas:
                compression_parent_key = None
            else:
                compression_parent_key = self._compression_parent(entry)
            noeol = (entry[2][0] == 'N')
            if compression_parent_key:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[key] = (self._node_to_position(entry),
                                  compression_parent_key, parents,
                                  (method, noeol))
        return result

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator; nothing is looked up until it is iterated.

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise RevisionNotPresent after all
            found entries have been yielded when any key had no entry.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def get_method(self, key):
        """Return compression method of specified key."""
        return self._get_method(self._get_node(key))

    def _get_method(self, node):
        """Return 'line-delta' if node has a compression parent, else
        'fulltext'. Always 'fulltext' when deltas are not allowed."""
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, key):
        """Return the single entry for key.

        :raises RevisionNotPresent: If key has no entry.
        """
        try:
            return list(self._get_entries([key]))[0]
        except IndexError:
            raise RevisionNotPresent(key, self)

    def get_options(self, key):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(key)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping. When this index does not record parents, every
            present key maps to None.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1]] = node[3][0]
        else:
            for node in nodes:
                result[node[1]] = None
        return result

    def get_position(self, key):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(key)
        return self._node_to_position(node)

    # Shared helper from _mod_index bound as the has_key method.
    has_key = _mod_index._has_key_from_parent_map

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.

        :return: A list with the key of every entry in the graph index.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    # Shared helper from _mod_index bound as the missing_keys method.
    missing_keys = _mod_index._missing_keys_from_parent_map

    def _node_to_position(self, node):
        """Convert an index value to position details.

        :return: A tuple (node[0], position, size) parsed from the value
            string after its leading flag character.
        """
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def _sort_keys_by_io(self, keys, positions):
        """Figure out an optimal order to read the records for the given keys.

        Sort keys, grouped by index and sorted by position.

        :param keys: A list of keys whose records we want to read. This will be
            sorted 'in-place'.
        :param positions: A dict, such as the one returned by
            _get_components_positions()
        :return: None
        """
        def get_index_memo(key):
            # index_memo is at offset [1]. It is made up of (GraphIndex,
            # position, size). GI is an object, which will be unique for each
            # pack file. This causes us to group by pack file, then sort by
            # position. Size doesn't matter, but it isn't worth breaking up the
            # tuple.
            return positions[key][1]
        return keys.sort(key=get_index_memo)

    # Bind the _get_total_build_size function defined outside this class as
    # a method of the same name.
    _get_total_build_size = _get_total_build_size

2985

2986

2987

class _KnitKeyAccess(object):
    """Access to records in .knit files."""

    def __init__(self, transport, mapper):
        """Create a _KnitKeyAccess with transport and mapper.

        :param transport: The transport the access object is rooted at.
        :param mapper: The mapper used to map keys to .knit files.
        """
        self._mapper = mapper
        self._transport = transport

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        Each record is appended to the .knit file that its key maps to.

        :param key_sizes: An iterable of tuples containing the key and size
            of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _KnitKeyAccess the memo is (key, pos,
            length), where the key is the record key.
        :raises AssertionError: If raw_data is not exactly a str.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        memos = []
        start = 0
        # TODO: This can be tuned for writing to sftp and other servers where
        # append() is relatively expensive by grouping the writes to each key
        # prefix.
        for key, size in key_sizes:
            end = start + size
            relpath = self._mapper.map(key)
            try:
                base = self._transport.append_bytes(relpath + '.knit',
                                                    raw_data[start:end])
            except errors.NoSuchFile:
                # The containing directory does not exist yet: create it and
                # retry the append once.
                self._transport.mkdir(osutils.dirname(relpath))
                base = self._transport.append_bytes(relpath + '.knit',
                                                    raw_data[start:end])
            memos.append((key, base, size))
            start = end
        return memos

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the access memo for
            retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # First pass: batch consecutive memos that share a prefix, to
        # minimise the number of readv's issued.
        batches = []
        for key, offset, length in memos_for_retrieval:
            prefix = key[:-1]
            if batches and batches[-1][0] == prefix:
                batches[-1][1].append((offset, length))
            else:
                batches.append((prefix, [(offset, length)]))
        # Second pass: one readv per batch.
        for prefix, read_vector in batches:
            knit_path = self._mapper.map(prefix) + '.knit'
            for pos, data in self._transport.readv(knit_path, read_vector):
                yield data

3060

3061

3062

class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation."""

    def __init__(self, index_to_packs, reload_func=None):
        """Create a _DirectPackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param reload_func: A function to call if we determine that the pack
            files have moved and we need to reload our caches. See
            bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.
        """
        # No writer is available until set_writer is called.
        self._container_writer = None
        self._write_index = None
        self._indices = index_to_packs
        self._reload_func = reload_func

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param key_sizes: An iterable of tuples containing the key and size
            of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _DirectPackAccess the memo is (index, pos,
            length), where the index field is the write_index object supplied
            to the PackAccess object.
        :raises AssertionError: If raw_data is not exactly a str.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        result = []
        offset = 0
        for key, size in key_sizes:
            # The record is written without names (empty list); the key is
            # not stored with the bytes.
            p_offset, p_length = self._container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self._write_index, p_offset, p_length))
        return result

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        :raises errors.RetryWithNewPacks: If an index or pack file has gone
            missing and a reload function is available.
        """
        # first pass, group into same-index requests
        request_lists = []
        # NOTE(review): None doubles as the "no group yet" sentinel. A first
        # memo whose index is None would compare equal to it and reach
        # current_list before it is assigned.
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            try:
                transport, path = self._indices[index]
            except KeyError:
                # A KeyError here indicates that someone has triggered an index
                # reload, and this index has gone missing, we need to start
                # over.
                if self._reload_func is None:
                    # If we don't have a _reload_func there is nothing that can
                    # be done
                    raise
                # exc_info is captured inside the handler so that
                # reload_or_raise can re-raise the original KeyError.
                raise errors.RetryWithNewPacks(index,
                                               reload_occurred=True,
                                               exc_info=sys.exc_info())
            try:
                reader = pack.make_readv_reader(transport, path, offsets)
                for names, read_func in reader.iter_records():
                    yield read_func(None)
            except errors.NoSuchFile:
                # A NoSuchFile error indicates that a pack file has gone
                # missing on disk, we need to trigger a reload, and start over.
                if self._reload_func is None:
                    raise
                raise errors.RetryWithNewPacks(transport.abspath(path),
                                               reload_occurred=False,
                                               exc_info=sys.exc_info())

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer used by add_raw_records.
        :param index: The index object placed in returned memos. When not
            None it is also registered in the index-to-pack map.
        :param transport_packname: The value stored for index in that map.
        """
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index

    def reload_or_raise(self, retry_exc):
        """Try calling the reload function, or re-raise the original exception.

        This should be called after _DirectPackAccess raises a
        RetryWithNewPacks exception. This function will handle the common logic
        of determining when the error is fatal versus being temporary.
        It will also make sure that the original exception is raised, rather
        than the RetryWithNewPacks exception.

        If this function returns, then the calling function should retry
        whatever operation was being performed. Otherwise an exception will
        be raised.

        :param retry_exc: A RetryWithNewPacks exception.
        """
        is_error = False
        if self._reload_func is None:
            is_error = True
        elif not self._reload_func():
            # The reload claimed that nothing changed
            if not retry_exc.reload_occurred:
                # If there wasn't an earlier reload, then we really were
                # expecting to find changes. We didn't find them, so this is a
                # hard error
                is_error = True
        if is_error:
            # Python 2 three-argument raise: re-raises the original exception
            # with its original traceback.
            exc_class, exc_value, exc_traceback = retry_exc.exc_info
            raise exc_class, exc_value, exc_traceback

3190

3191

3192

# Deprecated, use PatienceSequenceMatcher instead

3193

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

3194

3195

3196

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit handed to a fresh _KnitAnnotator.
    :param revision_id: The revision passed to the annotator's annotate().
    :return: An iterator over whatever annotate() returns.
    """
    return iter(_KnitAnnotator(knit).annotate(revision_id))

3205

3206

3207

class _KnitAnnotator(object):

3208

"""Build up the annotations for a text."""

3209

3210

def __init__(self, knit):
    """Create an annotator with empty caches for knit.

    :param knit: The knit whose texts will be annotated.
    """
    self._knit = knit

    # Content objects. These differ from fulltexts because of how knits
    # treat final newlines: the content objects held here always have a
    # final newline.
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Raw data for nodes that could not be processed yet: maps the
    # revision_id of the base to the list of children that will be
    # annotated from it.
    self._pending_children = {}

    # Nodes which cannot be extracted
    self._ghosts = set()

    # How many children each node still has, so we know whether it needs
    # to be kept.
    self._annotate_children = {}
    self._compression_children = {}

    self._all_build_details = {}
    # The children => parent revision_id graph
    self._revision_id_graph = {}

    self._heads_provider = None

    self._nodes_to_keep_annotations = set()
    self._generations_until_keep = 100

3242

3243

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new generation count.
    """
    generations = value
    self._generations_until_keep = generations

3250

3251

def _add_fulltext_content(self, revision_id, content_obj):
    """Remember content_obj for revision_id and return its text.

    :param revision_id: Key under which the content object is stored.
    :param content_obj: An object providing a text() method.
    :return: The result of content_obj.text().
    """
    contents = self._fulltext_contents
    contents[revision_id] = content_obj
    # TODO: jam 20080305 It might be good to check the sha1digest here
    return content_obj.text()

3255

3256

def _check_parents(self, child, nodes_to_annotate):
    """Check if all parents have been processed.

    :param child: A tuple of (rev_id, parents, raw_content)
    :param nodes_to_annotate: If child is ready, add it to
        nodes_to_annotate, otherwise put it back in self._pending_children
    """
    annotated = self._annotated_lines
    for parent_id in child[1]:
        if parent_id in annotated:
            continue
        # First parent that has not been annotated yet: park the child
        # under it and stop looking.
        self._pending_children.setdefault(parent_id, []).append(child)
        return
    # Every parent is annotated, so this one is ready to be processed.
    nodes_to_annotate.append(child)

3272

3273

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated. Parents whose last
    child this was, and which are not marked to be kept, have their
    cached annotations, build details and fulltext content dropped.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of that revision, as passed to
        annotate.reannotate.
    :param parent_ids: The ids of its parents.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    parent_annotations = [self._annotated_lines[parent_id]
                          for parent_id in parent_ids]
    heads_provider = self._get_heads_provider()
    reannotated = annotate.reannotate(
        parent_annotations, fulltext, revision_id, left_matching_blocks,
        heads_provider=heads_provider)
    self._annotated_lines[revision_id] = list(reannotated)
    for parent_id in parent_ids:
        remaining = self._annotate_children[parent_id]
        remaining.remove(revision_id)
        if remaining or parent_id in self._nodes_to_keep_annotations:
            # Still needed by another child, or explicitly kept.
            continue
        del self._annotated_lines[parent_id]
        del self._all_build_details[parent_id]
        self._fulltext_contents.pop(parent_id, None)
    # Now that this revision is annotated, children that were waiting on it
    # may have all of their parents available.
    ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, ready)
    return ready

3301

3302

def _get_build_graph(self, key):

3303

"""Get the graphs for building texts and annotations.

3304

3305

The data you need for creating a full text may be different than the

3306

data you need to annotate that text. (At a minimum, you need both

3307

parents to create an annotation, but only need 1 parent to generate the

3308

fulltext.)

3309

3310

:return: A list of (key, index_memo) records, suitable for

3311

passing to read_records_iter to start reading in the raw data fro/

3312

the pack file.

3313

"""

3314

if key in self._annotated_lines:

3315

# Nothing to do

3316

return []

3317

pending = set([key])

3318

records = []

3319

generation = 0

3320

kept_generation = 0

3321

while pending:

3322

# get all pending nodes

3323

generation += 1

3324

this_iteration = pending

3325

build_details = self._knit._index.get_build_details(this_iteration)

3326

self._all_build_details.update(build_details)

3327

# new_nodes = self._knit._index._get_entries(this_iteration)

3328

pending = set()

3329

for key, details in build_details.iteritems():

3330

(index_memo, compression_parent, parents,

3331

record_details) = details

3332

self._revision_id_graph[key] = parents

3333

records.append((key, index_memo))

3334

# Do we actually need to check _annotated_lines?

3335

pending.update(p for p in parents

3336

if p not in self._all_build_details)

3337

if compression_parent:

3338

self._compression_children.setdefault(compression_parent,

3339

[]).append(key)

3340

if parents:

3341

for parent in parents:

3342

self._annotate_children.setdefault(parent,

3343

[]).append(key)

3344

num_gens = generation - kept_generation

3345

if ((num_gens >= self._generations_until_keep)

3346

and len(parents) > 1):

3347

kept_generation = generation

3348

self._nodes_to_keep_annotations.add(key)

3349

3350

missing_versions = this_iteration.difference(build_details.keys())

3351

self._ghosts.update(missing_versions)

3352

for missing_version in missing_versions:

3353

# add a key, no parents

3354

self._revision_id_graph[missing_version] = ()

3355

pending.discard(missing_version) # don't look for it

3356

if self._ghosts.intersection(self._compression_children):

3357

raise KnitCorrupt(

3358

"We cannot have nodes which have a ghost compression parent:\n"

3359

"ghosts: %r\n"

3360

"compression children: %r"

3361

% (self._ghosts, self._compression_children))

3362

# Cleanout anything that depends on a ghost so that we don't wait for

3363

# the ghost to show up

3364

for node in self._ghosts:

3365

if node in self._annotate_children:

3366

# We won't be building this node

3367

del self._annotate_children[node]

3368

# Generally we will want to read the records in reverse order, because

3369

# we find the parent nodes after the children

3370

records.reverse()

3371

return records

3372

3373

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    Each record read back from self._knit._read_records_iter is either
    annotated straight away (when it has no non-ghost parents, or all of
    its parents are already annotated) or parked via _check_parents until
    its parents have been processed. Annotating one node can release
    parked children, which are then processed in the same pass.

    :param records: The records to read, passed unchanged to
        self._knit._read_records_iter.
    :raises AssertionError: If a node is missing from the compression
        children recorded for its compression parent.
    """
    # We iterate in the order read, rather than a strict order requested
    # However, process what we can, and put off to the side things that
    # still need parents, cleaning them up when those parents are
    # processed.
    for (rev_id, record,
         digest) in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            # Already annotated, nothing more to do for this record.
            continue
        parent_ids = self._revision_id_graph[rev_id]
        # Ghost parents are never annotated, so do not wait for them.
        parent_ids = [p for p in parent_ids if p not in self._ghosts]
        details = self._all_build_details[rev_id]
        # Only record_details is used before these names are rebound in
        # the while loop below.
        (index_memo, compression_parent, parents,
         record_details) = details
        nodes_to_annotate = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if len(parent_ids) == 0:
            # There are no parents for this node, so just add it
            # TODO: This probably needs to be decoupled
            # The record is parsed with None as its base content.
            fulltext_content, delta = self._knit._factory.parse_record(
                rev_id, record, record_details, None)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content)
            nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                parent_ids, left_matching_blocks=None))
        else:
            child = (rev_id, parent_ids, record)
            # Check if all the parents are present
            self._check_parents(child, nodes_to_annotate)
        while nodes_to_annotate:
            # Should we use a queue here instead of a stack?
            # NOTE: this rebinds rev_id, parent_ids and record from the
            # enclosing for loop.
            (rev_id, parent_ids, record) = nodes_to_annotate.pop()
            (index_memo, compression_parent, parents,
             record_details) = self._all_build_details[rev_id]
            # Matching blocks for the left parent, when they can be
            # derived from the delta; otherwise left as None.
            blocks = None
            if compression_parent is not None:
                comp_children = self._compression_children[compression_parent]
                if rev_id not in comp_children:
                    raise AssertionError("%r not in compression children %r"
                        % (rev_id, comp_children))
                # If there is only 1 child, it is safe to reuse this
                # content
                reuse_content = (len(comp_children) == 1
                    and compression_parent not in
                        self._nodes_to_keep_annotations)
                if reuse_content:
                    # Remove it from the cache since it will be changing
                    parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                    # Make sure to copy the fulltext since it might be
                    # modified
                    parent_fulltext = list(parent_fulltext_content.text())
                else:
                    parent_fulltext_content = self._fulltext_contents[compression_parent]
                    parent_fulltext = parent_fulltext_content.text()
                # This child no longer needs the compression parent.
                comp_children.remove(rev_id)
                # The base content is only copied when it is not being
                # reused.
                fulltext_content, delta = self._knit._factory.parse_record(
                    rev_id, record, record_details,
                    parent_fulltext_content,
                    copy_base_content=(not reuse_content))
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
                if compression_parent == parent_ids[0]:
                    # the compression_parent is the left parent, so we can
                    # re-use the delta
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
            else:
                # No compression parent: the record is parsed as a
                # fulltext.
                fulltext_content = self._knit._factory.parse_fulltext(
                    record, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                    fulltext_content)
            # Annotating this node may release parked children; they are
            # pushed back onto the stack for this same loop.
            nodes_to_annotate.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))

3449

3450

def _get_heads_provider(self):

3451

"""Create a heads provider for resolving ancestry issues."""

3452

if self._heads_provider is not None:

3453

return self._heads_provider

3454

parent_provider = _mod_graph.DictParentsProvider(

3455

self._revision_id_graph)

3456

graph_obj = _mod_graph.Graph(parent_provider)

3457

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

3458

self._heads_provider = head_cache

3459

return head_cache

3460

3461

def annotate(self, key):

3462

"""Return the annotated fulltext at the given key.

3463

3464

:param key: The key to annotate.

3465

"""

3466

if len(self._knit._fallback_vfs) > 0:

3467

# stacked knits can't use the fast path at present.

3468

return self._simple_annotate(key)

3469

while True:

3470

try:

3471

records = self._get_build_graph(key)

3472

if key in self._ghosts:

3473

raise errors.RevisionNotPresent(key, self._knit)

3474

self._annotate_records(records)

3475

return self._annotated_lines[key]

3476

except errors.RetryWithNewPacks, e:

3477

self._knit._access.reload_or_raise(e)

3478

# The cached build_details are no longer valid

3479

self._all_build_details.clear()

3480

3481

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The annotated lines for key, as a list built from
        annotate.reannotate.
    :raises errors.RevisionNotPresent: If no text was annotated for key.
    """
    # TODO: this code generates a parent maps of present ancestors; it
    #       could be split out into a separate method, and probably should use
    #       iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    # Collect every present ancestor of key; ghosts are not collected.
    keys = set()
    while True:
        try:
            present, _unused_ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name so the requested key is not overwritten; the
        # result returned below must not depend on which record happens to
        # be streamed last.
        record_key = record.key
        fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parents = parent_map[record_key]
        if parents is not None:
            # Only parents that were collected in keys get annotated, so
            # skip any other parent (presumably a ghost -- the fast path
            # filters self._ghosts the same way). A collected parent that
            # is not cached yet still raises KeyError.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent in keys]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

3517

3518

3519

try:

3520

from bzrlib._knit_load_data_c import _load_data_c as _load_data

3521

except ImportError:

3522

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »