/brz/remove-bazaar : revision 4145.2.1

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Ian Clatworthy
Date: 2009-03-16 13:16:44 UTC
mto: (4150.1.1 ianc-integration)
mto: This revision was merged to the branch mainline in revision 4151.
Revision ID: ian.clatworthy@canonical.com-20090316131644-r7kevoidcxw30i8d

faster check

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2-windows.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/check-newsbugs.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

errors,

osutils,

patiencediff,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

SHA1KnitCorrupt,

)

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_string,

sha_strings,

split_lines,

)

100

from bzrlib.versionedfile import (

101

AbsentContentFactory,

102

adapter_registry,

103

ConstantMapper,

104

ContentFactory,

105

ChunkedContentFactory,

106

sort_groupcompress,

107

VersionedFile,

108

VersionedFiles,

109

)

110

111

112

# TODO: Split out code specific to this format into an associated object.

113

114

# TODO: Can we put in some kind of value to check that the index and data

115

# files belong together?

116

117

# TODO: accommodate binaries, perhaps by storing a byte count

118

119

# TODO: function to check whole file

120

121

# TODO: atomically append data, then measure backwards from the cursor

122

# position after writing to work out where it was located. we may need to

123

# bypass python file buffering.

124

125

DATA_SUFFIX = '.knit'

126

INDEX_SUFFIX = '.kndx'

127

_STREAM_MIN_BUFFER_SIZE = 5*1024*1024

128

129

130

class KnitAdapter(object):
    """Common base for adapters that convert knit records between forms."""

    def __init__(self, basis_vf):
        """Set up the parsing helpers shared by every adapter.

        :param basis_vf: A versioned file from which the basis texts of
            deltas are read. May be None for adapters that never need to
            look at a basis text.
        """
        self._basis_vf = basis_vf
        self._plain_factory = KnitPlainFactory()
        self._annotate_factory = KnitAnnotateFactory()
        # A knit with neither index nor access object; the adapters only use
        # its record parsing / serialising helpers.
        self._data = KnitVersionedFiles(None, None)

143

144

145

class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated fulltext knit record into a plain one."""

    def get_bytes(self, factory):
        """Return factory's raw record re-serialised without annotations.

        :param factory: A content factory whose _raw_record holds an
            annotated fulltext knit record.
        :return: The bytes produced by _record_to_data for the plain text.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        fulltext = self._annotate_factory.parse_fulltext(annotated_lines,
            version_id)
        # _record_to_data hands back a (size, bytes) pair; only the bytes are
        # of interest here.
        _size, record_bytes = self._data._record_to_data(
            (version_id,), header[3], fulltext.text())
        return record_bytes

155

156

157

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated line-delta knit record into a plain one."""

    def get_bytes(self, factory):
        """Return factory's raw delta record re-serialised without annotations.

        :param factory: A content factory whose _raw_record holds an
            annotated line-delta knit record.
        :return: The bytes produced by _record_to_data for the plain delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        version_id = header[1]
        # plain=True makes the annotate factory drop the origins while it
        # parses, so the delta can be lowered by the plain factory directly.
        plain_delta = self._annotate_factory.parse_line_delta(
            annotated_lines, version_id, plain=True)
        serialised = self._plain_factory.lower_line_delta(plain_delta)
        _size, record_bytes = self._data._record_to_data(
            (version_id,), header[3], serialised)
        return record_bytes

169

170

171

class FTAnnotatedToFullText(KnitAdapter):
    """Adapter turning an annotated fulltext knit record into a full text."""

    def get_bytes(self, factory):
        """Return the full text stored in factory's annotated record.

        :param factory: A content factory providing _raw_record, key and
            _build_details for an annotated fulltext record.
        :return: The text as a single string.
        """
        _header, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # A fulltext needs no basis, hence the None base content.
        content, _delta = self._annotate_factory.parse_record(
            factory.key[-1], annotated_lines, factory._build_details, None)
        return ''.join(content.text())

181

182

183

class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapter turning an annotated line-delta knit record into a full text."""

    def get_bytes(self, factory):
        """Return the full text obtained by applying factory's delta.

        The delta is applied to the text of factory.parents[0], which is
        fetched from the basis versioned file given at construction.

        :param factory: A content factory providing _raw_record, parents and
            _build_details for an annotated line-delta record.
        :return: The reconstructed text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        annotated_compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        delta = self._annotate_factory.parse_line_delta(contents, rec[1],
            plain=True)
        compression_parent = factory.parents[0]
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        # Manually apply the delta because we have one annotated content and
        # one plain.
        # apply_delta edits the line list in place, so work on a private copy:
        # the list handed out by the basis record may be shared with whatever
        # produced it (NOTE(review): depends on chunks_to_lines returning its
        # argument unchanged when it already holds lines - confirm).
        basis_content = PlainKnitContent(list(basis_lines), compression_parent)
        basis_content.apply_delta(delta, rec[1])
        basis_content._should_strip_eol = factory._build_details[1]
        return ''.join(basis_content.text())

205

206

207

class FTPlainToFullText(KnitAdapter):
    """Adapter turning a plain fulltext knit record into a full text."""

    def get_bytes(self, factory):
        """Return the full text stored in factory's unannotated record.

        :param factory: A content factory providing _raw_record, key and
            _build_details for a plain fulltext record.
        :return: The text as a single string.
        """
        _header, text_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # A fulltext needs no basis, hence the None base content.
        content, _delta = self._plain_factory.parse_record(
            factory.key[-1], text_lines, factory._build_details, None)
        return ''.join(content.text())

217

218

219

class DeltaPlainToFullText(KnitAdapter):
    """Adapter turning a plain line-delta knit record into a full text."""

    def get_bytes(self, factory):
        """Return the full text obtained by applying factory's delta.

        The delta is applied to the text of factory.parents[0], which is
        fetched from the basis versioned file given at construction.

        :param factory: A content factory providing _raw_record, parents and
            _build_details for a plain line-delta record.
        :return: The reconstructed text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # parse_record parses the delta itself and, with its default
        # copy_base_content=True, applies it to a copy of basis_content, so
        # no separate parse_line_delta call is needed here.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())

241

242

243

class KnitContentFactory(ContentFactory):
    """Content factory used when streaming records out of a knit.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object whose get_lines / get_text are used to
            answer 'chunked' and 'fulltext' requests.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # storage_kind is 'knit-[annotated-](delta|ft)-gz'.
        kind = 'ft'
        if build_details[0] == 'line-delta':
            kind = 'delta'
        annotated_kind = ''
        if annotated:
            annotated_kind = 'annotated-'
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Build and cache the fully serialised network form of the record."""
        # Layout: storage_kind, key, parents, Noeol flag, raw_record
        if self.parents is None:
            parent_bytes = 'None:'
        else:
            parent_bytes = '\t'.join(
                ['\x00'.join(parent) for parent in self.parents])
        noeol = ' '
        if self._build_details[1]:
            noeol = 'N'
        self._network_bytes = "%s\n%s\n%s\n%s%s" % (
            self.storage_kind, '\x00'.join(self.key), parent_bytes, noeol,
            self._raw_record)

    def get_bytes_as(self, storage_kind):
        """Return the record in the requested representation.

        The native storage kind yields the (lazily built) network bytes;
        'chunked' and 'fulltext' are only available when a knit was supplied.

        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind != self.storage_kind:
            knit = self._knit
            if knit is not None:
                if storage_kind == 'chunked':
                    return knit.get_lines(self.key[0])
                if storage_kind == 'fulltext':
                    return knit.get_text(self.key[0])
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
        if self._network_bytes is None:
            self._create_network_bytes()
        return self._network_bytes

309

310

311

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory able to produce either a full text or a wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for
            this key.
        :param first: Is this the first content object returned from
            generator? If it is, its storage kind is knit-delta-closure,
            otherwise it is knit-delta-closure-ref.
        """
        self._generator = generator
        self._first = first
        self.key = key
        self.parents = parents
        self.sha1 = None
        storage_kind = "knit-delta-closure"
        if not first:
            storage_kind = storage_kind + "-ref"
        self.storage_kind = storage_kind

    def get_bytes_as(self, storage_kind):
        """Return the record in the requested representation.

        :raises errors.UnavailableRepresentation: If storage_kind is neither
            this record's own kind, 'chunked' nor 'fulltext'.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                # all the keys etc are contained in the bytes returned in the
                # first record.
                return ''
            return self._generator._wire_bytes()
        if storage_kind == 'chunked':
            return self._generator._get_one_work(self.key).text()
        if storage_kind == 'fulltext':
            return ''.join(self._generator._get_one_work(self.key).text())
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

353

354

355

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to an iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset into bytes, handed on unchanged to
        _NetworkContentMapGenerator.
    :return: The generator's record stream.
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

364

365

366

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    Starting at line_end the bytes hold a key line, a parents line, a one
    character no-eol flag and then the raw knit record.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A one element list holding the KnitContentFactory.
    """
    # Key line: elements separated by NUL.
    key_end = bytes.find('\n', line_end)
    key = tuple(bytes[line_end:key_end].split('\x00'))
    # Parents line: tab separated keys, or the literal 'None:'.
    parents_start = key_end + 1
    parents_end = bytes.find('\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line != 'None:':
        parent_keys = []
        for segment in parent_line.split('\t'):
            if segment:
                parent_keys.append(tuple(segment.split('\x00')))
        parents = tuple(parent_keys)
    else:
        parents = None
    # A single flag character precedes the raw record.
    flag_pos = parents_end + 1
    noeol = bytes[flag_pos] == 'N'
    method = 'line-delta'
    if 'ft' in storage_kind:
        method = 'fulltext'
    raw_record = bytes[flag_pos + 1:]
    record = KnitContentFactory(key, parents, (method, noeol), None,
        raw_record, 'annotated' in storage_kind, network_bytes=bytes)
    return [record]

396

397

398

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The content is kept in memory as a list of lines that all end in \\n,
    together with a flag recording whether the final line ending is really
    present, which mirrors the on-disk knit representation.
    """

    def __init__(self):
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: The target content object.
        :return: An iterator of (old_start, old_end, new_length, new_lines)
            for every non-equal opcode.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(None, old_texts,
            new_texts)
        for tag, old_start, old_end, new_start, new_end in \
                matcher.get_opcodes():
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta"""

        def trimmed(src_start, tgt_start, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so drop the
            # final line of a run when the two sides differ there.
            if length > 0:
                last = length - 1
                if source[src_start + last] != target[tgt_start + last]:
                    return last
            return length

        target_len = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_at
            length = trimmed(src_at, tgt_at, gap)
            if length > 0:
                yield src_at, tgt_at, length
            tgt_at += inserted + gap
            src_at = hunk_end
        tail = target_len - tgt_at
        length = trimmed(src_at, tgt_at, tail)
        if length > 0:
            yield src_at, tgt_at, length
        # Terminating zero-length block, as get_matching_blocks() emits.
        yield src_at + tail, target_len, 0

452

453

454

class AnnotatedKnitContent(KnitContent):

455

"""Annotated content."""

456

457

def __init__(self, lines):

458

KnitContent.__init__(self)

459

self._lines = lines

460

461

def annotate(self):

462

"""Return a list of (origin, text) for each content line."""

463

lines = self._lines[:]

464

if self._should_strip_eol:

465

origin, last_line = lines[-1]

466

lines[-1] = (origin, last_line.rstrip('\n'))

467

return lines

468

469

def apply_delta(self, delta, new_version_id):

470

"""Apply delta to this object to become new_version_id."""

471

offset = 0

472

lines = self._lines

473

for start, end, count, delta_lines in delta:

474

lines[offset+start:offset+end] = delta_lines

475

offset = offset + (start - end) + count

476

477

def text(self):

478

try:

479

lines = [text for origin, text in self._lines]

480

except ValueError, e:

481

# most commonly (only?) caused by the internal form of the knit

482

# missing annotation information because of a bug - see thread

483

# around 20071015

484

raise KnitCorrupt(self,

485

"line in annotated knit missing annotation information: %s"

486

% (e,))

487

if self._should_strip_eol:

488

lines[-1] = lines[-1].rstrip('\n')

489

return lines

490

491

def copy(self):

492

return AnnotatedKnitContent(self._lines[:])

493

494

495

class PlainKnitContent(KnitContent):
    """Unannotated content.

    annotate() on this content reports the same version for every line, so
    it is generally not useful on PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._version_id = version_id
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        version_id = self._version_id
        annotated = []
        for line in self._lines:
            annotated.append((version_id, line))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        lines = self._lines
        # Hunk positions refer to the original text; shift tracks how far
        # earlier hunks have moved the lines that follow them.
        shift = 0
        for start, end, count, delta_lines in delta:
            lines[shift + start:shift + end] = delta_lines
            shift += count - (end - start)
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        duplicate_lines = self._lines[:]
        return PlainKnitContent(duplicate_lines, self._version_id)

    def text(self):
        """Return the content lines.

        The internal list itself is returned unless the final line ending
        has to be stripped, in which case a modified copy is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

530

531

532

class _KnitFactory(object):

533

"""Base class for common Factory functions."""

534

535

def parse_record(self, version_id, record, record_details,

536

base_content, copy_base_content=True):

537

"""Parse a record into a full content object.

538

539

:param version_id: The official version id for this content

540

:param record: The data returned by read_records_iter()

541

:param record_details: Details about the record returned by

542

get_build_details

543

:param base_content: If get_build_details returns a compression_parent,

544

you must return a base_content here, else use None

545

:param copy_base_content: When building from the base_content, decide

546

you can either copy it and return a new object, or modify it in

547

place.

548

:return: (content, delta) A Content object and possibly a line-delta,

549

delta may be None

550

"""

551

method, noeol = record_details

552

if method == 'line-delta':

553

if copy_base_content:

554

content = base_content.copy()

555

else:

556

content = base_content

557

delta = self.parse_line_delta(record, version_id)

558

content.apply_delta(delta, version_id)

559

else:

560

content = self.parse_fulltext(record, version_id)

561

delta = None

562

content._should_strip_eol = noeol

563

return (content, delta)

564

565

566

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return an AnnotatedKnitContent attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over parse_line_delta(lines, version_id).

        :param version_id: Passed through to parse_line_delta, which does
            not read it in this factory; optional so that existing
            single-argument callers keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return (origin_key, line) pairs for the content of key in knit."""
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key suffixes
        # for this factory.
        if type(key) == tuple:
            prefix = key[:-1]
            origins = content.annotate()
            result = []
            for origin, line in origins:
                result.append((prefix + (origin,), line))
            return result
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple here?
            # Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()

688

689

690

class KnitPlainFactory(_KnitFactory):
    """Factory producing unannotated (plain) Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent attributed to version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, lines) for each hunk of a plain delta.

        Every hunk is a 'start,end,count' header line followed by count
        content lines.
        """
        total = len(lines)
        position = 0
        while position < total:
            start, end, count = [int(field)
                                 for field in lines[position].split(',')]
            body_start = position + 1
            position = body_start + count
            yield start, end, count, lines[body_start:position]

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        for header in line_iter:
            remaining = int(header.split(',')[2])
            while remaining > 0:
                yield line_iter.next()
                remaining -= 1

    def lower_fulltext(self, content):
        """Return the serialisable form of a fulltext: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta as header lines each followed by their content."""
        out = []
        for start, end, count, hunk_lines in delta:
            out.append('%d,%d,%d\n' % (start, end, count))
            out.extend(hunk_lines)
        return out

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator over knit."""
        return _KnitAnnotator(knit).annotate(key)

750

751

752

753

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles.
    """
    def _nothing():
        return None

    def _always():
        return True

    def factory(transport):
        knit_access = _KnitKeyAccess(transport, mapper)
        knit_index = _KndxIndex(transport, mapper, _nothing, _always, _always)
        return KnitVersionedFiles(knit_index, knit_access,
            annotated=annotated)
    return factory

767

768

769

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles that carries the open `stream` and `writer` as
        attributes.
    """
    def factory(transport):
        # One reference list for the graph, one more for the deltas.
        ref_length = 0
        max_delta_chain = 0
        if graph:
            ref_length = 1
        if delta:
            ref_length += 1
            max_delta_chain = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _KnitGraphIndex(graph_index, lambda:True,
            parents=(graph or delta), deltas=delta,
            add_callback=graph_index.add_nodes)
        access = _DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(index, access,
            max_delta_chain=max_delta_chain)
        # Kept on the result so that the stream and writer can be finished
        # later on.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

804

805

806

def cleanup_pack_knit(versioned_files):
    """Finish the pack writer and close the stream of a pack based knit.

    :param versioned_files: An object carrying `writer` and `stream`
        attributes, such as the result of a make_pack_factory factory.
    """
    # The writer was built around stream.write, so it has to be ended while
    # the stream is still open; the stream is closed even if ending fails.
    try:
        versioned_files.writer.end()
    finally:
        versioned_files.stream.close()

809

810

811

def _get_total_build_size(self, keys, positions):

812

"""Determine the total bytes to build these keys.

813

814

(helper function because _KnitGraphIndex and _KndxIndex work the same, but

815

don't inherit from a common base.)

816

817

:param keys: Keys that we want to build

818

:param positions: dict of {key, (info, index_memo, comp_parent)} (such

819

as returned by _get_components_positions)

820

:return: Number of bytes to build those keys

821

"""

822

all_build_index_memos = {}

823

build_keys = keys

824

while build_keys:

825

next_keys = set()

826

for key in build_keys:

827

# This is mostly for the 'stacked' case

828

# Where we will be getting the data from a fallback

829

if key not in positions:

830

continue

831

_, index_memo, compression_parent = positions[key]

832

all_build_index_memos[key] = index_memo

833

if compression_parent not in all_build_index_memos:

834

next_keys.add(compression_parent)

835

build_keys = next_keys

836

return sum([index_memo[2] for index_memo

837

in all_build_index_memos.itervalues()])

838

839

840

class KnitVersionedFiles(VersionedFiles):

841

"""Storage for many versioned files using knit compression.

842

843

Backend storage is managed by indices and data objects.

844

845

:ivar _index: A _KnitGraphIndex or similar that can describe the

846

parents, graph, compression and data location of entries in this

847

KnitVersionedFiles. Note that this is only the index for

848

*this* vfs; if there are fallbacks they must be queried separately.

849

"""

850

851

def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False, reload_func=None):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated and
        stored during insertion.
    :param reload_func: A function that can be called if we think we need
        to reload the pack listing and try again. See
        'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
    """
    # The content factory decides whether stored texts carry annotations.
    if annotated:
        factory = KnitAnnotateFactory()
    else:
        factory = KnitPlainFactory()
    self._index = index
    self._access = data_access
    self._max_delta_chain = max_delta_chain
    self._factory = factory
    # Fallbacks start empty; see add_fallback_versioned_files().
    self._fallback_vfs = []
    self._reload_func = reload_func

875

876

def __repr__(self):

877

return "%s(%r, %r)" % (

878

self.__class__.__name__,

879

self._index,

880

self._access)

881

882

def add_fallback_versioned_files(self, a_versioned_files):
    """Add a source of texts for texts not present in this knit.

    The new source is appended after any fallbacks already registered.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

888

889

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines().

    Checks that the index is writable and that the key and lines are
    acceptable, then hands the work to _add().
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so an empty tuple is used instead.
    if parents is None:
        parents = ()
    result = self._add(key, lines, parents, parent_texts,
                       left_matching_blocks, nostore_sha, random_id)
    return result

902

903

def _add(self, key, lines, parents, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    The text is stored as a line-delta only when the leftmost parent is
    present in this knit's own index and _check_should_delta() agrees;
    otherwise it is stored as a fulltext.

    :return: (sha1 digest, length of the joined text, content object).
    :raises errors.ExistingContent: If the sha1 of the text equals
        nostore_sha.
    :raises TypeError: If any element of key is not exactly a str.
    """
    # First find out whether this is content we do not need to store.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence; we only compress
    # against parents in the same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    present_parents = [parent for parent in parents
                       if parent in present_parent_map]

    # Currently we can only compress against the left most present parent.
    delta = False
    if present_parents and present_parents[0] == parents[0]:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas. This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents[0])

    # Measured before any newline is appended below.
    text_length = len(line_bytes)
    options = []
    no_eol = bool(lines) and lines[-1][-1] != '\n'
    if no_eol:
        # Work on a copy so the caller's list is not modified.
        lines = lines[:]
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'
        line_bytes += '\n'

    for element in key:
        if type(element) != str:
            raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if no_eol:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and present_parents):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, raw_record = self._record_to_data(key, digest, store_lines)
    elif self._factory.__class__ is KnitPlainFactory:
        # isinstance is slower and we have no hierarchy.
        options.append('fulltext')
        # Use the already joined bytes saving iteration time in
        # _record_to_data.
        size, raw_record = self._record_to_data(key, digest,
            lines, [line_bytes])
    else:
        options.append('fulltext')
        # get mixed annotation + content and feed it into the
        # serialiser.
        store_lines = self._factory.lower_fulltext(content)
        size, raw_record = self._record_to_data(key, digest, store_lines)

    access_memo = self._access.add_raw_records([(key, size)], raw_record)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

987

988

def annotate(self, key):
    """See VersionedFiles.annotate.

    The work is delegated to this knit's content factory.
    """
    factory = self._factory
    return factory.annotate(self, key)

991

992

def check(self, progress_bar=None):
    """See VersionedFiles.check().

    ``progress_bar`` is accepted but not used by this implementation.

    :raises errors.KnitCorrupt: If a non-fulltext entry names a basis
        parent that is absent from the parent map of the indexed keys.
    """
    # This doesn't actually test extraction of everything, but that will
    # impact 'bzr check' substantially, and needs to be integrated with
    # care. However, it does check for the obvious problem of a delta with
    # no basis.
    index = self._index
    keys = index.keys()
    parent_map = self.get_parent_map(keys)
    for key in keys:
        if index.get_method(key) == 'fulltext':
            continue
        # The first listed parent is taken as the compression basis.
        compression_parent = parent_map[key][0]
        if compression_parent in parent_map:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            compression_parent, key))
    for fallback_vfs in self._fallback_vfs:
        fallback_vfs.check()

1009

1010

def _check_add(self, key, lines, random_id, check_content):
    """Check that the last element of key, and lines, are safe to add.

    ``random_id`` is accepted but not consulted here.

    :raises InvalidRevisionId: If the last element of key contains
        whitespace.
    """
    last_element = key[-1]
    if contains_whitespace(last_element):
        raise InvalidRevisionId(last_element, self)
    self.check_not_reserved_id(last_element)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1023

1024

def _check_header(self, key, line):

1025

rec = self._split_header(line)

1026

self._check_header_version(rec, key[-1])

1027

return rec

1028

1029

def _check_header_version(self, rec, version_id):

1030

"""Checks the header version on original format knit records.

1031

1032

These have the last component of the key embedded in the record.

1033

"""

1034

if rec[1] != version_id:

1035

raise KnitCorrupt(self,

1036

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1037

1038

def _check_should_delta(self, parent):

1039

"""Iterate back through the parent listing, looking for a fulltext.

1040

1041

This is used when we want to decide whether to add a delta or a new

1042

fulltext. It searches for _max_delta_chain parents. When it finds a

1043

fulltext parent, it sees if the total size of the deltas leading up to

1044

it is large enough to indicate that we want a new full text anyway.

1045

1046

Return True if we should create a new delta, False if we should use a

1047

full text.

1048

"""

1049

delta_size = 0

1050

fulltext_size = None

1051

for count in xrange(self._max_delta_chain):

1052

try:

1053

# Note that this only looks in the index of this particular

1054

# KnitVersionedFiles, not in the fallbacks. This ensures that

1055

# we won't store a delta spanning physical repository

1056

# boundaries.

1057

build_details = self._index.get_build_details([parent])

1058

parent_details = build_details[parent]

1059

except (RevisionNotPresent, KeyError), e:

1060

# Some basis is not locally present: always fulltext

1061

return False

1062

index_memo, compression_parent, _, _ = parent_details

1063

_, _, size = index_memo

1064

if compression_parent is None:

1065

fulltext_size = size

1066

break

1067

delta_size += size

1068

# We don't explicitly check for presence because this is in an

1069

# inner loop, and if it's missing it'll fail anyhow.

1070

parent = compression_parent

1071

else:

1072

# We couldn't find a fulltext, so we must create a new one

1073

return False

1074

# Simple heuristic - if the total I/O wold be greater as a delta than

1075

# the originally installed fulltext, we create a new fulltext.

1076

return fulltext_size > delta_size

1077

1078

def _build_details_to_components(self, build_details):

1079

"""Convert a build_details tuple to a position tuple."""

1080

# record_details, access_memo, compression_parent

1081

return build_details[3], build_details[0], build_details[1]

1082

1083

def _get_components_positions(self, keys, allow_missing=False):

1084

"""Produce a map of position data for the components of keys.

1085

1086

This data is intended to be used for retrieving the knit records.

1087

1088

A dict of key to (record_details, index_memo, next, parents) is

1089

returned.

1090

method is the way referenced data should be applied.

1091

index_memo is the handle to pass to the data access to actually get the

1092

data

1093

next is the build-parent of the version, or None for fulltexts.

1094

parents is the version_ids of the parents of this version

1095

1096

:param allow_missing: If True do not raise an error on a missing component,

1097

just ignore it.

1098

"""

1099

component_data = {}

1100

pending_components = keys

1101

while pending_components:

1102

build_details = self._index.get_build_details(pending_components)

1103

current_components = set(pending_components)

1104

pending_components = set()

1105

for key, details in build_details.iteritems():

1106

(index_memo, compression_parent, parents,

1107

record_details) = details

1108

method = record_details[0]

1109

if compression_parent is not None:

1110

pending_components.add(compression_parent)

1111

component_data[key] = self._build_details_to_components(details)

1112

missing = current_components.difference(build_details)

1113

if missing and not allow_missing:

1114

raise errors.RevisionNotPresent(missing.pop(), self)

1115

return component_data

1116

1117

def _get_content(self, key, parent_texts={}):

1118

"""Returns a content object that makes up the specified

1119

version."""

1120

cached_version = parent_texts.get(key, None)

1121

if cached_version is not None:

1122

# Ensure the cache dict is valid.

1123

if not self.get_parent_map([key]):

1124

raise RevisionNotPresent(key, self)

1125

return cached_version

1126

generator = _VFContentMapGenerator(self, [key])

1127

return generator._get_content(key)

1128

1129

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined map is wanted here, not the per-source breakdown.
    map_and_sources = self._get_parent_map_with_sources(keys)
    return map_and_sources[0]

1137

1138

def _get_parent_map_with_sources(self, keys):

1139

"""Get a map of the parents of keys.

1140

1141

:param keys: The keys to look up parents for.

1142

:return: A tuple. The first element is a mapping from keys to parents.

1143

Absent keys are absent from the mapping. The second element is a

1144

list with the locations each key was found in. The first element

1145

is the in-this-knit parents, the second the first fallback source,

1146

and so on.

1147

"""

1148

result = {}

1149

sources = [self._index] + self._fallback_vfs

1150

source_results = []

1151

missing = set(keys)

1152

for source in sources:

1153

if not missing:

1154

break

1155

new_result = source.get_parent_map(missing)

1156

source_results.append(new_result)

1157

result.update(new_result)

1158

missing.difference_update(set(new_result))

1159

return result, source_results

1160

1161

def _get_record_map(self, keys, allow_missing=False):

1162

"""Produce a dictionary of knit records.

1163

1164

:return: {key:(record, record_details, digest, next)}

1165

record

1166

data returned from read_records (a KnitContentobject)

1167

record_details

1168

opaque information to pass to parse_record

1169

digest

1170

SHA1 digest of the full text after all steps are done

1171

1172

build-parent of the version, i.e. the leftmost ancestor.

1173

Will be None if the record is not a delta.

1174

:param keys: The keys to build a map for

1175

:param allow_missing: If some records are missing, rather than

1176

error, just return the data that could be generated.

1177

"""

1178

raw_map = self._get_record_map_unparsed(keys,

1179

allow_missing=allow_missing)

1180

return self._raw_map_to_record_map(raw_map)

1181

1182

def _raw_map_to_record_map(self, raw_map):

1183

"""Parse the contents of _get_record_map_unparsed.

1184

1185

:return: see _get_record_map.

1186

"""

1187

result = {}

1188

for key in raw_map:

1189

data, record_details, next = raw_map[key]

1190

content, digest = self._parse_record(key[-1], data)

1191

result[key] = content, record_details, digest, next

1192

return result

1193

1194

def _get_record_map_unparsed(self, keys, allow_missing=False):

1195

"""Get the raw data for reconstructing keys without parsing it.

1196

1197

:return: A dict suitable for parsing via _raw_map_to_record_map.

1198

key-> raw_bytes, (method, noeol), compression_parent

1199

"""

1200

# This retries the whole request if anything fails. Potentially we

1201

# could be a bit more selective. We could track the keys whose records

1202

# we have successfully found, and then only request the new records

1203

# from there. However, _get_components_positions grabs the whole build

1204

# chain, which means we'll likely try to grab the same records again

1205

# anyway. Also, can the build chains change as part of a pack

1206

# operation? We wouldn't want to end up with a broken chain.

1207

while True:

1208

try:

1209

position_map = self._get_components_positions(keys,

1210

allow_missing=allow_missing)

1211

# key = component_id, r = record_details, i_m = index_memo,

1212

# n = next

1213

records = [(key, i_m) for key, (r, i_m, n)

1214

in position_map.iteritems()]

1215

# Sort by the index memo, so that we request records from the

1216

# same pack file together, and in forward-sorted order

1217

records.sort(key=operator.itemgetter(1))

1218

raw_record_map = {}

1219

for key, data in self._read_records_iter_unchecked(records):

1220

(record_details, index_memo, next) = position_map[key]

1221

raw_record_map[key] = data, record_details, next

1222

return raw_record_map

1223

except errors.RetryWithNewPacks, e:

1224

self._access.reload_or_raise(e)

1225

1226

@classmethod

1227

def _split_by_prefix(cls, keys):

1228

"""For the given keys, split them up based on their prefix.

1229

1230

To keep memory pressure somewhat under control, split the

1231

requests back into per-file-id requests, otherwise "bzr co"

1232

extracts the full tree into memory before writing it to disk.

1233

This should be revisited if _get_content_maps() can ever cross

1234

file-id boundaries.

1235

1236

The keys for a given file_id are kept in the same relative order.

1237

Ordering between file_ids is not, though prefix_order will return the

1238

order that the key was first seen.

1239

1240

:param keys: An iterable of key tuples

1241

:return: (split_map, prefix_order)

1242

split_map A dictionary mapping prefix => keys

1243

prefix_order The order that we saw the various prefixes

1244

"""

1245

split_by_prefix = {}

1246

prefix_order = []

1247

for key in keys:

1248

if len(key) == 1:

1249

prefix = ''

1250

else:

1251

prefix = key[0]

1252

1253

if prefix in split_by_prefix:

1254

split_by_prefix[prefix].append(key)

1255

else:

1256

split_by_prefix[prefix] = [key]

1257

prefix_order.append(prefix)

1258

return split_by_prefix, prefix_order

1259

1260

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    Keys are first split by prefix. Whole prefixes are then accumulated
    into one batch until the batch's total build size exceeds
    ``_min_buffer_size``, at which point a new batch is started.

    :param keys: The keys to fetch.
    :param non_local_keys: Keys that are split by prefix alongside
        ``keys``; those sharing a prefix with a batch are returned with it.
    :param positions: Position data, passed to the index's
        _get_total_build_size() to size each prefix.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together. Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys,
                                                      positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    # Whatever is left over forms the final, possibly undersized, batch.
    if cur_keys:
        result.append((cur_keys, cur_non_local))
    return result

1302

1303

def get_record_stream(self, keys, ordering, include_delta_closure):

1304

"""Get a stream of records for keys.

1305

1306

:param keys: The keys to include.

1307

:param ordering: Either 'unordered' or 'topological'. A topologically

1308

sorted stream has compression parents strictly before their

1309

children.

1310

:param include_delta_closure: If True then the closure across any

1311

compression parents will be included (in the opaque data).

1312

:return: An iterator of ContentFactory objects, each of which is only

1313

valid until the iterator is advanced.

1314

"""

1315

# keys might be a generator

1316

keys = set(keys)

1317

if not keys:

1318

return

1319

if not self._index.has_graph:

1320

# Cannot sort when no graph has been stored.

1321

ordering = 'unordered'

1322

1323

remaining_keys = keys

1324

while True:

1325

try:

1326

keys = set(remaining_keys)

1327

for content_factory in self._get_remaining_record_stream(keys,

1328

ordering, include_delta_closure):

1329

remaining_keys.discard(content_factory.key)

1330

yield content_factory

1331

return

1332

except errors.RetryWithNewPacks, e:

1333

self._access.reload_or_raise(e)

1334

1335

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    :param keys: A set of the keys still to be streamed.
    :param ordering: 'unordered', 'topological' or 'groupcompress'.
    :param include_delta_closure: If True, records are produced through
        _VFContentMapGenerator in batches chosen by _group_keys_for_io();
        otherwise raw knit records are yielded for local keys and the
        fallbacks' own record streams are used for the rest.
    :return: A generator. An AbsentContentFactory is yielded first for
        every key that no source knows about.
    :raises AssertionError: If ordering is not one of the accepted values.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict(
            (key, self._build_details_to_components(build_details[key]))
            for key in build_details)
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                needed_from_fallback.add(key)
                continue
            result = True
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering in ('topological', 'groupcompress'):
        if ordering == 'topological':
            # Global topological sort
            present_keys = tsort.topo_sort(global_map)
        else:
            present_keys = sort_groupcompress(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered", "groupcompress" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    # A key is absent only if no source, local or fallback, reported it.
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        for batch_keys, batch_non_local in self._group_keys_for_io(
                present_keys, non_local_keys, positions):
            generator = _VFContentMapGenerator(self, batch_keys,
                                               batch_non_local, global_map)
            for record in generator.get_record_stream():
                yield record
    else:
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in sub_keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(
                        records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data,
                        self._factory.annotated, None)
            else:
                # parent_maps[0] is this knit's own index, so fallbacks
                # are offset by one.
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(sub_keys, ordering,
                                                   include_delta_closure):
                    yield record

1452

1453

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests are taken from this knit's own records first. Keys still
    unanswered are then requested from each fallback in turn.

    :return: A dict of key to sha1 for every key that could be answered.
    """
    missing = set(keys)
    record_map = self._get_record_map(missing, allow_missing=True)
    result = {}
    for key in record_map:
        # The record map can include keys that were not asked for; only
        # the requested ones are reported.
        if key in missing:
            # record entry 2 is the 'digest'.
            result[key] = record_map[key][2]
    missing.difference_update(set(result))
    for source in self._fallback_vfs:
        if not missing:
            break
        found = source.get_sha1s(missing)
        result.update(found)
        missing.difference_update(set(found))
    return result

1471

1472

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Knit records that can be used as they are (or after a cheap
    adaptation) are written straight to the data access. 'chunked' records
    and anything else are inserted through add_lines(). Index entries for
    deltas whose compression parent is not yet in this knit's index are
    held back until that parent arrives; any still held at the end of the
    stream are added with missing_compression_parents=True.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: If the stream contains an 'absent' record.
    :seealso VersionedFiles.get_record_stream:
    """
    adapters = {}

    def get_adapter(adapter_key):
        # Adapters are created on first use and kept for the rest of the
        # stream.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter

    delta_types = set()
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    if self._max_delta_chain:
        native_delta_kind = "knit-%sdelta-gz" % annotated
        native_types.add(native_delta_kind)
        delta_types.add(native_delta_kind)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        buffered = False
        parents = record.parents
        # TODO: eventually the record itself should track
        #       compression_parent
        compression_parent = None
        if record.storage_kind in delta_types:
            compression_parent = parents[0]
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind in native_types:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                raw_bytes = record._raw_record
            else:
                # Prefer converting to a delta; fall back to a fulltext
                # when no delta adapter is available.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                raw_bytes = adapter.get_bytes(record)
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(raw_bytes))], raw_bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            try:
                # Try getting a fulltext directly from the record.
                fulltext = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                fulltext = adapter.get_bytes(record)
            lines = split_lines(fulltext)
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key not in buffered_index_entries:
                    continue
                index_entries = buffered_index_entries[key]
                self._index.add_records(index_entries)
                # Entries just released may unblock others in turn.
                added_keys.extend(
                    [index_entry[0] for index_entry in index_entries])
                del buffered_index_entries[key]
    # There may be index entries still buffered at the end of the stream,
    # so these need to be added (if the index supports holding such
    # entries for later insertion)
    for key in buffered_index_entries:
        index_entries = buffered_index_entries[key]
        self._index.add_records(index_entries,
            missing_compression_parents=True)

1631

1632

def get_missing_compression_parent_keys(self):
    """Report the compression parents that are still missing.

    Call this once insert_record_stream has finished to learn whether any
    of the inserted records depend on compression parents that are absent.
    While such parents are missing, the records that depend on them cannot
    be inserted safely. For atomic KnitVersionedFiles built on packs, the
    transaction should be aborted or suspended - commit will fail at this
    point. Nonatomic knits will error earlier because they have no staging
    area to put pending entries into.

    :return: An iterable of keys, exactly as reported by the index.
    """
    index = self._index
    missing_parents = index.get_missing_compression_parents()
    return missing_parents

1643

1644

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: An iterable of keys; it is copied into a set which this
        method then shrinks as keys are served.
    :param pb: Progress bar supplied by caller.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: if keys are left over after reading the
        local index and there are no fallback versioned files to consult.
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    done = False
    # Retry loop: every pass asks only for the keys still in `keys`, so a
    # retry after RetryWithNewPacks does not re-yield keys already served.
    while not done:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            key_records = []
            build_details = self._index.get_build_details(keys)
            for key, details in build_details.iteritems():
                if key in keys:
                    # details[0] is the index memo used to read the record.
                    key_records.append((key, details[0]))
            records_iter = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in records_iter:
                pb.update('Walking content', key_idx, total)
                compression_parent = build_details[key][1]
                if compression_parent is None:
                    # fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            done = True
        except errors.RetryWithNewPacks, e:
            # Hand the error to the access object, which either reloads or
            # re-raises; if it returns we go round the loop again.
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._fallback_vfs:
        if not keys:
            break
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        # Only drop the keys this fallback actually produced lines for.
        keys.difference_update(source_keys)
    pb.update('Walking content', total, total)

1722

1723

def _make_line_delta(self, delta_seq, new_content):

1724

"""Generate a line delta from delta_seq and new_content."""

1725

diff_hunks = []

1726

for op in delta_seq.get_opcodes():

1727

if op[0] == 'equal':

1728

continue

1729

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1730

return diff_hunks

1731

1732

def _merge_annotations(self, content, parents, parent_texts={},

1733

delta=None, annotated=None,

1734

left_matching_blocks=None):

1735

"""Merge annotations for content and generate deltas.

1736

1737

This is done by comparing the annotations based on changes to the text

1738

and generating a delta on the resulting full texts. If annotations are

1739

not being created then a simple delta is created.

1740

"""

1741

if left_matching_blocks is not None:

1742

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1743

else:

1744

delta_seq = None

1745

if annotated:

1746

for parent_key in parents:

1747

merge_content = self._get_content(parent_key, parent_texts)

1748

if (parent_key == parents[0] and delta_seq is not None):

1749

seq = delta_seq

1750

else:

1751

seq = patiencediff.PatienceSequenceMatcher(

1752

None, merge_content.text(), content.text())

1753

for i, j, n in seq.get_matching_blocks():

1754

if n == 0:

1755

continue

1756

# this copies (origin, text) pairs across to the new

1757

# content for any line that matches the last-checked

1758

# parent.

1759

content._lines[j:j+n] = merge_content._lines[i:i+n]

1760

# XXX: Robert says the following block is a workaround for a

1761

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1762

if content._lines and content._lines[-1][1][-1] != '\n':

1763

# The copied annotation was from a line without a trailing EOL,

1764

# reinstate one for the content object, to ensure correct

1765

# serialization.

1766

line = content._lines[-1][1] + '\n'

1767

content._lines[-1] = (content._lines[-1][0], line)

1768

if delta:

1769

if delta_seq is None:

1770

reference_content = self._get_content(parents[0], parent_texts)

1771

new_texts = content.text()

1772

old_texts = reference_content.text()

1773

delta_seq = patiencediff.PatienceSequenceMatcher(

1774

None, old_texts, new_texts)

1775

return self._make_line_delta(delta_seq, content)

1776

1777

def _parse_record(self, version_id, data):

1778

"""Parse an original format knit record.

1779

1780

These have the last element of the key only present in the stored data.

1781

"""

1782

rec, record_contents = self._parse_record_unchecked(data)

1783

self._check_header_version(rec, version_id)

1784

return record_contents, rec[3]

1785

1786

def _parse_record_header(self, key, raw_data):

1787

"""Parse a record header for consistency.

1788

1789

:return: the header and the decompressor stream.

1790

as (stream, header_record)

1791

"""

1792

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1793

try:

1794

# Current serialise

1795

rec = self._check_header(key, df.readline())

1796

except Exception, e:

1797

raise KnitCorrupt(self,

1798

"While reading {%s} got %s(%s)"

1799

% (key, e.__class__.__name__, str(e)))

1800

return df, rec

1801

1802

def _parse_record_unchecked(self, data):

1803

# profiling notes:

1804

# 4168 calls in 2880 217 internal

1805

# 4168 calls to _parse_record_header in 2121

1806

# 4168 calls to readlines in 330

1807

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1808

try:

1809

record_contents = df.readlines()

1810

except Exception, e:

1811

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1812

(data, e.__class__.__name__, str(e)))

1813

header = record_contents.pop(0)

1814

rec = self._split_header(header)

1815

last_line = record_contents.pop()

1816

if len(record_contents) != int(rec[2]):

1817

raise KnitCorrupt(self,

1818

'incorrect number of lines %s != %s'

1819

' for version {%s} %s'

1820

% (len(record_contents), int(rec[2]),

1821

rec[1], record_contents))

1822

if last_line != 'end %s\n' % rec[1]:

1823

raise KnitCorrupt(self,

1824

'unexpected version end line %r, wanted %r'

1825

% (last_line, rec[1]))

1826

df.close()

1827

return rec, record_contents

1828

1829

def _read_records_iter(self, records):

1830

"""Read text records from data file and yield result.

1831

1832

The result will be returned in whatever is the fastest to read.

1833

Not by the order requested. Also, multiple requests for the same

1834

record will only yield 1 response.

1835

:param records: A list of (key, access_memo) entries

1836

:return: Yields (key, contents, digest) in the order

1837

read, not the order requested

1838

"""

1839

if not records:

1840

return

1841

1842

# XXX: This smells wrong, IO may not be getting ordered right.

1843

needed_records = sorted(set(records), key=operator.itemgetter(1))

1844

if not needed_records:

1845

return

1846

1847

# The transport optimizes the fetching as well

1848

# (ie, reads continuous ranges.)

1849

raw_data = self._access.get_raw_records(

1850

[index_memo for key, index_memo in needed_records])

1851

1852

for (key, index_memo), data in \

1853

izip(iter(needed_records), raw_data):

1854

content, digest = self._parse_record(key[-1], data)

1855

yield key, content, digest

1856

1857

def _read_records_iter_raw(self, records):

1858

"""Read text records from data file and yield raw data.

1859

1860

This unpacks enough of the text record to validate the id is

1861

as expected but thats all.

1862

1863

Each item the iterator yields is (key, bytes,

1864

expected_sha1_of_full_text).

1865

"""

1866

for key, data in self._read_records_iter_unchecked(records):

1867

# validate the header (note that we can only use the suffix in

1868

# current knit records).

1869

df, rec = self._parse_record_header(key, data)

1870

df.close()

1871

yield key, data, rec[3]

1872

1873

def _read_records_iter_unchecked(self, records):

1874

"""Read text records from data file and yield raw data.

1875

1876

No validation is done.

1877

1878

Yields tuples of (key, data).

1879

"""

1880

# setup an iterator of the external records:

1881

# uses readv so nice and fast we hope.

1882

if len(records):

1883

# grab the disk data needed.

1884

needed_offsets = [index_memo for key, index_memo

1885

in records]

1886

raw_records = self._access.get_raw_records(needed_offsets)

1887

1888

for key, index_memo in records:

1889

data = raw_records.next()

1890

yield key, data

1891

1892

def _record_to_data(self, key, digest, lines, dense_lines=None):

1893

"""Convert key, digest, lines into a raw data block.

1894

1895

:param key: The key of the record. Currently keys are always serialised

1896

using just the trailing component.

1897

:param dense_lines: The bytes of lines but in a denser form. For

1898

instance, if lines is a list of 1000 bytestrings each ending in \n,

1899

dense_lines may be a list with one line in it, containing all the

1900

1000's lines and their \n's. Using dense_lines if it is already

1901

known is a win because the string join to create bytes in this

1902

function spends less time resizing the final string.

1903

:return: (len, a StringIO instance with the raw data ready to read.)

1904

"""

1905

# Note: using a string copy here increases memory pressure with e.g.

1906

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

1907

# when doing the initial commit of a mozilla tree. RBC 20070921

1908

bytes = ''.join(chain(

1909

["version %s %d %s\n" % (key[-1],

1910

len(lines),

1911

digest)],

1912

dense_lines or lines,

1913

["end %s\n" % key[-1]]))

1914

if type(bytes) != str:

1915

raise AssertionError(

1916

'data must be plain bytes was %s' % type(bytes))

1917

if lines and lines[-1][-1] != '\n':

1918

raise ValueError('corrupt lines value %r' % lines)

1919

compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)

1920

return len(compressed_bytes), compressed_bytes

1921

1922

def _split_header(self, line):

1923

rec = line.split()

1924

if len(rec) != 4:

1925

raise KnitCorrupt(self,

1926

'unexpected number of elements in record header')

1927

return rec

1928

1929

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of this knit's index together with the
        keys of every fallback versioned files object.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for store in [self._index] + self._fallback_vfs:
        all_keys.update(store.keys())
    return all_keys

1938

1939

1940

class _ContentMapGenerator(object):
    """Generate texts or expose raw deltas for a set of texts.

    The methods here read these attributes, which are not set in this
    class itself: keys, nonlocal_keys, global_map, vf, _factory,
    _raw_record_map, _record_map and _contents_map.
    """

    def _get_content(self, key):
        """Get the content object for key."""
        # Note that _get_content is only called when the _ContentMapGenerator
        # has been constructed with just one key requested for reconstruction.
        if key in self.nonlocal_keys:
            # Take the first record of the stream; with a single requested
            # key that lives in a fallback this is the record for it.
            record = self.get_record_stream().next()
            # Create a content object on the fly
            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            return PlainKnitContent(lines, record.key)
        else:
            # local keys we can ask for directly
            return self._get_one_work(key)

    def get_record_stream(self):
        """Get a record stream for the keys requested during __init__."""
        for record in self._work():
            yield record

    def _work(self):
        """Yield the records for the requested keys.

        This is a generator. It first yields the non-absent records that
        the fallback versioned files return for the nonlocal keys, then
        (re)reads the unparsed record map for self.keys and yields one
        LazyKnitContentFactory per key that is not nonlocal. Only the first
        of those factories is created with first=True.
        """
        # NB: By definition we never need to read remote sources unless texts
        # are requested from them: we don't delta across stores - and we
        # explicitly do not want to to prevent data loss situations.
        if self.global_map is None:
            self.global_map = self.vf.get_parent_map(self.keys)
        nonlocal_keys = self.nonlocal_keys

        missing_keys = set(nonlocal_keys)
        # Read from remote versioned file instances and provide to our caller.
        for source in self.vf._fallback_vfs:
            if not missing_keys:
                break
            # Loop over fallback repositories asking them for texts - ignore
            # any missing from a particular fallback.
            for record in source.get_record_stream(missing_keys,
                'unordered', True):
                if record.storage_kind == 'absent':
                    # Not in this particular stream, may be in one of the
                    # other fallback vfs objects.
                    continue
                missing_keys.remove(record.key)
                yield record

        self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,
            allow_missing=True)
        first = True
        for key in self.keys:
            if key in self.nonlocal_keys:
                continue
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _get_one_work(self, requested_key):
        """Return the content object for requested_key.

        Content is built for every local key in self.keys in one go. When
        more than one key was requested the results are cached in
        self._contents_map and reused by later calls.

        :raises RevisionNotPresent: if a component needed to build a text
            is not in the record map.
        :raises SHA1KnitCorrupt: if a rebuilt text does not match the
            digest recorded for it.
        """
        # Now, if we have calculated everything already, just return the
        # desired text.
        if requested_key in self._contents_map:
            return self._contents_map[requested_key]
        # To simplify things, parse everything at once - code that wants one text
        # probably wants them all.
        # FUTURE: This function could be improved for the 'extract many' case
        # by tracking each component and only doing the copy when the number of
        # children than need to apply delta's to it is > 1 or it is part of the
        # final output.
        multiple_versions = len(self.keys) != 1
        if self._record_map is None:
            # Parse the raw records lazily, the first time they are needed.
            self._record_map = self.vf._raw_map_to_record_map(
                self._raw_record_map)
        record_map = self._record_map
        # raw_record_map is key:
        # Have read and parsed records at this point.
        for key in self.keys:
            if key in self.nonlocal_keys:
                # already handled
                continue
            # Walk back along the compression chain, collecting the
            # components needed to rebuild this key, newest first.
            components = []
            cursor = key
            while cursor is not None:
                try:
                    record, record_details, digest, next = record_map[cursor]
                except KeyError:
                    raise RevisionNotPresent(cursor, self)
                components.append((cursor, record, record_details, digest))
                cursor = next
                if cursor in self._contents_map:
                    # no need to plan further back
                    components.append((cursor, None, None, None))
                    break

            # Apply the components oldest first; each parse_record call is
            # given the content produced by the previous component.
            content = None
            for (component_id, record, record_details,
                 digest) in reversed(components):
                if component_id in self._contents_map:
                    content = self._contents_map[component_id]
                else:
                    content, delta = self._factory.parse_record(key[-1],
                        record, record_details, content,
                        copy_base_content=multiple_versions)
                    if multiple_versions:
                        self._contents_map[component_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            actual_sha = sha_strings(text)
            if actual_sha != digest:
                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
        if multiple_versions:
            return self._contents_map[requested_key]
        else:
            # With a single key nothing was cached; content is the text
            # built by the one loop iteration above.
            return content

    def _wire_bytes(self):
        """Get the bytes to put on the wire for this generator.

        The result consists of, joined by newlines: the kind marker
        'knit-delta-closure', a line that is 'annotated' or empty, a line
        listing the keys that are not nonlocal, and then the serialised
        _raw_record_map.

        NOTE(review): an earlier version of this docstring described a
        'key' parameter and a 'knit-delta-closure-ref' kind for subsequent
        keys; this method takes no key and emits neither - presumably the
        caller handles that, confirm there.

        :return: Bytes to put on the wire.
        """
        lines = []
        # kind marker for dispatch on the far side,
        lines.append('knit-delta-closure')
        # Annotated or not
        if self.vf._factory.annotated:
            lines.append('annotated')
        else:
            lines.append('')
        # then the list of keys
        lines.append('\t'.join(['\x00'.join(key) for key in self.keys
            if key not in self.nonlocal_keys]))
        # then the _raw_record_map in serialised form:
        map_byte_list = []
        # for each item in the map:
        # 1 line with key
        # 1 line with parents if the key is to be yielded (None: for None, '' for ())
        # one line with method
        # one line with noeol
        # one line with next ('' for None)
        # one line with byte count of the record bytes
        # the record bytes
        for key, (record_bytes, (method, noeol), next) in \
            self._raw_record_map.iteritems():
            key_bytes = '\x00'.join(key)
            parents = self.global_map.get(key, None)
            if parents is None:
                parent_bytes = 'None:'
            else:
                parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
            method_bytes = method
            if noeol:
                noeol_bytes = "T"
            else:
                noeol_bytes = "F"
            if next:
                next_bytes = '\x00'.join(next)
            else:
                next_bytes = ''
            map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
                key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
                len(record_bytes), record_bytes))
        map_bytes = ''.join(map_byte_list)
        lines.append(map_bytes)
        bytes = '\n'.join(lines)
        return bytes

2114

2115

2116

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
        global_map=None, raw_record_map=None):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for. Any iterable is
            accepted; it is iterated exactly once.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: A unparsed raw record map to use for answering
            contents. When None it is read from versioned_files.
        """
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list: `keys` may be a one-shot iterable
            # that list(keys) above has already exhausted.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2159

2160

2161

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised block.
        :param line_end: The offset in bytes at which parsing starts.
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)
        # Annotated or not
        flag, pos = self._take_line(bytes, line_end)
        if flag == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        key_line, pos = self._take_line(bytes, pos)
        self.keys = self._split_key_list(key_line)
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives, but
        # there is a decent sized gap stopping that at the moment.
        total = len(bytes)
        while pos < total:
            # 1 line with key
            key_text, pos = self._take_line(bytes, pos)
            key = tuple(key_text.split('\x00'))
            # 1 line with parents (None: for None, '' for ())
            parent_line, pos = self._take_line(bytes, pos)
            if parent_line == 'None:':
                parents = None
            else:
                parents = tuple(self._split_key_list(parent_line))
            self.global_map[key] = parents
            # one line with method
            method, pos = self._take_line(bytes, pos)
            # one line with noeol
            noeol_flag, pos = self._take_line(bytes, pos)
            noeol = noeol_flag == "T"
            # one line with next ('' for None)
            next_line, pos = self._take_line(bytes, pos)
            if next_line:
                next_key = tuple(next_line.split('\x00'))
            else:
                next_key = None
            # one line with byte count of the record bytes
            count_line, pos = self._take_line(bytes, pos)
            count = int(count_line)
            # the record bytes
            record_bytes = bytes[pos:pos+count]
            pos = pos + count
            # put it in the map
            self._raw_record_map[key] = (
                record_bytes, (method, noeol), next_key)

    def _take_line(self, text, start):
        """Return (line, next_start) for the line of text beginning at start.

        The line excludes its terminating newline; next_start is the offset
        just after that newline.
        """
        line_end = text.find('\n', start)
        return text[start:line_end], line_end + 1

    def _split_key_list(self, line):
        """Split a tab separated line of NUL-joined keys into key tuples."""
        return [tuple(segment.split('\x00'))
                for segment in line.split('\t') if segment]

    def get_record_stream(self):
        """Get a record stream for for keys requested by the bytestream."""
        for position, key in enumerate(self.keys):
            # Only the very first factory is flagged as first.
            yield LazyKnitContentFactory(
                key, self.global_map[key], self, position == 0)

    def _wire_bytes(self):
        """Return the bytes block this generator was constructed from."""
        return self._bytes

2248

2249

2250

class _KndxIndex(object):

2251

"""Manages knit index files

2252

2253

The index is kept in memory and read on startup, to enable

2254

fast lookups of revision information. The cursor of the index

2255

file is always pointing to the end, making it easy to append

2256

entries.

2257

2258

_cache is a cache for fast mapping from version id to a Index

2259

object.

2260

2261

_history is a cache for fast mapping from indexes to version ids.

2262

2263

The index data format is dictionary compressed when it comes to

2264

parent references; an index entry may only have parents with a

2265

lower index number. As a result, the index is topologically sorted.

2266

2267

Duplicate entries may be written to the index for a single version id

2268

if this is done then the latter one completely replaces the former:

2269

this allows updates to correct version and parent information.

2270

Note that the two entries may share the delta, and that successive

2271

annotations and references MUST point to the first entry.

2272

2273

The index file on disc contains a header, followed by one line per knit

2274

record. The same revision can be present in an index file more than once.

2275

The first occurrence gets assigned a sequence number starting from 0.

2276

2277

The format of a single line is

2278

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

2279

REVISION_ID is a utf8-encoded revision id

2280

FLAGS is a comma separated list of flags about the record. Values include

2281

no-eol, line-delta, fulltext.

2282

BYTE_OFFSET is the ascii representation of the byte offset in the data file

2283

that the compressed data starts at.

2284

LENGTH is the ascii representation of the length of the data file.

2285

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

2286

REVISION_ID.

2287

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

2288

revision id already in the knit that is a parent of REVISION_ID.

2289

The ' :' marker is the end of record marker.

2290

2291

partial writes:

2292

when a write is interrupted to the index file, it will result in a line

2293

that does not end in ' :'. If the ' :' is not present at the end of a line,

2294

or at the end of the file, then the record that is missing it will be

2295

ignored by the parser.

2296

2297

When writing new records to the index file, the data is preceded by '\n'

2298

to ensure that records always start on new lines even if the last write was

2299

interrupted. As a result its normal for the last line in the index to be

2300

missing a trailing newline. One can be added with no harmful effects.

2301

2302

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2303

where prefix is e.g. the (fileid,) for .texts instances or () for

2304

constant-mapped things like .revisions, and the old state is

2305

tuple(cache_dict, history_vector). This is used to prevent having an

2306

ABI change with the C extension that reads .kndx files.

2307

"""

2308

2309

HEADER = "# bzr knit index 8\n"

2310

2311

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    :param transport: The transport the .kndx files are written through.
    :param mapper: Object whose map(key) gives the path (without the
        '.kndx' suffix) of the index file for a key.
    :param get_scope: Callable returning the current scope; the cache is
        reset whenever its value differs from the remembered scope.
    :param allow_writes: Stored as given; not consulted in this method.
    :param is_locked: Callable returning whether the owning object is
        locked.
    """
    self._transport = transport
    self._mapper = mapper
    self._get_scope = get_scope
    self._allow_writes = allow_writes
    self._is_locked = is_locked
    # Start with an empty cache; the callables above are already in place
    # in case _reset_cache consults them.
    self._reset_cache()
    self.has_graph = True

2320

2321

def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file they belong to, and each file
    is updated in turn. If anything fails while a file is being updated,
    the in-memory cache for that file's prefix is restored to its state
    from before the update and the error is re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed. (Not consulted by
        this implementation.)
    :param missing_compression_parents: If True the records being added
        refer to compression parents that are not available. A kndx index
        cannot hold such records, so RevisionNotPresent is raised. If
        False the records are added normally.
    :raises RevisionNotPresent: if missing_compression_parents is True.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # Group the records by index file: path -> (prefix, [records]).
    paths = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        path_keys = paths.setdefault(path, (prefix, []))
        path_keys[1].append(record)
    for path in sorted(paths):
        prefix, path_keys = paths[path]
        self._load_prefixes([prefix])
        lines = []
        # Snapshot the (cache, history) pair so it can be put back if the
        # update below fails part way through.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_keys:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                # Each line starts with '\n' so that a record always
                # begins on a fresh line of the index file.
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(orig_history):
                # The index already had entries: append to it.
                self._transport.append_bytes(path, ''.join(lines))
            else:
                # Nothing was recorded for this prefix before: create it.
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

2373

2374

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: always; kndx indices do not support this
        operation.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    raise NotImplementedError(self.scan_unvalidated_index)

2379

2380

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    :raises NotImplementedError: always; kndx indices do not support this
        operation.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    raise NotImplementedError(self.get_missing_compression_parents)

2385

2386

def _cache_key(self, key, options, pos, size, parent_keys):

2387

"""Cache a version record in the history array and index cache.

2388

2389

This is inlined into _load_data for performance. KEEP IN SYNC.

2390

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

2391

indexes).

2392

"""

2393

prefix = key[:-1]

2394

version_id = key[-1]

2395

# last-element only for compatibilty with the C load_data.

2396

parents = tuple(parent[-1] for parent in parent_keys)

2397

for parent in parent_keys:

2398

if parent[:-1] != prefix:

2399

raise ValueError("mismatched prefixes for %r, %r" % (

2400

key, parent_keys))

2401

cache, history = self._kndx_cache[prefix]

2402

# only want the _history index to reference the 1st index entry

2403

# for version_id

2404

if version_id not in cache:

2405

index = len(history)

2406

history.append(version_id)

2407

else:

2408

index = cache[version_id][5]

2409

cache[version_id] = (version_id,

2410

options,

2411

pos,

2412

size,

2413

parents,

2414

index)

2415

2416

def check_header(self, fp):
    """Check that fp begins with this index's header line.

    One line is read from fp.

    :param fp: A file-like object to read the header line from.
    :raises errors.NoSuchFile: If the line read is empty.
    :raises KnitHeaderError: If the line read differs from self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    elif first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)

2424

2425

def _check_read(self):

2426

if not self._is_locked():

2427

raise errors.ObjectNotLocked(self)

2428

if self._get_scope() != self._scope:

2429

self._reset_cache()

2430

2431

def _check_write_ok(self):

2432

"""Assert if not writes are permitted."""

2433

if not self._is_locked():

2434

raise errors.ObjectNotLocked(self)

2435

if self._get_scope() != self._scope:

2436

self._reset_cache()

2437

if self._mode != 'w':

2438

raise errors.ReadOnlyObjectDirtiedError(self)

2439

2440

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys. One-shot iterables (iterators,
        generators) are supported.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked twice (once by get_parent_map, once below); snapshot it
    # so that an iterator is not already exhausted by the second pass.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Non-fulltext records are built upon their first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

2475

2476

def get_method(self, key):
    """Return compression method of specified key.

    :param key: The key to look up.
    :return: 'fulltext' if the key's options contain it, otherwise
        'line-delta' if the options contain that.
    :raises errors.KnitIndexUnknownMethod: If the options name neither
        method.
    """
    options = self.get_options(key)
    # 'fulltext' is tested first, so it wins if both are present.
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

2485

2486

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :param key: The key to look up; its prefix is loaded on demand.
    :raises RevisionNotPresent: If the key is not in the loaded index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        record = self._kndx_cache[prefix][0][suffix]
    except KeyError:
        raise RevisionNotPresent(key, self)
    # Slot 1 of a cached record holds its options.
    return record[1]

2497

2498

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for. One-shot iterables
        (iterators, generators) are supported.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice (prefix collection, then lookup); snapshot it so
    # that an iterator is not already exhausted by the second pass.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Not present in the index: leave it out of the result.
            pass
        else:
            # Parents are cached as bare suffixes; rebuild the full keys.
            result[key] = tuple(prefix + (suffix,) for
                suffix in suffix_parents)
    return result

2521

2522

def get_position(self, key):
    """Return details needed to access the version.

    :param key: The key to look up; its prefix is loaded on demand.
    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    cache = self._kndx_cache[prefix][0]
    record = cache[suffix]
    # Slots 2 and 3 of a cached record hold its position and size.
    pos = record[2]
    size = record[3]
    return key, pos, size

2532

2533

has_key = _mod_index._has_key_from_parent_map

2534

2535

def _init_index(self, path, extra_lines=[]):

2536

"""Initialize an index."""

2537

sio = StringIO()

2538

sio.write(self.HEADER)

2539

sio.writelines(extra_lines)

2540

sio.seek(0)

2541

self._transport.put_file_non_atomic(path, sio,

2542

create_parent_dir=True)

2543

# self._create_parent_dir)

2544

# mode=self._file_mode,

2545

# dir_mode=self._dir_mode)

2546

2547

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension from every file on the transport so that each
        # distinct base path is unmapped once.
        relpaths = set(os.path.splitext(quoted_relpath)[0] for
            quoted_relpath in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    found = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        found.update(prefix + (suffix,) for suffix in history)
    return found

2568

2569

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already present in self._kndx_cache are skipped. For every
    other prefix the mapped '.kndx' file is fetched from the transport and
    parsed by _load_data, and the resulting (cache dict, history list) pair
    is stored in self._kndx_cache. If NoSuchFile is raised, an empty pair
    is stored instead.

    :param prefixes: An iterable of key prefixes.
    """
    self._check_read()
    for prefix in prefixes:
        if prefix not in self._kndx_cache:
            # the load_data interface writes to these variables.
            self._cache = {}
            self._history = []
            self._filename = prefix
            try:
                path = self._mapper.map(prefix) + '.kndx'
                fp = self._transport.get(path)
                try:
                    # _load_data may raise NoSuchFile if the target knit is
                    # completely empty.
                    _load_data(self, fp)
                finally:
                    fp.close()
                self._kndx_cache[prefix] = (self._cache, self._history)
                # The scratch attributes only exist for the duration of a
                # load; drop them again.
                del self._cache
                del self._filename
                del self._history
            except NoSuchFile:
                # Remember the prefix as empty.
                self._kndx_cache[prefix] = ({}, [])
                if type(self._mapper) == ConstantMapper:
                    # preserve behaviour for revisions.kndx etc.
                    self._init_index(path)
                del self._cache
                del self._filename
                del self._history

2599

2600

missing_keys = _mod_index._missing_keys_from_parent_map

2601

2602

def _partition_keys(self, keys):

2603

"""Turn keys into a dict of prefix:suffix_list."""

2604

result = {}

2605

for key in keys:

2606

prefix_keys = result.setdefault(key[:-1], [])

2607

prefix_keys.append(key[-1])

2608

return result

2609

2610

def _dictionary_compress(self, keys):

2611

"""Dictionary compress keys.

2612

2613

:param keys: The keys to generate references to.

2614

:return: A string representation of keys. keys which are present are

2615

dictionary compressed, and others are emitted as fulltext with a

2616

'.' prefix.

2617

"""

2618

if not keys:

2619

return ''

2620

result_list = []

2621

prefix = keys[0][:-1]

2622

cache = self._kndx_cache[prefix][0]

2623

for key in keys:

2624

if key[:-1] != prefix:

2625

# kndx indices cannot refer across partitioned storage.

2626

raise ValueError("mismatched prefixes for %r" % keys)

2627

if key[-1] in cache:

2628

# -- inlined lookup() --

2629

result_list.append(str(cache[key[-1]][5]))

2630

# -- end lookup () --

2631

else:

2632

result_list.append('.' + key[-1])

2633

return ' '.join(result_list)

2634

2635

def _reset_cache(self):

2636

# Possibly this should be a LRU cache. A dictionary from key_prefix to

2637

# (cache_dict, history_vector) for parsed kndx files.

2638

self._kndx_cache = {}

2639

self._scope = self._get_scope()

2640

allow_writes = self._allow_writes()

2641

if allow_writes:

2642

self._mode = 'w'

2643

else:

2644

self._mode = 'r'

2645

2646

def _sort_keys_by_io(self, keys, positions):

2647

"""Figure out an optimal order to read the records for the given keys.

2648

2649

Sort keys, grouped by index and sorted by position.

2650

2651

:param keys: A list of keys whose records we want to read. This will be

2652

sorted 'in-place'.

2653

:param positions: A dict, such as the one returned by

2654

_get_components_positions()

2655

:return: None

2656

"""

2657

def get_sort_key(key):

2658

index_memo = positions[key][1]

2659

# Group by prefix and position. index_memo[0] is the key, so it is

2660

# (file_id, revision_id) and we don't want to sort on revision_id,

2661

# index_memo[1] is the position, and index_memo[2] is the size,

2662

# which doesn't matter for the sort

2663

return index_memo[0][:-1], index_memo[1]

2664

return keys.sort(key=get_sort_key)

2665

2666

_get_total_build_size = _get_total_build_size

2667

2668

def _split_key(self, key):

2669

"""Split key into a prefix and suffix."""

2670

return key[:-1], key[-1]

2671

2672

2673

class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex."""

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
        add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked and
            thus usable (i.e. whether the object should answer queries).
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: If deltas is true while parents is false.
        """
        # These are assigned before the sanity check below because the
        # error is constructed with self, and __repr__ reads _graph_index.
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked
        self._missing_compression_parents = set()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._graph_index)

    def add_records(self, records, random_id=False,
        missing_compression_parents=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
                         (key, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :param missing_compression_parents: If True, the compression parent
            of every line-delta record is remembered as missing unless it
            is among the keys handed to the add callback by this call (see
            get_missing_compression_parents). If False, no compression
            parents are remembered as missing.
        :raises errors.ReadOnlyError: If no add callback was supplied.
        :raises KnitCorrupt: For a line-delta in a non-delta index, for
            parents in a parentless index, or when a key that is already
            present has inconsistent details.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        compression_parents = set()
        for (key, options, access_memo, parents) in records:
            if self._parents:
                parents = tuple(parents)
            index, pos, size = access_memo
            # The value is an eol marker ('N' for no-eol, otherwise a
            # space) followed by "<pos> <size>".
            if 'no-eol' in options:
                eol_flag = 'N'
            else:
                eol_flag = ' '
            value = eol_flag + "%d %d" % (pos, size)
            is_delta = 'line-delta' in options
            if is_delta and not self._deltas:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if not self._parents:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            elif not self._deltas:
                node_refs = (parents, )
            elif is_delta:
                # A delta is compressed against its first parent.
                node_refs = (parents, (parents[0],))
                if missing_compression_parents:
                    compression_parents.add(parents[0])
            else:
                node_refs = (parents, ())
            keys[key] = (value, node_refs)
        # check for dups
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                pending_value, pending_refs = keys[key]
                if (value[0] != pending_value[0] or
                    node_refs[:1] != pending_refs[:1]):
                    raise KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                # Already present with matching details: do not add again.
                del keys[key]
        if self._parents:
            result = [(key, value, node_refs)
                      for key, (value, node_refs) in keys.iteritems()]
        else:
            result = [(key, value)
                      for key, (value, node_refs) in keys.iteritems()]
        self._add_callback(result)
        if missing_compression_parents:
            # This may appear to be incorrect (it does not check for
            # compression parents that are in the existing graph index),
            # but such records won't have been buffered, so this is
            # actually correct: every entry when
            # missing_compression_parents==True either has a missing parent, or
            # a parent that is one of the keys in records.
            compression_parents.difference_update(keys)
            self._missing_compression_parents.update(compression_parents)
        # Adding records may have satisfied missing compression parents.
        self._missing_compression_parents.difference_update(keys)

    def scan_unvalidated_index(self, graph_index):
        """Inform this _KnitGraphIndex that there is an unvalidated index.

        This allows this _KnitGraphIndex to keep track of any missing
        compression parents we may want to have filled in to make those
        indices valid.

        :param graph_index: A GraphIndex
        """
        if not self._deltas:
            # Without deltas nothing is recorded.
            return
        new_missing = graph_index.external_references(ref_list_num=1)
        new_missing.difference_update(self.get_parent_map(new_missing))
        self._missing_compression_parents.update(new_missing)

    def get_missing_compression_parents(self):
        """Return the keys of missing compression parents.

        Missing compression parents occur when a record stream was missing
        basis texts, or a index was scanned that had missing basis texts.

        :return: A frozenset snapshot of the currently missing keys.
        """
        return frozenset(self._missing_compression_parents)

    def _check_read(self):
        """raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Raise if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _compression_parent(self, an_entry):
        """Return the key that an_entry is compressed against, or None.

        :raises AssertionError: If the entry lists more than one compression
            parent.
        """
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        if len(compression_parents) != 1:
            raise AssertionError(
                "Too many compression parents: %r" % compression_parents)
        return compression_parents[0]

    def get_build_details(self, keys):
        """Get the method, index_memo and compression parent for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        for entry in self._get_entries(keys, False):
            key = entry[1]
            if self._parents:
                parents = entry[3][0]
            else:
                parents = ()
            if self._deltas:
                compression_parent_key = self._compression_parent(entry)
            else:
                compression_parent_key = None
            noeol = (entry[2][0] == 'N')
            if compression_parent_key:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[key] = (self._node_to_position(entry),
                           compression_parent_key, parents,
                           (method, noeol))
        return result

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator of (index, key, value, node_refs) tuples; for a
        parentless index node_refs is always ().

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise RevisionNotPresent after the
            found entries have been yielded if any key was not found.
        """
        keys = set(keys)
        found_keys = set()
        with_parents = self._parents
        for node in self._graph_index.iter_entries(keys):
            if with_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def get_method(self, key):
        """Return compression method of specified key."""
        return self._get_method(self._get_node(key))

    def _get_method(self, node):
        """Return 'line-delta' or 'fulltext' for an index node."""
        if self._deltas and self._compression_parent(node):
            return 'line-delta'
        return 'fulltext'

    def _get_node(self, key):
        """Return the first index entry for key.

        :raises RevisionNotPresent: If there is no entry for key.
        """
        try:
            return list(self._get_entries([key]))[0]
        except IndexError:
            raise RevisionNotPresent(key, self)

    def get_options(self, key):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(key)
        options = [self._get_method(node)]
        # A leading 'N' in the node value marks a text without final eol.
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping. For a parentless index every value is None.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        if self._parents:
            return dict((node[1], node[3][0]) for node in nodes)
        return dict((node[1], None) for node in nodes)

    def get_position(self, key):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        return self._node_to_position(self._get_node(key))

    has_key = _mod_index._has_key_from_parent_map

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    missing_keys = _mod_index._missing_keys_from_parent_map

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        # Skip the leading eol marker; the rest is "<pos> <size>".
        fields = node[2][1:].split(' ')
        pos = int(fields[0])
        size = int(fields[1])
        return node[0], pos, size

    def _sort_keys_by_io(self, keys, positions):
        """Figure out an optimal order to read the records for the given keys.

        Sort keys, grouped by index and sorted by position.

        :param keys: A list of keys whose records we want to read. This will be
            sorted 'in-place'.
        :param positions: A dict, such as the one returned by
            _get_components_positions()
        :return: None
        """
        # index_memo is at offset [1]. It is made up of (GraphIndex,
        # position, size). GI is an object, which will be unique for each
        # pack file. This causes us to group by pack file, then sort by
        # position. Size doesn't matter, but it isn't worth breaking up the
        # tuple.
        keys.sort(key=lambda key: positions[key][1])
        return None

    _get_total_build_size = _get_total_build_size

2990

2991

2992

class _KnitKeyAccess(object):

2993

"""Access to records in .knit files."""

2994

2995

def __init__(self, transport, mapper):

2996

"""Create a _KnitKeyAccess with transport and mapper.

2997

2998

:param transport: The transport the access object is rooted at.

2999

:param mapper: The mapper used to map keys to .knit files.

3000

"""

3001

self._transport = transport

3002

self._mapper = mapper

3003

3004

def add_raw_records(self, key_sizes, raw_data):

3005

"""Add raw knit bytes to a storage area.

3006

3007

The data is spooled to the container writer in one bytes-record per

3008

raw data item.

3009

3010

:param sizes: An iterable of tuples containing the key and size of each

3011

raw data segment.

3012

:param raw_data: A bytestring containing the data.

3013

:return: A list of memos to retrieve the record later. Each memo is an

3014

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

3015

length), where the key is the record key.

3016

"""

3017

if type(raw_data) != str:

3018

raise AssertionError(

3019

'data must be plain bytes was %s' % type(raw_data))

3020

result = []

3021

offset = 0

3022

# TODO: This can be tuned for writing to sftp and other servers where

3023

# append() is relatively expensive by grouping the writes to each key

3024

# prefix.

3025

for key, size in key_sizes:

3026

path = self._mapper.map(key)

3027

try:

3028

base = self._transport.append_bytes(path + '.knit',

3029

raw_data[offset:offset+size])

3030

except errors.NoSuchFile:

3031

self._transport.mkdir(osutils.dirname(path))

3032

base = self._transport.append_bytes(path + '.knit',

3033

raw_data[offset:offset+size])

3034

# if base == 0:

3035

# chmod.

3036

offset += size

3037

result.append((key, base, size))

3038

return result

3039

3040

def get_raw_records(self, memos_for_retrieval):

3041

"""Get the raw bytes for a records.

3042

3043

:param memos_for_retrieval: An iterable containing the access memo for

3044

retrieving the bytes.

3045

:return: An iterator over the bytes of the records.

3046

"""

3047

# first pass, group into same-index request to minimise readv's issued.

3048

request_lists = []

3049

current_prefix = None

3050

for (key, offset, length) in memos_for_retrieval:

3051

if current_prefix == key[:-1]:

3052

current_list.append((offset, length))

3053

else:

3054

if current_prefix is not None:

3055

request_lists.append((current_prefix, current_list))

3056

current_prefix = key[:-1]

3057

current_list = [(offset, length)]

3058

# handle the last entry

3059

if current_prefix is not None:

3060

request_lists.append((current_prefix, current_list))

3061

for prefix, read_vector in request_lists:

3062

path = self._mapper.map(prefix) + '.knit'

3063

for pos, data in self._transport.readv(path, read_vector):

3064

yield data

3065

3066

3067

class _DirectPackAccess(object):

3068

"""Access to data in one or more packs with less translation."""

3069

3070

def __init__(self, index_to_packs, reload_func=None):

3071

"""Create a _DirectPackAccess object.

3072

3073

:param index_to_packs: A dict mapping index objects to the transport

3074

and file names for obtaining data.

3075

:param reload_func: A function to call if we determine that the pack

3076

files have moved and we need to reload our caches. See

3077

bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.

3078

"""

3079

self._container_writer = None

3080

self._write_index = None

3081

self._indices = index_to_packs

3082

self._reload_func = reload_func

3083

3084

def add_raw_records(self, key_sizes, raw_data):
    """Add raw knit bytes to a storage area.

    The data is spooled to the container writer in one bytes-record per
    raw data item.

    :param key_sizes: An iterable of tuples containing the key and size of
        each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: A list of memos to retrieve the record later. Each memo is an
        opaque index memo. For _DirectPackAccess the memo is (index, pos,
        length), where the index field is the write_index object supplied
        to the PackAccess object.
    :raises AssertionError: If raw_data is not exactly a str.
    """
    if type(raw_data) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(raw_data))
    memos = []
    start = 0
    for key, size in key_sizes:
        segment = raw_data[start:start+size]
        # Records are written without names (empty name list).
        p_offset, p_length = self._container_writer.add_bytes_record(
            segment, [])
        start += size
        memos.append((self._write_index, p_offset, p_length))
    return memos

3109

3110

def get_raw_records(self, memos_for_retrieval):
    """Get the raw bytes for a records.

    :param memos_for_retrieval: An iterable containing the (index, pos,
        length) memo for retrieving the bytes. The Pack access method
        looks up the pack to use for a given record in its index_to_pack
        map.
    :return: An iterator over the bytes of the records.
    :raises errors.RetryWithNewPacks: If an index or pack file has gone
        missing and a reload function is available.
    """
    # first pass, group into same-index requests
    grouped = []
    active_index = None
    for (index, offset, length) in memos_for_retrieval:
        if active_index == index:
            active_offsets.append((offset, length))
            continue
        if active_index is not None:
            grouped.append((active_index, active_offsets))
        active_index = index
        active_offsets = [(offset, length)]
    # handle the last entry
    if active_index is not None:
        grouped.append((active_index, active_offsets))
    for index, offsets in grouped:
        try:
            transport, path = self._indices[index]
        except KeyError:
            # A KeyError here indicates that someone has triggered an index
            # reload, and this index has gone missing, we need to start
            # over.
            if self._reload_func is None:
                # If we don't have a _reload_func there is nothing that can
                # be done
                raise
            raise errors.RetryWithNewPacks(index,
                                           reload_occurred=True,
                                           exc_info=sys.exc_info())
        try:
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)
        except errors.NoSuchFile:
            # A NoSuchFile error indicates that a pack file has gone
            # missing on disk, we need to trigger a reload, and start over.
            if self._reload_func is None:
                raise
            raise errors.RetryWithNewPacks(transport.abspath(path),
                                           reload_occurred=False,
                                           exc_info=sys.exc_info())

3159

3160

def set_writer(self, writer, index, transport_packname):
    """Set a writer to use for adding data.

    :param writer: The container writer that add_raw_records will use.
    :param index: The index object to put in the memos returned by
        add_raw_records. When it is not None it is also registered in
        the index-to-pack map.
    :param transport_packname: The value to register for index in the
        index-to-pack map.
    """
    self._container_writer = writer
    self._write_index = index
    if index is not None:
        self._indices[index] = transport_packname

3166

3167

def reload_or_raise(self, retry_exc):
    """Try calling the reload function, or re-raise the original exception.

    This should be called after _DirectPackAccess raises a
    RetryWithNewPacks exception. This function will handle the common logic
    of determining when the error is fatal versus being temporary.
    It will also make sure that the original exception is raised, rather
    than the RetryWithNewPacks exception.

    If this function returns, then the calling function should retry
    whatever operation was being performed. Otherwise an exception will
    be raised.

    :param retry_exc: A RetryWithNewPacks exception.
    """
    is_error = False
    if self._reload_func is None:
        # Without a reload function nothing can be retried.
        is_error = True
    elif not self._reload_func():
        # The reload claimed that nothing changed
        if not retry_exc.reload_occurred:
            # If there wasn't an earlier reload, then we really were
            # expecting to find changes. We didn't find them, so this is a
            # hard error
            is_error = True
    if is_error:
        # Re-raise the exception captured in retry_exc.exc_info with its
        # original traceback (Python 2 three-argument raise).
        exc_class, exc_value, exc_traceback = retry_exc.exc_info
        raise exc_class, exc_value, exc_traceback

3195

3196

3197

# Deprecated, use PatienceSequenceMatcher instead

3198

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

3199

3200

3201

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to annotate.
    :param revision_id: Passed to _KnitAnnotator.annotate.
    :return: An iterator over the result of _KnitAnnotator.annotate.
    """
    annotations = _KnitAnnotator(knit).annotate(revision_id)
    return iter(annotations)

3210

3211

3212

class _KnitAnnotator(object):

3213

"""Build up the annotations for a text."""

3214

3215

def __init__(self, knit):

3216

self._knit = knit

3217

3218

# Content objects, differs from fulltexts because of how final newlines

3219

# are treated by knits. the content objects here will always have a

3220

# final newline

3221

self._fulltext_contents = {}

3222

3223

# Annotated lines of specific revisions

3224

self._annotated_lines = {}

3225

3226

# Track the raw data for nodes that we could not process yet.

3227

# This maps the revision_id of the base to a list of children that will

3228

# annotated from it.

3229

self._pending_children = {}

3230

3231

# Nodes which cannot be extracted

3232

self._ghosts = set()

3233

3234

# Track how many children this node has, so we know if we need to keep

3235

# it

3236

self._annotate_children = {}

3237

self._compression_children = {}

3238

3239

self._all_build_details = {}

3240

# The children => parent revision_id graph

3241

self._revision_id_graph = {}

3242

3243

self._heads_provider = None

3244

3245

self._nodes_to_keep_annotations = set()

3246

self._generations_until_keep = 100

3247

3248

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new value for self._generations_until_keep.
    """
    generations = value
    self._generations_until_keep = generations

3255

3256

def _add_fulltext_content(self, revision_id, content_obj):

3257

self._fulltext_contents[revision_id] = content_obj

3258

# TODO: jam 20080305 It might be good to check the sha1digest here

3259

return content_obj.text()

3260

3261

def _check_parents(self, child, nodes_to_annotate):

3262

"""Check if all parents have been processed.

3263

3264

:param child: A tuple of (rev_id, parents, raw_content)

3265

:param nodes_to_annotate: If child is ready, add it to

3266

nodes_to_annotate, otherwise put it back in self._pending_children

3267

"""

3268

for parent_id in child[1]:

3269

if (parent_id not in self._annotated_lines):

3270

# This parent is present, but another parent is missing

3271

self._pending_children.setdefault(parent_id,

3272

[]).append(child)

3273

break

3274

else:

3275

# This one is ready to be processed

3276

nodes_to_annotate.append(child)

3277

3278

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Annotate one revision and release parents that are finished.

    All parents should already have been annotated, i.e. every entry of
    parent_ids must be a key of self._annotated_lines.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of revision_id, handed to
        annotate.reannotate.
    :param parent_ids: The parents whose annotations are combined.
    :param left_matching_blocks: Passed through to annotate.reannotate
        unchanged.
    :return: A list of children that now have their parents satisfied.
    """
    parent_annotations = [self._annotated_lines[p] for p in parent_ids]
    reannotated = annotate.reannotate(
        parent_annotations, fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider())
    self._annotated_lines[revision_id] = list(reannotated)

    for parent in parent_ids:
        remaining_children = self._annotate_children[parent]
        remaining_children.remove(revision_id)
        if remaining_children or parent in self._nodes_to_keep_annotations:
            # Either another child still needs this parent, or it was
            # explicitly marked to be kept.
            continue
        # That was the last registered child: drop everything cached for
        # the parent.
        del self._annotated_lines[parent]
        del self._all_build_details[parent]
        self._fulltext_contents.pop(parent, None)

    # Now that this revision is annotated, re-check the children that were
    # parked waiting for it.
    now_ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, now_ready)
    return now_ready

3306

3307

def _get_build_graph(self, key):

3308

"""Get the graphs for building texts and annotations.

3309

3310

The data you need for creating a full text may be different than the

3311

data you need to annotate that text. (At a minimum, you need both

3312

parents to create an annotation, but only need 1 parent to generate the

3313

fulltext.)

3314

3315

:return: A list of (key, index_memo) records, suitable for

3316

passing to read_records_iter to start reading in the raw data fro/

3317

the pack file.

3318

"""

3319

if key in self._annotated_lines:

3320

# Nothing to do

3321

return []

3322

pending = set([key])

3323

records = []

3324

generation = 0

3325

kept_generation = 0

3326

while pending:

3327

# get all pending nodes

3328

generation += 1

3329

this_iteration = pending

3330

build_details = self._knit._index.get_build_details(this_iteration)

3331

self._all_build_details.update(build_details)

3332

# new_nodes = self._knit._index._get_entries(this_iteration)

3333

pending = set()

3334

for key, details in build_details.iteritems():

3335

(index_memo, compression_parent, parents,

3336

record_details) = details

3337

self._revision_id_graph[key] = parents

3338

records.append((key, index_memo))

3339

# Do we actually need to check _annotated_lines?

3340

pending.update(p for p in parents

3341

if p not in self._all_build_details)

3342

if compression_parent:

3343

self._compression_children.setdefault(compression_parent,

3344

[]).append(key)

3345

if parents:

3346

for parent in parents:

3347

self._annotate_children.setdefault(parent,

3348

[]).append(key)

3349

num_gens = generation - kept_generation

3350

if ((num_gens >= self._generations_until_keep)

3351

and len(parents) > 1):

3352

kept_generation = generation

3353

self._nodes_to_keep_annotations.add(key)

3354

3355

missing_versions = this_iteration.difference(build_details.keys())

3356

self._ghosts.update(missing_versions)

3357

for missing_version in missing_versions:

3358

# add a key, no parents

3359

self._revision_id_graph[missing_version] = ()

3360

pending.discard(missing_version) # don't look for it

3361

if self._ghosts.intersection(self._compression_children):

3362

raise KnitCorrupt(

3363

"We cannot have nodes which have a ghost compression parent:\n"

3364

"ghosts: %r\n"

3365

"compression children: %r"

3366

% (self._ghosts, self._compression_children))

3367

# Cleanout anything that depends on a ghost so that we don't wait for

3368

# the ghost to show up

3369

for node in self._ghosts:

3370

if node in self._annotate_children:

3371

# We won't be building this node

3372

del self._annotate_children[node]

3373

# Generally we will want to read the records in reverse order, because

3374

# we find the parent nodes after the children

3375

records.reverse()

3376

return records

3377

3378

def _annotate_records(self, records):

3379

"""Build the annotations for the listed records."""

3380

# We iterate in the order read, rather than a strict order requested

3381

# However, process what we can, and put off to the side things that

3382

# still need parents, cleaning them up when those parents are

3383

# processed.

3384

for (rev_id, record,

3385

digest) in self._knit._read_records_iter(records):

3386

if rev_id in self._annotated_lines:

3387

continue

3388

parent_ids = self._revision_id_graph[rev_id]

3389

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3390

details = self._all_build_details[rev_id]

3391

(index_memo, compression_parent, parents,

3392

record_details) = details

3393

nodes_to_annotate = []

3394

# TODO: Remove the punning between compression parents, and

3395

# parent_ids, we should be able to do this without assuming

3396

# the build order

3397

if len(parent_ids) == 0:

3398

# There are no parents for this node, so just add it

3399

# TODO: This probably needs to be decoupled

3400

fulltext_content, delta = self._knit._factory.parse_record(

3401

rev_id, record, record_details, None)

3402

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3403

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3404

parent_ids, left_matching_blocks=None))

3405

else:

3406

child = (rev_id, parent_ids, record)

3407

# Check if all the parents are present

3408

self._check_parents(child, nodes_to_annotate)

3409

while nodes_to_annotate:

3410

# Should we use a queue here instead of a stack?

3411

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3412

(index_memo, compression_parent, parents,

3413

record_details) = self._all_build_details[rev_id]

3414

blocks = None

3415

if compression_parent is not None:

3416

comp_children = self._compression_children[compression_parent]

3417

if rev_id not in comp_children:

3418

raise AssertionError("%r not in compression children %r"

3419

% (rev_id, comp_children))

3420

# If there is only 1 child, it is safe to reuse this

3421

# content

3422

reuse_content = (len(comp_children) == 1

3423

and compression_parent not in

3424

self._nodes_to_keep_annotations)

3425

if reuse_content:

3426

# Remove it from the cache since it will be changing

3427

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3428

# Make sure to copy the fulltext since it might be

3429

# modified

3430

parent_fulltext = list(parent_fulltext_content.text())

3431

else:

3432

parent_fulltext_content = self._fulltext_contents[compression_parent]

3433

parent_fulltext = parent_fulltext_content.text()

3434

comp_children.remove(rev_id)

3435

fulltext_content, delta = self._knit._factory.parse_record(

3436

rev_id, record, record_details,

3437

parent_fulltext_content,

3438

copy_base_content=(not reuse_content))

3439

fulltext = self._add_fulltext_content(rev_id,

3440

fulltext_content)

3441

if compression_parent == parent_ids[0]:

3442

# the compression_parent is the left parent, so we can

3443

# re-use the delta

3444

blocks = KnitContent.get_line_delta_blocks(delta,

3445

parent_fulltext, fulltext)

3446

else:

3447

fulltext_content = self._knit._factory.parse_fulltext(

3448

record, rev_id)

3449

fulltext = self._add_fulltext_content(rev_id,

3450

fulltext_content)

3451

nodes_to_annotate.extend(

3452

self._add_annotation(rev_id, fulltext, parent_ids,

3453

left_matching_blocks=blocks))

3454

3455

def _get_heads_provider(self):

3456

"""Create a heads provider for resolving ancestry issues."""

3457

if self._heads_provider is not None:

3458

return self._heads_provider

3459

parent_provider = _mod_graph.DictParentsProvider(

3460

self._revision_id_graph)

3461

graph_obj = _mod_graph.Graph(parent_provider)

3462

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

3463

self._heads_provider = head_cache

3464

return head_cache

3465

3466

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    :param key: The key to annotate.
    :return: The entry of self._annotated_lines for key, or the result of
        _simple_annotate when the knit has fallback versioned files.
    :raises errors.RevisionNotPresent: If key turns out to be a ghost.
    """
    if self._knit._fallback_vfs:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    while True:
        try:
            records = self._get_build_graph(key)
            if key in self._ghosts:
                raise errors.RevisionNotPresent(key, self._knit)
            self._annotate_records(records)
            return self._annotated_lines[key]
        except errors.RetryWithNewPacks as retry:
            # reload_or_raise either re-raises or returns, in which case
            # the loop runs again.
            self._knit._access.reload_or_raise(retry)
            # The cached build_details are no longer valid
            self._all_build_details.clear()

3485

3486

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The annotated lines built for key, as a list made from the
        result of annotate.reannotate.
    :raises errors.RevisionNotPresent: If no annotation was produced for
        key.
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    # Collect every present ancestor of key; ghosts reported by the search
    # are deliberately left out.
    keys = set()
    while True:
        try:
            present, _ignored_ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name so the requested key is not overwritten; the
        # final lookup and the error below must refer to what the caller
        # asked for.
        record_key = record.key
        fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parents = parent_map[record_key]
        if parents is not None:
            # Parents outside the present set are ghosts: they are never
            # streamed, so they have no cached annotation. Skip them, as
            # _annotate_records does on the fast path.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent in keys]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

3522

3523

3524

try:

3525

from bzrlib._knit_load_data_c import _load_data_c as _load_data

3526

except ImportError:

3527

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »