/brz/remove-bazaar : revision 3039

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-11-28 00:59:30 UTC
mfrom: (2979.2.5 commit.merge-speed)
Revision ID: pqm@pqm.ubuntu.com-20071128005930-4wb5pl12fyq8ek13

(robertc) Change commit on pack repositories to use the per-file
graph for heads() calculations. (Robert Collins, 165306)

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/ftp_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/authentication_conf.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

lru_cache,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

100

KnitHeaderError,

101

RevisionNotPresent,

102

RevisionAlreadyPresent,

103

)

104

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

105

from bzrlib.osutils import (

106

contains_whitespace,

107

contains_linebreaks,

108

sha_string,

109

sha_strings,

110

)

111

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

112

from bzrlib.tsort import topo_sort

113

import bzrlib.ui

114

import bzrlib.weave

115

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

116

117

118

# TODO: Split out code specific to this format into an associated object.

119

120

# TODO: Can we put in some kind of value to check that the index and data

121

# files belong together?

122

123

# TODO: accommodate binaries, perhaps by storing a byte count

124

125

# TODO: function to check whole file

126

127

# TODO: atomically append data, then measure backwards from the cursor

128

# position after writing to work out where it was located. we may need to

129

# bypass python file buffering.

130

131

# Filename suffix '.knit'; presumably appended to a knit's base name to locate
# the data file described in the module docstring -- confirm at the use sites.
DATA_SUFFIX = '.knit'

132

# Filename suffix '.kndx'; presumably appended to a knit's base name to locate
# the index described in the module docstring -- confirm at the use sites.
INDEX_SUFFIX = '.kndx'

133

134

135

class KnitContent(object):
    """Base class for the content of one knit version.

    Subclasses supply ``annotate_iter()``, ``text()`` and ``apply_delta()``;
    this class layers the delta-related helpers on top of those.
    """

    def annotate(self):
        """Return the (origin, text) pairs of this content as a list."""
        pairs = self.annotate_iter()
        return list(pairs)

    def apply_delta(self, delta, new_version_id):
        """Turn this content into new_version_id by applying delta.

        Not implemented here; always raises NotImplementedError.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta that turns this content into new_lines.

        Each yielded item is ``(old_from, old_to, length, data)``: the range of
        lines being replaced in this content, the number of replacement lines,
        and the replacement lines themselves (sliced from
        ``new_lines._lines``).  Runs that the matcher reports as equal are
        skipped.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for opcode, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
            if opcode != 'equal':
                yield (old_lo, old_hi, new_hi - new_lo,
                       new_lines._lines[new_lo:new_hi])

    def line_delta(self, new_lines):
        """Return the items of line_delta_iter(new_lines) as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher.get_matching_blocks() output from a knit delta.

        :param knit_delta: iterable of ``(s_begin, s_end, t_len, new_text)``
            hunks; only the first three fields are used here.
        :param source: indexable sequence of lines the delta applies to.
        :param target: indexable sequence of lines the delta produces.
        :return: generator of ``(source_index, target_index, length)`` triples
            for the runs between hunks, finishing with a zero-length triple.
        """
        target_total = len(target)
        src_cursor = 0
        tgt_cursor = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_cursor
            matched = gap
            # A knit delta cannot be trusted about whether the final line of
            # a run really matches (end-of-line handling), so compare that
            # line explicitly and drop it from the run when it differs.
            if matched > 0 and (source[src_cursor + matched - 1]
                                != target[tgt_cursor + matched - 1]):
                matched -= 1
            if matched > 0:
                yield src_cursor, tgt_cursor, matched
            tgt_cursor += inserted + gap
            src_cursor = hunk_end
        remaining = target_total - tgt_cursor
        tail = remaining
        # Same last-line check for whatever follows the final hunk.
        if tail > 0 and (source[src_cursor + tail - 1]
                         != target[tgt_cursor + tail - 1]):
            tail -= 1
        if tail > 0:
            yield src_cursor, tgt_cursor, tail
        # Terminating zero-length block.
        yield src_cursor + remaining, target_total, 0

185

186

187

class AnnotatedKnitContent(KnitContent):

188

"""Annotated content."""

189

190

def __init__(self, lines):

191

self._lines = lines

192

193

def annotate_iter(self):

194

"""Yield tuples of (origin, text) for each content line."""

195

return iter(self._lines)

196

197

def apply_delta(self, delta, new_version_id):

198

"""Apply delta to this object to become new_version_id."""

199

offset = 0

200

lines = self._lines

201

for start, end, count, delta_lines in delta:

202

lines[offset+start:offset+end] = delta_lines

203

offset = offset + (start - end) + count

204

205

def strip_last_line_newline(self):

206

line = self._lines[-1][1].rstrip('\n')

207

self._lines[-1] = (self._lines[-1][0], line)

208

209

def text(self):

210

try:

211

return [text for origin, text in self._lines]

212

except ValueError, e:

213

# most commonly (only?) caused by the internal form of the knit

214

# missing annotation information because of a bug - see thread

215

# around 20071015

216

raise KnitCorrupt(self,

217

"line in annotated knit missing annotation information: %s"

218

% (e,))

219

220

def copy(self):

221

return AnnotatedKnitContent(self._lines[:])

222

223

224

class PlainKnitContent(KnitContent):
    """Knit content stored as bare lines, with no per-line origin.

    annotate_iter() attributes every line to the one version id held by the
    object, so annotating a PlainKnitContent is generally not useful.
    """

    def __init__(self, lines, version_id):
        self._lines = lines
        self._version_id = version_id

    def text(self):
        """Return the stored list of lines itself (not a copy)."""
        return self._lines

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return PlainKnitContent(duplicate, self._version_id)

    def annotate_iter(self):
        """Yield (version_id, line) for each content line."""
        for line in self._lines:
            yield self._version_id, line

    def apply_delta(self, delta, new_version_id):
        """Splice each hunk of delta into the lines and adopt new_version_id.

        Hunk positions refer to the lines before any hunk was applied, so a
        running shift tracks how much earlier hunks moved later ones.
        """
        plain = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            plain[shift + start:shift + end] = delta_lines
            shift += (start - end) + count
        self._version_id = new_version_id

    def strip_last_line_newline(self):
        """Strip trailing newline characters from the last line."""
        last = self._lines[-1]
        self._lines[-1] = last.rstrip('\n')

258

259

260

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return content in which every line is attributed to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        version_id is not used by this implementation.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks parsed from a line delta.

        :param lines: The serialised delta, as for parse_line_delta.
        :param version_id: Forwarded to parse_line_delta. Optional so that
            callers using the original one-argument form keep working; that
            form used to fail because parse_line_delta requires the
            argument.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        # The same iterator feeds both the header loop and the content
        # reads, so pulling 'count' lines here advances past the hunk body.
        next = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next().split(' ', 1)[1] for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of version_id's content in knit."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

370

371

372

class KnitPlainFactory(object):
    """Factory producing PlainKnitContent objects (no per-line origins)."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent labelled with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        The lines need no conversion; they are handed to make() together
        with version_id.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) for each hunk in lines.

        lines must support len() and slicing. Each hunk is a
        'start,end,count' header followed by count content lines.
        version_id is not used by this implementation.
        """
        total = len(lines)
        position = 0
        while position < total:
            header = lines[position]
            position += 1
            start, end, count = [int(field) for field in header.split(',')]
            yield start, end, count, lines[position:position + count]
            position += count

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Return an iterator over the content lines of a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a serialised line delta.

        The hunk headers are consumed but not returned.
        """
        lines = iter(lines)
        # Headers and hunk bodies are read from the same iterator.
        read_line = lines.next
        for header in lines:
            fields = header.split(',')
            remaining = int(fields[2])
            for unused in xrange(remaining):
                yield read_line()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta as header lines, each followed by its content."""
        serialised = []
        for hunk in delta:
            start, end, c, lines = hunk
            serialised.append('%d,%d,%d\n' % (start, end, c))
            serialised.extend(lines)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Delegate annotation of version_id to annotate_knit()."""
        return annotate_knit(knit, version_id)

431

432

433

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit lives on.
    :param relpath: Path of the knit relative to transport, without suffix.
    :return: The new KnitVersionedFile. Earlier versions returned None, so
        callers that ignore the result are unaffected.
    """
    # KnitVersionedFile takes (relpath, transport, ...): the previous call
    # passed the two the other way round, put the factory class where the
    # access mode belongs, and never asked for the knit to be created.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

436

437

438

class KnitVersionedFile(VersionedFile):

439

"""Weave-like structure with faster random access.

440

441

A knit stores a number of texts and a summary of the relationships

442

between them. Texts are identified by a string version-id. Texts

443

are normally stored and retrieved as a series of lines, but can

444

also be passed as single strings.

445

446

Lines are stored with the trailing newline (if any) included, to

447

avoid special cases for files with no final newline. Lines are

448

composed of 8-bit characters, not unicode. The combination of

449

these approaches should mean any 'binary' file can be safely

450

stored and retrieved.

451

"""

452

453

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, delta=True, create=False, create_parent_dir=False,
             delay_create=False, dir_mode=None, index=None,
             access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base path of the knit; the index and data suffixes are
        appended to it.
    :param transport: Transport used to reach the knit's files.
    :param file_mode: Passed on to the index and data access objects.
    :param access_mode: 'r' or 'w'; None means 'w'.
    :param factory: Content factory; a KnitAnnotateFactory is used when
        none (or a false value) is given.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed on to the index and data access objects.
    :param index: An index to use for the knit, instead of building one.
    :param access_method: A data access object to use, instead of building
        one.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    # The index has to exist before len(self) is evaluated below.
    if index is not None:
        self._index = index
    else:
        self._index = _KnitIndex(
            transport, relpath + INDEX_SUFFIX, access_mode,
            create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir,
            delay_create=delay_create, dir_mode=dir_mode)

    if access_method is not None:
        _access = access_method
    else:
        _access = _KnitAccess(
            transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create),
            create_parent_dir)

    # An empty knit that was asked for without delay is created right away.
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

493

494

def __repr__(self):

495

return '%s(%s)' % (self.__class__.__name__,

496

self.transport.abspath(self.filename))

497

498

def _check_should_delta(self, first_parents):

499

"""Iterate back through the parent listing, looking for a fulltext.

500

501

This is used when we want to decide whether to add a delta or a new

502

fulltext. It searches for _max_delta_chain parents. When it finds a

503

fulltext parent, it sees if the total size of the deltas leading up to

504

it is large enough to indicate that we want a new full text anyway.

505

506

Return True if we should create a new delta, False if we should use a

507

full text.

508

"""

509

delta_size = 0

510

fulltext_size = None

511

delta_parents = first_parents

512

for count in xrange(self._max_delta_chain):

513

parent = delta_parents[0]

514

method = self._index.get_method(parent)

515

index, pos, size = self._index.get_position(parent)

516

if method == 'fulltext':

517

fulltext_size = size

518

break

519

delta_size += size

520

delta_parents = self._index.get_parents(parent)

521

else:

522

# We couldn't find a fulltext, so we must create a new one

523

return False

524

525

return fulltext_size > delta_size

526

527

def _add_raw_records(self, records, data):

528

"""Add all the records 'records' with data pre-joined in 'data'.

529

530

:param records: A list of tuples(version_id, options, parents, size).

531

:param data: The data for the records. When it is written, the records

532

are adjusted to have pos pointing into data by the sum of

533

the preceding records sizes.

534

"""

535

# write all the data

536

raw_record_sizes = [record[3] for record in records]

537

positions = self._data.add_raw_records(raw_record_sizes, data)

538

offset = 0

539

index_entries = []

540

for (version_id, options, parents, size), access_memo in zip(

541

records, positions):

542

index_entries.append((version_id, options, access_memo, parents))

543

if self._data._do_cache:

544

self._data._cache[version_id] = data[offset:offset+size]

545

offset += size

546

self._index.add_versions(index_entries)

547

548

def enable_cache(self):
    """Turn on caching in this knit's data object."""
    data = self._data
    data.enable_cache()

551

552

def clear_cache(self):
    """Clear the cache of this knit's data object; the index is untouched."""
    data = self._data
    data.clear_cache()

555

556

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first written under a temporary name, then the data file
    is copied, and only then is the index moved to its final name.
    """
    index_target = name + INDEX_SUFFIX
    index_staging = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(
        index_staging, self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(index_staging, index_target)

570

571

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport with this knit's settings.

    The new knit shares this knit's factory and delta flag. The mode
    argument is accepted but not used.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

574

575

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.  They are returned in a partial order by compression
    parent, so that the deltas can be applied as the data stream is
    inserted; however note that compression parents will not be sent
    unless they were specifically requested, as the client may already
    have them.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.  reader_callable(length) returns the next length
        bytes of the concatenated raw records, or everything that is left
        when length is None.
    :raises RevisionNotPresent: if a requested version is not in this knit.
    """
    required_version_set = frozenset(required_versions)
    # version_id -> (index_memo, options, parents), as read from the index
    version_index = {}
    # list of revisions that can just be sent without waiting for their
    # compression parent
    ready_to_send = []
    # map from revision to the children based on it
    deferred = {}
    # first, read all relevant index data, enough to sort into the right
    # order to return
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        version_index[version_id] = (index_memo, options, parents)
        # NOTE(review): only the first parent is looked at here, not the
        # storage method, so a fulltext whose first parent was requested is
        # deferred as well.
        if parents and parents[0] in required_version_set:
            # must wait until the parent has been sent
            deferred.setdefault(parents[0], []). \
                append(version_id)
        else:
            # either a fulltext, or a delta whose parent the client did
            # not ask for and presumably already has
            ready_to_send.append(version_id)
    # build a list of results to return, plus instructions for data to
    # read from the file
    copy_queue_records = []
    temp_version_list = []
    while ready_to_send:
        # XXX: pushing and popping lists may be a bit inefficient
        version_id = ready_to_send.pop(0)
        (index_memo, options, parents) = version_index[version_id]
        copy_queue_records.append((version_id, index_memo))
        none, data_pos, data_size = index_memo
        temp_version_list.append((version_id, options, data_size,
            parents))
        if version_id in deferred:
            # now we can send all the children of this revision - we could
            # put them in anywhere, but we hope that sending them soon
            # after the fulltext will give good locality in the receiver
            ready_to_send[:0] = deferred.pop(version_id)
    assert len(deferred) == 0, \
        "Still have compressed child versions waiting to be sent"
    # XXX: The stream format is such that we cannot stream it - we have to
    # know the length of all the data a-priori.
    raw_datum = []
    result_version_list = []
    # The raw records are expected back in the order they were queued; the
    # assertion below checks that against temp_version_list.
    for (version_id, raw_data), \
        (version_id2, options, _, parents) in \
        izip(self._data.read_records_iter_raw(copy_queue_records),
             temp_version_list):
        assert version_id == version_id2, \
            'logic error, inconsistent results'
        raw_datum.append(raw_data)
        # The reported length is that of the raw data actually read, not
        # the size recorded in the index memo.
        result_version_list.append(
            (version_id, options, len(raw_data), parents))
    # provide a callback to get data incrementally.
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

656

657

def _extract_blocks(self, version_id, source, target):

658

if self._index.get_method(version_id) != 'line-delta':

659

return None

660

parent, sha1, noeol, delta = self.get_delta(version_id)

661

return KnitContent.get_line_delta_blocks(delta, source, target)

662

663

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta). parent is the first parent of
        version_id, or None when it has none. noeol tells whether 'no-eol'
        is among the version's options. For a version stored as a
        line-delta the stored delta is parsed and returned; for a fulltext
        a delta against the parent's text (or against no lines when there
        is no parent) is computed.
    """
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)

    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a delta already: it only needs parsing.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        reference_content = self._get_content(parent)
        old_texts = reference_content.text()
    new_texts = new_content.text()
    delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                     new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

688

689

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    Returns "knit-annotated" or "knit-plain" depending on the factory's
    annotated flag.
    """
    annotated_part = "plain"
    if self.factory.annotated:
        annotated_part = "annotated"
    return "knit-%s" % (annotated_part,)

696

697

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    Returns the index's graph items collected into a dict.
    """
    return dict(self._index.get_graph())

701

702

def get_sha1(self, version_id):
    """Return the digest of version_id, as reported by get_sha1s()."""
    digests = self.get_sha1s([version_id])
    return digests[0]

704

705

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1s().

    :return: The digests of version_ids, in the order given.
    """
    record_map = self._get_record_map(version_ids)
    digests = []
    for version_id in version_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

710

711

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Returns the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

715

716

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is present is never a ghost. Otherwise version_id is a
    ghost when some node in the index graph names it as a parent and it is
    absent from the index cache.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

729

730

def insert_data_stream(self, (format, data_list, reader_callable)):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    The single argument is the (format, data_list, reader_callable) tuple
    as returned by get_data_stream: data_list holds (version_id, options,
    length, parents) entries and reader_callable(length) returns the raw
    bytes of the next record.

    :raises KnitDataStreamIncompatible: if format differs from this knit's
        format signature.
    :raises KnitCorrupt: if a record conflicts with stored data, or a
        line-delta record's first parent is not in this knit.

    :seealso: get_data_stream
    """
    if format != self.get_format_signature():
        trace.mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            format, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.
            # The record must be read even though it is not stored, so that
            # the reader is positioned at the next record.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            # NOTE(review): df is never closed here - confirm whether
            # _parse_record_header hands back something that needs closing.
            df, rec = self._data._parse_record_header(version_id, raw_data)
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            if 'line-delta' in options:
                # Make sure that this knit record is actually useful: a
                # line-delta is no use unless we have its parent.
                # Fetching from a broken repository with this problem
                # shouldn't break the target repository.
                if not self._index.has_version(parents[0]):
                    raise KnitCorrupt(
                        self.filename,
                        'line-delta from stream references '
                        'missing parent %s' % parents[0])
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

782

783

def versions(self):
    """See VersionedFile.versions.

    With the 'evil' debug flag set, the call site is logged first.
    """
    evil_enabled = 'evil' in debug.debug_flags
    if evil_enabled:
        trace.mutter_callsite(2, "versions scales with size of history")
    all_versions = self._index.get_versions()
    return all_versions

788

789

def has_version(self, version_id):
    """See VersionedFile.has_version.

    With the 'evil' debug flag set, the call site is logged first.
    """
    evil_enabled = 'evil' in debug.debug_flags
    if evil_enabled:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    present = self._index.has_version(version_id)
    return present

794

795

__contains__ = has_version

796

797

def _merge_annotations(self, content, parents, parent_texts={},

798

delta=None, annotated=None,

799

left_matching_blocks=None):

800

"""Merge annotations for content. This is done by comparing

801

the annotations based on changed to the text.

802

"""

803

if left_matching_blocks is not None:

804

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

805

else:

806

delta_seq = None

807

if annotated:

808

for parent_id in parents:

809

merge_content = self._get_content(parent_id, parent_texts)

810

if (parent_id == parents[0] and delta_seq is not None):

811

seq = delta_seq

812

else:

813

seq = patiencediff.PatienceSequenceMatcher(

814

None, merge_content.text(), content.text())

815

for i, j, n in seq.get_matching_blocks():

816

if n == 0:

817

continue

818

# this appears to copy (origin, text) pairs across to the

819

# new content for any line that matches the last-checked

820

# parent.

821

content._lines[j:j+n] = merge_content._lines[i:i+n]

822

if delta:

823

if delta_seq is None:

824

reference_content = self._get_content(parents[0], parent_texts)

825

new_texts = content.text()

826

old_texts = reference_content.text()

827

delta_seq = patiencediff.PatienceSequenceMatcher(

828

None, old_texts, new_texts)

829

return self._make_line_delta(delta_seq, content)

830

831

def _make_line_delta(self, delta_seq, new_content):

832

"""Generate a line delta from delta_seq and new_content."""

833

diff_hunks = []

834

for op in delta_seq.get_opcodes():

835

if op[0] == 'equal':

836

continue

837

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

838

return diff_hunks

839

840

def _get_components_positions(self, version_ids):

841

"""Produce a map of position data for the components of versions.

842

843

This data is intended to be used for retrieving the knit records.

844

845

A dict of version_id to (method, data_pos, data_size, next) is

846

returned.

847

method is the way referenced data should be applied.

848

data_pos is the position of the data in the knit.

849

data_size is the size of the data in the knit.

850

next is the build-parent of the version, or None for fulltexts.

851

"""

852

component_data = {}

853

for version_id in version_ids:

854

cursor = version_id

855

856

while cursor is not None and cursor not in component_data:

857

method = self._index.get_method(cursor)

858

if method == 'fulltext':

859

next = None

860

else:

861

next = self.get_parents_with_ghosts(cursor)[0]

862

index_memo = self._index.get_position(cursor)

863

component_data[cursor] = (method, index_memo, next)

864

cursor = next

865

return component_data

866

867

def _get_content(self, version_id, parent_texts={}):

868

"""Returns a content object that makes up the specified

869

version."""

870

cached_version = parent_texts.get(version_id, None)

871

if cached_version is not None:

872

if not self.has_version(version_id):

873

raise RevisionNotPresent(version_id, self.filename)

874

return cached_version

875

876

text_map, contents_map = self._get_content_maps([version_id])

877

return contents_map[version_id]

878

879

def _check_versions_present(self, version_ids):

880

"""Check that all specified versions are present."""

881

self._index.check_versions_present(version_ids)

882

883

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

884

nostore_sha, random_id, check_content):

885

"""See VersionedFile.add_lines_with_ghosts()."""

886

self._check_add(version_id, lines, random_id, check_content)

887

return self._add(version_id, lines, parents, self.delta,

888

parent_texts, None, nostore_sha, random_id)

889

890

def _add_lines(self, version_id, parents, lines, parent_texts,

891

left_matching_blocks, nostore_sha, random_id, check_content):

892

"""See VersionedFile.add_lines."""

893

self._check_add(version_id, lines, random_id, check_content)

894

self._check_versions_present(parents)

895

return self._add(version_id, lines[:], parents, self.delta,

896

parent_texts, left_matching_blocks, nostore_sha, random_id)

897

898

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if random_id is false and the version
        is already in the knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    already_present = (not random_id) and self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

912

913

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: Id of the new version; must be a str.
    :param lines: The text, as a list of lines. The caller's list is not
        modified here: it is copied before a newline is appended.
    :param parents: Parent version ids, leftmost first.
    :param delta: Whether a line-delta may be stored.
    :param parent_texts: Optional dict of version id -> content; None is
        treated as an empty dict.
    :param left_matching_blocks: Passed on to _merge_annotations.
    :param nostore_sha: If this equals the sha of the text, nothing is
        stored and errors.ExistingContent is raised.
    :param random_id: Passed on to the index when adding the version.
    :return: (digest, text_length, content) - the sha and length of the
        text as given, and the content object built for it.
    :raises errors.ExistingContent: see nostore_sha.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    for parent in parents:
        if self.has_version(parent):
            present_parents.append(parent)

    # can only compress against the left most present parent.
    if (delta and
        (len(present_parents) == 0 or
         present_parents[0] != parents[0])):
        delta = False

    text_length = len(line_bytes)
    options = []
    if lines:
        # endswith() rather than lines[-1][-1], so that an empty final
        # line is handled as a missing newline instead of raising
        # IndexError.
        if not lines[-1].endswith('\n'):
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
        size, raw_bytes = self._data._record_to_data(version_id, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self.factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, raw_bytes = self._data._record_to_data(version_id, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self.factory.lower_fulltext(content)
            size, raw_bytes = self._data._record_to_data(version_id, digest,
                store_lines)

    access_memo = self._data.add_raw_records([size], raw_bytes)[0]
    self._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

991

992

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """
    return None

994

995

def _clone_text(self, new_version_id, old_version_id, parents):
    """Store the text of old_version_id again under new_version_id.

    See VersionedFile.clone_text().

    :param new_version_id: Version id the copied text is added as.
    :param old_version_id: Version id whose lines are read and copied.
    :param parents: Parents passed through to add_lines for the new version.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    source_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, source_lines)

1000

1001

def get_lines(self, version_id):
    """Return the lines of a single version.

    See VersionedFile.get_lines().

    :param version_id: The version to extract.
    :return: The first (and only requested) entry of
        get_line_list([version_id]).
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

1004

1005

def _get_record_map(self, version_ids):
    """Read every record needed to build version_ids into a dictionary.

    The components to read are taken from
    self._get_components_positions(version_ids), whose values are
    (method, index_memo, next) tuples keyed by component id.

    :param version_ids: The version ids that are going to be extracted.
    :return: A dict mapping component id to (method, content, digest, next):
        method is the way the content should be applied;
        content and digest are the values yielded for that component by
        self._data.read_records_iter;
        next is the build-parent of the component (None for a fulltext,
        according to the original documentation of this method).
    """
    positions = self._get_components_positions(version_ids)
    # Only the index memo is needed to ask the data layer for a record.
    wanted = []
    for component in positions:
        method, index_memo, build_parent = positions[component]
        wanted.append((component, index_memo))
    records = {}
    for component, content, digest in self._data.read_records_iter(wanted):
        method, index_memo, build_parent = positions[component]
        records[component] = (method, content, digest, build_parent)
    return records

1026

1027

def get_text(self, version_id):
    """Return the text of a single version as one string.

    See VersionedFile.get_text.

    :param version_id: The version to extract.
    :return: The first (and only requested) entry of
        get_texts([version_id]).
    """
    texts = self.get_texts([version_id])
    return texts[0]

1030

1031

def get_texts(self, version_ids):
    """Return the joined text of each listed version.

    :param version_ids: The versions to extract.
    :return: A list of strings, one per entry returned by
        get_line_list(version_ids), each being that entry's lines joined
        together.
    """
    texts = []
    for lines in self.get_line_list(version_ids):
        texts.append(''.join(lines))
    return texts

1033

1034

def get_line_list(self, version_ids):
    """Return the texts of the listed versions, one entry per version.

    :param version_ids: An iterable of version ids. It is copied into a
        list first because it is walked several times below; without the
        copy a one-shot iterable (e.g. a generator) would be exhausted by
        the reserved-id check and nothing would be extracted.
    :return: A list holding, for each requested version id in request
        order, that version's entry from the text map built by
        _get_content_maps.
    """
    version_ids = list(version_ids)
    for version_id in version_ids:
        self.check_not_reserved_id(version_id)
    text_map, content_map = self._get_content_maps(version_ids)
    return [text_map[v] for v in version_ids]

# Second name for get_line_list, kept as in the original class body.
_get_lf_split_line_list = get_line_list

1042

1043

def _get_content_maps(self, version_ids):
    """Rebuild the requested versions from their stored components.

    For every requested version the chain of components is followed
    through each record's build parent until a fulltext (build parent
    None) or an already built component is reached; the chain is then
    replayed oldest first, parsing fulltexts and applying line deltas.

    :param version_ids: Iterable of the version ids to extract.
    :return: (text_map, final_content). text_map maps each requested
        version id to content.text() of its rebuilt content;
        final_content maps each requested version id to the content object
        it ended up with, after the trailing newline has been stripped for
        versions whose index options include 'no-eol'.
    :raises KnitCorrupt: if the sha-1 of a rebuilt text differs from the
        digest stored with the version's own record.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children than need to apply delta's to it is > 1 or it is part of the
    # final output.
    version_ids = list(version_ids)
    # With exactly one request no content is shared between versions, so
    # the defensive copies and the per-component cache are skipped.
    multiple_versions = len(version_ids) != 1
    record_map = self._get_record_map(version_ids)

    text_map = {}
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping early at a
        # component whose content is already cached.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = build_parent

        # Replay the chain oldest first. The last iteration handles
        # version_id itself, so 'digest' ends up as its recorded digest.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in content_map:
                content = content_map[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                if multiple_versions:
                    # only doing this when we want multiple versions
                    # output avoids list copies - which reference and
                    # dereference many strings.
                    content = content.copy()
                content.apply_delta(delta, version_id)
            if multiple_versions:
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            if multiple_versions:
                # The cached content may be reused; strip a private copy.
                content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            raise KnitCorrupt(self.filename,
                '\n sha-1 %s'
                '\n of reconstructed text does not match'
                '\n expected %s'
                '\n for version %s' %
                (actual_sha, digest, version_id))
        text_map[version_id] = text
    return text_map, final_content

1108

1109

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """Yield (line, version_id) for each line stored for the versions.

    See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: Versions to walk; None means self.versions().
    :param pb: Progress bar to update; None means a
        progress.DummyProgress() is used.
    :raises RevisionNotPresent: if a requested version is not in this
        file (raised when iteration starts, since this is a generator).
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    wanted = set(version_ids)
    # Refuse the whole request if any requested version is unavailable.
    for version_id in wanted:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Queue (version_id, index_memo) pairs in the order self.versions()
    # lists them, i.e. in-component order.
    to_read = [(version_id, self._index.get_position(version_id))
               for version_id in self.versions()
               if version_id in wanted]

    total = len(to_read)
    done = 0
    for version_id, data, sha_value in \
            self._data.read_records_iter(to_read):
        pb.update('Walking content.', done, total)
        done += 1
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract_lines = self.factory.get_fulltext_content
        else:
            extract_lines = self.factory.get_linedelta_content
        # XXX: It might be more efficient to yield (version_id,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in extract_lines(data):
            yield line, version_id

    pb.update('Walking content.', total, total)

1149

1150

def iter_parents(self, version_ids):
    """Iterate through the parents of many version ids at once.

    The work is delegated to the index.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    index = self._index
    return index.iter_parents(version_ids)

1160

1161

def num_versions(self):
    """Return the number of versions, as reported by the index.

    See VersionedFile.num_versions().
    """
    index = self._index
    return index.num_versions()

# len(knit) reports the same count as num_versions().
__len__ = num_versions

1166

1167

def annotate_iter(self, version_id):
    """Return the annotation iterator for version_id.

    See VersionedFile.annotate_iter. The work is delegated to the content
    factory, which is handed this knit and the version id.
    """
    factory = self.factory
    return factory.annotate_iter(self, version_id)

1170

1171

def get_parents(self, version_id):
    """Return the parents of version_id as reported by the index.

    See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    # (i.e. ask the index directly and translate the failure, rather than
    # checking for presence first.)
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1180

1181

def get_parents_with_ghosts(self, version_id):
    """Return the parents of version_id, ghosts included.

    See VersionedFile.get_parents_with_ghosts. The answer comes straight
    from the index.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1187

1188

def get_ancestry(self, versions, topo_sorted=True):
    """Return the ancestry of versions, as computed by the index.

    See VersionedFile.get_ancestry.

    :param versions: A single version id string or a collection of
        version ids.
    :param topo_sorted: Passed through to the index's get_ancestry.
    :return: [] when no versions are given, otherwise whatever the index
        returns.
    """
    # A lone version id is accepted as shorthand for a one-element list.
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1195

1196

def get_ancestry_with_ghosts(self, versions):
    """Return the ancestry of versions, ghosts included.

    See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or a collection of
        version ids.
    :return: [] when no versions are given, otherwise whatever the index's
        get_ancestry_with_ghosts returns.
    """
    # A lone version id is accepted as shorthand for a one-element list.
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1203

1204

def plan_merge(self, ver_a, ver_b):
    """Plan a merge of ver_a and ver_b from their annotations.

    See VersionedFile.plan_merge.

    :param ver_a: First version id to merge.
    :param ver_b: Second version id to merge.
    :return: The result of merge._plan_annotate_merge for the two
        annotated texts and their (unsorted) ancestry sets.
    """
    # Topological order is irrelevant for membership tests, so the cheaper
    # unsorted ancestry is requested.
    b_ancestry = set(self.get_ancestry(ver_b, topo_sorted=False))
    a_ancestry = set(self.get_ancestry(ver_a, topo_sorted=False))
    a_annotated = self.annotate(ver_a)
    b_annotated = self.annotate(ver_b)
    return merge._plan_annotate_merge(
        a_annotated, b_annotated, a_ancestry, b_ancestry)

1212

1213

1214

class _KnitComponentFile(object):
    """One of the files used to implement a knit database.

    Holds the transport, file name and creation settings shared by the
    knit component files. Subclasses are expected to provide a HEADER
    attribute, which check_header compares against.
    """

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it should be created.

        :param transport: Transport the file is accessed through.
        :param filename: Name of the file relative to the transport.
        :param mode: Access mode requested by the caller; stored as given.
        :param file_mode: Mode to use when the file is created.
        :param create_parent_dir: Whether a missing parent directory may
            be created.
        :param dir_mode: Mode to use when a directory is created.
        """
        # Location.
        self._transport = transport
        self._filename = filename
        # Access and creation settings.
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # Nothing is pending creation until a subclass says so.
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Check that the first line read from fp is this file's HEADER.

        :param fp: File-like object positioned at the start of the file.
        :raises errors.NoSuchFile: if the first line is empty.
        :raises KnitHeaderError: if the first line is not self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line != self.HEADER:
            location = self._transport.abspath(self._filename)
            raise KnitHeaderError(badline=first_line, filename=location)

    def __repr__(self):
        """Return 'ClassName(filename)'."""
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1243

1244

1245

class _KnitIndex(_KnitComponentFile):
    """Manages a knit index file.

    The index is read on startup and kept in memory to enable fast
    lookups of revision information. New entries are appended to the end
    of the index file.

    _cache maps a version id to its index entry, stored as the tuple
    (version_id, options, pos, size, parents, index).

    _history maps sequence numbers (list positions) to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only refer by number to parents
    with a lower index number. As a result, the index is topologically
    sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than
    once. The first occurrence gets assigned a sequence number starting
    from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :
    followed by a newline, where
    REVISION_ID is a utf8-encoded revision id;
    FLAGS is a comma separated list of flags about the record. Values
        include no-eol, line-delta, fulltext;
    BYTE_OFFSET is the ascii representation of the byte offset in the data
        file that the compressed data starts at;
    LENGTH is the ascii representation of the length of the record in the
        data file;
    PARENT_ID is a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID;
    PARENT_SEQUENCE_ID is the ascii representation of the sequence number
        of a revision id already in the knit that is a parent of
        REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a
    line, or at the end of the file, then the record that is missing it
    will be ignored by the parser.

    When writing new records to the index file, the data is preceded by a
    newline to ensure that records always start on new lines even if the
    last write was interrupted. As a result it is normal for the last line
    in the index to be missing a trailing newline. One can be added with
    no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
        indexes).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        try:
            index = self._cache[version_id][5]
        except KeyError:
            index = len(self._history)
            self._history.append(version_id)
        self._cache[version_id] = (
            version_id, options, pos, size, parents, index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from transport, or arrange for it to be created.

        :param create: With mode 'w', a missing (or empty) index is created
            rather than reported.
        :param delay_create: When creating, only note that the file is
            needed; it is written by the first add_versions call.
        The remaining parameters are passed to _KnitComponentFile.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            may_create = (mode == 'w' and create)
            if not may_create:
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        cache = self._cache
        return [(version_id, cache[version_id][4]) for version_id in cache]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Ghost parents (those without an entry in the index) are dropped.

        :raises RevisionNotPresent: if a requested version is not present.
        """
        # get a graph of all the mentioned versions:
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            try:
                entry = cache[version]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in entry[4] if p in cache]
            # if not completed and not a ghost
            todo.update([p for p in parents if p not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost parents are kept in the result and treated as parentless.
        """
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
                continue
            # if not completed
            todo.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped. The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        for version_id in version_ids:
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                # Not in the index: skip it.
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references for an index line.

        Versions already in the index are written as their sequence
        number; others are written as '.' followed by the version id.

        :return: The encoded references joined by single spaces.
        """
        cache = self._cache
        encoded = []
        for version in versions:
            try:
                # -- inlined lookup() --
                encoded.append(str(cache[version][5]))
                # -- end lookup () --
            except KeyError:
                encoded.append('.' + version)
        return ' '.join(encoded)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
            (version_id, options, (index, pos, size), parents).
        :param random_id: If True the ids being added were randomly
            generated and no check for existence will be performed.
            (Not consulted by this implementation.)
        """
        # Snapshots used to roll back the in-memory state on failure.
        saved_history = self._history[:]
        saved_cache = self._cache.copy()
        new_lines = []

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # First write of a delayed-create index: header plus the
                # new records go out in one file.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename,
                                             ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        cache = self._cache
        return version_id in cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        version_id_, options, pos, size, parents, index = \
            self._cache[version_id]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises RevisionNotPresent: if version_id is not in the index.
        :raises errors.KnitIndexUnknownMethod: if the options name neither
            'fulltext' nor 'line-delta'.
        """
        try:
            options = self._cache[version_id][1]
        except KeyError:
            raise RevisionNotPresent(version_id, self._filename)
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options recorded for version_id.

        The value is returned exactly as it was cached for the entry,
        e.g. the flags foo and bar.
        """
        entry = self._cache[version_id]
        return entry[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        return [parent for parent in cache[version_id][4]
                if parent in cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        entry = self._cache[version_id]
        return entry[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first version id found missing.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1529

1530

1531

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Index keys are 1-tuples (version_id,). A node's value is a one
    character end-of-line flag ('N' for no-eol, ' ' otherwise) followed by
    "<pos> <size>". When parents are tracked the first reference list holds
    the parents; when deltas are allowed a second reference list holds the
    compression parent (empty for a fulltext).
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and
            call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas are requested without parents.
        """
        self._graph_index = graph_index
        self._add_callback = add_callback
        self._parents = parents
        self._deltas = deltas
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        Nodes are yielded as (index, key, value, refs); for a parentless
        index refs is always ().

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once all
            found nodes have been yielded and some key was not found.
        """
        wanted = set(keys)
        seen = set()
        track_parents = self._parents
        for node in self._graph_index.iter_entries(wanted):
            if track_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            seen.add(node[1])
        if check_present:
            absent = wanted.difference(seen)
            if absent:
                raise RevisionNotPresent(absent.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that exist in the index."""
        present = set()
        for node in self._get_entries(version_ids):
            present.add(node[1])
        return present

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        absent = set(wanted_keys).difference(present_keys)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            batch = pending
            pending = set()
            found = set()
            for (index, key, value, node_refs) in self._get_entries(batch):
                # dont ask for ghosties - otherwise
                # we we can end up looping with pending
                # being entirely ghosted.
                known_parents = [parent for parent in node_refs[0]
                                 if parent not in ghosts]
                graph[key] = known_parents
                # queue parents, but dont examine known nodes again
                for parent in known_parents:
                    if parent not in graph:
                        pending.add(parent)
                found.add(key)
            # Anything asked for but not returned is a ghost.
            ghosts.update(batch.difference(found))
        absent = versions.difference(graph)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        if topo_sorted:
            ordered = topo_sort(graph.items())
        else:
            ordered = graph
        return [key[0] for key in ordered]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            batch = pending
            pending = set()
            for (index, key, value, node_refs) in self._get_entries(batch):
                graph[key] = node_refs[0]
                # queue parents, but dont examine known nodes again
                for parent in graph[key]:
                    if parent not in graph:
                        pending.add(parent)
            missing_versions = batch.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for ghost in missing_versions:
                # add a key, no parents
                graph[ghost] = []
                pending.discard(ghost) # don't look for it
        ordered = topo_sort(graph.items())
        return [key[0] for key in ordered]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        graph = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            parent_ids = tuple([ref[0] for ref in refs[0]])
            graph.append((key[0], parent_ids))
        return graph

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped. The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        if not self._parents:
            wanted = self._version_ids_to_keys(version_ids)
            for node in self._get_entries(wanted):
                yield node[1][0], ()
            return
        all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
        all_parents = set()
        present_parents = set()
        for node in all_nodes:
            all_parents.update(node[3][0])
            # any node we are querying must be present
            present_parents.add(node[1])
        # Only parents not already known to be present need a lookup.
        unknown_parents = all_parents.difference(present_parents)
        present_parents.update(self._present_keys(unknown_parents))
        for node in all_nodes:
            parents = tuple([parent[0] for parent in node[3][0]
                             if parent in present_parents])
            yield node[1][0], parents

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        count = 0
        for node in self._graph_index.iter_all_entries():
            count += 1
        return count

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        versions = []
        for node in self._graph_index.iter_all_entries():
            versions.append(node[1][0])
        return versions

    def has_version(self, version_id):
        """True if the version is in the index."""
        keys = self._version_ids_to_keys([version_id])
        present = self._present_keys(keys)
        return len(present) == 1

    def _keys_to_version_ids(self, keys):
        """Return a tuple of the version ids held in 1-tuple keys."""
        return tuple([key[0] for key in keys])

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        # Skip the leading eol flag character; the rest is "<pos> <size>".
        fields = node[2][1:].split(' ')
        return node[0], int(fields[0]), int(fields[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if self._deltas:
            node = self._get_node(version_id)
            return self._parent_compression(node[3][1])
        return 'fulltext'

    def _parent_compression(self, reference_list):
        """Return 'line-delta' if reference_list is non-empty, else 'fulltext'."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list) == 0:
            return 'fulltext'
        return 'line-delta'

    def _get_node(self, version_id):
        """Return the index node for version_id.

        :raises RevisionNotPresent: if no node is found.
        """
        keys = self._version_ids_to_keys([version_id])
        try:
            return list(self._get_entries(keys))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return the list of options for version_id.

        The list holds the compression method ('fulltext' or 'line-delta')
        followed by 'no-eol' when the node value starts with 'N'.
        """
        node = self._get_node(version_id)
        if self._deltas:
            method = self._parent_compression(node[3][1])
        else:
            method = 'fulltext'
        options = [method]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        found = list(self.iter_parents([version_id]))
        if not found:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return found[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        keys = self._version_ids_to_keys([version_id])
        nodes = list(self._get_entries(keys, check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        absent = keys.difference(self._present_keys(keys))
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, access_memo, parents)
        return self.add_versions((record,))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, access_memo, parents), where access_memo
            is an (index, pos, size) tuple.
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :raises errors.ReadOnlyError: if no add callback was supplied.
        :raises KnitCorrupt: for a line-delta in a non-delta index, parents
            in a parentless index, or details that disagree with a node
            already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                eol_flag = 'N'
            else:
                eol_flag = ' '
            value = eol_flag + "%d %d" % (pos, size)
            if not self._deltas and 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if not self._parents:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            elif not self._deltas:
                node_refs = (parents, )
            elif 'line-delta' in options:
                # The delta is taken against the first (leftmost) parent.
                node_refs = (parents, (parents[0],))
            else:
                node_refs = (parents, ())
            keys[key] = (value, node_refs)
        if not random_id:
            # Nodes already in the index must match exactly; matching ones
            # are dropped so they are not added twice.
            for (index, key, value, node_refs) in self._get_entries(keys):
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        with_refs = self._parents
        result = []
        for key in keys:
            value, node_refs = keys[key]
            if with_refs:
                result.append((key, value, node_refs))
            else:
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Return the set of 1-tuple index keys for version_ids."""
        keys = set()
        for version_id in version_ids:
            keys.add((version_id, ))
        return keys

1849

1850

1851

class _KnitAccess(object):

1852

"""Access to knit records in a .knit file."""

1853

1854

def __init__(self, transport, filename, _file_mode, _dir_mode,

1855

_need_to_create, _create_parent_dir):

1856

"""Create a _KnitAccess for accessing and inserting data.

1857

1858

:param transport: The transport the .knit is located on.

1859

:param filename: The filename of the .knit.

1860

"""

1861

self._transport = transport

1862

self._filename = filename

1863

self._file_mode = _file_mode

1864

self._dir_mode = _dir_mode

1865

self._need_to_create = _need_to_create

1866

self._create_parent_dir = _create_parent_dir

1867

1868

def add_raw_records(self, sizes, raw_data):

1869

"""Add raw knit bytes to a storage area.

1870

1871

The data is spooled to whereever the access method is storing data.

1872

1873

:param sizes: An iterable containing the size of each raw data segment.

1874

:param raw_data: A bytestring containing the data.

1875

:return: A list of memos to retrieve the record later. Each memo is a

1876

tuple - (index, pos, length), where the index field is always None

1877

for the .knit access method.

1878

"""

1879

assert type(raw_data) == str, \

1880

'data must be plain bytes was %s' % type(raw_data)

1881

if not self._need_to_create:

1882

base = self._transport.append_bytes(self._filename, raw_data)

1883

else:

1884

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1885

create_parent_dir=self._create_parent_dir,

1886

mode=self._file_mode,

1887

dir_mode=self._dir_mode)

1888

self._need_to_create = False

1889

base = 0

1890

result = []

1891

for size in sizes:

1892

result.append((None, base, size))

1893

base += size

1894

return result

1895

1896

def create(self):

1897

"""IFF this data access has its own storage area, initialise it.

1898

1899

:return: None.

1900

"""

1901

self._transport.put_bytes_non_atomic(self._filename, '',

1902

mode=self._file_mode)

1903

1904

def open_file(self):

1905

"""IFF this data access can be represented as a single file, open it.

1906

1907

For knits that are not mapped to a single file on disk this will

1908

always return None.

1909

1910

:return: None or a file handle.

1911

"""

1912

try:

1913

return self._transport.get(self._filename)

1914

except NoSuchFile:

1915

pass

1916

return None

1917

1918

def get_raw_records(self, memos_for_retrieval):

1919

"""Get the raw bytes for a records.

1920

1921

:param memos_for_retrieval: An iterable containing the (index, pos,

1922

length) memo for retrieving the bytes. The .knit method ignores

1923

the index as there is always only a single file.

1924

:return: An iterator over the bytes of the records.

1925

"""

1926

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1927

for pos, data in self._transport.readv(self._filename, read_vector):

1928

yield data

1929

1930

1931

class _PackAccess(object):

1932

"""Access to knit records via a collection of packs."""

1933

1934

def __init__(self, index_to_packs, writer=None):

1935

"""Create a _PackAccess object.

1936

1937

:param index_to_packs: A dict mapping index objects to the transport

1938

and file names for obtaining data.

1939

:param writer: A tuple (pack.ContainerWriter, write_index) which

1940

contains the pack to write, and the index that reads from it will

1941

be associated with.

1942

"""

1943

if writer:

1944

self.container_writer = writer[0]

1945

self.write_index = writer[1]

1946

else:

1947

self.container_writer = None

1948

self.write_index = None

1949

self.indices = index_to_packs

1950

1951

def add_raw_records(self, sizes, raw_data):

1952

"""Add raw knit bytes to a storage area.

1953

1954

The data is spooled to the container writer in one bytes-record per

1955

raw data item.

1956

1957

:param sizes: An iterable containing the size of each raw data segment.

1958

:param raw_data: A bytestring containing the data.

1959

:return: A list of memos to retrieve the record later. Each memo is a

1960

tuple - (index, pos, length), where the index field is the

1961

write_index object supplied to the PackAccess object.

1962

"""

1963

assert type(raw_data) == str, \

1964

'data must be plain bytes was %s' % type(raw_data)

1965

result = []

1966

offset = 0

1967

for size in sizes:

1968

p_offset, p_length = self.container_writer.add_bytes_record(

1969

raw_data[offset:offset+size], [])

1970

offset += size

1971

result.append((self.write_index, p_offset, p_length))

1972

return result

1973

1974

def create(self):

1975

"""Pack based knits do not get individually created."""

1976

1977

def get_raw_records(self, memos_for_retrieval):

1978

"""Get the raw bytes for a records.

1979

1980

:param memos_for_retrieval: An iterable containing the (index, pos,

1981

length) memo for retrieving the bytes. The Pack access method

1982

looks up the pack to use for a given record in its index_to_pack

1983

map.

1984

:return: An iterator over the bytes of the records.

1985

"""

1986

# first pass, group into same-index requests

1987

request_lists = []

1988

current_index = None

1989

for (index, offset, length) in memos_for_retrieval:

1990

if current_index == index:

1991

current_list.append((offset, length))

1992

else:

1993

if current_index is not None:

1994

request_lists.append((current_index, current_list))

1995

current_index = index

1996

current_list = [(offset, length)]

1997

# handle the last entry

1998

if current_index is not None:

1999

request_lists.append((current_index, current_list))

2000

for index, offsets in request_lists:

2001

transport, path = self.indices[index]

2002

reader = pack.make_readv_reader(transport, path, offsets)

2003

for names, read_func in reader.iter_records():

2004

yield read_func(None)

2005

2006

def open_file(self):

2007

"""Pack based knits have no single file."""

2008

return None

2009

2010

def set_writer(self, writer, index, (transport, packname)):

2011

"""Set a writer to use for adding data."""

2012

if index is not None:

2013

self.indices[index] = (transport, packname)

2014

self.container_writer = writer

2015

self.write_index = index

2016

2017

2018

class _KnitData(object):

2019

"""Manage extraction of data from a KnitAccess, caching and decompressing.

2020

2021

The KnitData class provides the logic for parsing and using knit records,

2022

making use of an access method for the low level read and write operations.

2023

"""

2024

2025

def __init__(self, access):

2026

"""Create a KnitData object.

2027

2028

:param access: The access method to use. Access methods such as

2029

_KnitAccess manage the insertion of raw records and the subsequent

2030

retrieval of the same.

2031

"""

2032

self._access = access

2033

self._checked = False

2034

# TODO: jam 20060713 conceptually, this could spill to disk

2035

# if the cached size gets larger than a certain amount

2036

# but it complicates the model a bit, so for now just use

2037

# a simple dictionary

2038

self._cache = {}

2039

self._do_cache = False

2040

2041

def enable_cache(self):

2042

"""Enable caching of reads."""

2043

self._do_cache = True

2044

2045

def clear_cache(self):

2046

"""Clear the record cache."""

2047

self._do_cache = False

2048

self._cache = {}

2049

2050

def _open_file(self):

2051

return self._access.open_file()

2052

2053

def _record_to_data(self, version_id, digest, lines, dense_lines=None):

2054

"""Convert version_id, digest, lines into a raw data block.

2055

2056

:param dense_lines: The bytes of lines but in a denser form. For

2057

instance, if lines is a list of 1000 bytestrings each ending in \n,

2058

dense_lines may be a list with one line in it, containing all the

2059

1000's lines and their \n's. Using dense_lines if it is already

2060

known is a win because the string join to create bytes in this

2061

function spends less time resizing the final string.

2062

:return: (len, a StringIO instance with the raw data ready to read.)

2063

"""

2064

# Note: using a string copy here increases memory pressure with e.g.

2065

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

2066

# when doing the initial commit of a mozilla tree. RBC 20070921

2067

bytes = ''.join(chain(

2068

["version %s %d %s\n" % (version_id,

2069

len(lines),

2070

digest)],

2071

dense_lines or lines,

2072

["end %s\n" % version_id]))

2073

assert bytes.__class__ == str

2074

compressed_bytes = bytes_to_gzip(bytes)

2075

return len(compressed_bytes), compressed_bytes

2076

2077

def add_raw_records(self, sizes, raw_data):

2078

"""Append a prepared record to the data file.

2079

2080

:param sizes: An iterable containing the size of each raw data segment.

2081

:param raw_data: A bytestring containing the data.

2082

:return: a list of index data for the way the data was stored.

2083

See the access method add_raw_records documentation for more

2084

details.

2085

"""

2086

return self._access.add_raw_records(sizes, raw_data)

2087

2088

def _parse_record_header(self, version_id, raw_data):

2089

"""Parse a record header for consistency.

2090

2091

:return: the header and the decompressor stream.

2092

as (stream, header_record)

2093

"""

2094

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2095

try:

2096

rec = self._check_header(version_id, df.readline())

2097

except Exception, e:

2098

raise KnitCorrupt(self._access,

2099

"While reading {%s} got %s(%s)"

2100

% (version_id, e.__class__.__name__, str(e)))

2101

return df, rec

2102

2103

def _check_header(self, version_id, line):

2104

rec = line.split()

2105

if len(rec) != 4:

2106

raise KnitCorrupt(self._access,

2107

'unexpected number of elements in record header')

2108

if rec[1] != version_id:

2109

raise KnitCorrupt(self._access,

2110

'unexpected version, wanted %r, got %r'

2111

% (version_id, rec[1]))

2112

return rec

2113

2114

def _parse_record(self, version_id, data):

2115

# profiling notes:

2116

# 4168 calls in 2880 217 internal

2117

# 4168 calls to _parse_record_header in 2121

2118

# 4168 calls to readlines in 330

2119

df = GzipFile(mode='rb', fileobj=StringIO(data))

2120

2121

try:

2122

record_contents = df.readlines()

2123

except Exception, e:

2124

raise KnitCorrupt(self._access,

2125

"While reading {%s} got %s(%s)"

2126

% (version_id, e.__class__.__name__, str(e)))

2127

header = record_contents.pop(0)

2128

rec = self._check_header(version_id, header)

2129

2130

last_line = record_contents.pop()

2131

if len(record_contents) != int(rec[2]):

2132

raise KnitCorrupt(self._access,

2133

'incorrect number of lines %s != %s'

2134

' for version {%s}'

2135

% (len(record_contents), int(rec[2]),

2136

version_id))

2137

if last_line != 'end %s\n' % rec[1]:

2138

raise KnitCorrupt(self._access,

2139

'unexpected version end line %r, wanted %r'

2140

% (last_line, version_id))

2141

df.close()

2142

return record_contents, rec[3]

2143

2144

def read_records_iter_raw(self, records):

2145

"""Read text records from data file and yield raw data.

2146

2147

This unpacks enough of the text record to validate the id is

2148

as expected but thats all.

2149

"""

2150

# setup an iterator of the external records:

2151

# uses readv so nice and fast we hope.

2152

if len(records):

2153

# grab the disk data needed.

2154

if self._cache:

2155

# Don't check _cache if it is empty

2156

needed_offsets = [index_memo for version_id, index_memo

2157

in records

2158

if version_id not in self._cache]

2159

else:

2160

needed_offsets = [index_memo for version_id, index_memo

2161

in records]

2162

2163

raw_records = self._access.get_raw_records(needed_offsets)

2164

2165

for version_id, index_memo in records:

2166

if version_id in self._cache:

2167

# This data has already been validated

2168

data = self._cache[version_id]

2169

else:

2170

data = raw_records.next()

2171

if self._do_cache:

2172

self._cache[version_id] = data

2173

2174

# validate the header

2175

df, rec = self._parse_record_header(version_id, data)

2176

df.close()

2177

yield version_id, data

2178

2179

def read_records_iter(self, records):

2180

"""Read text records from data file and yield result.

2181

2182

The result will be returned in whatever is the fastest to read.

2183

Not by the order requested. Also, multiple requests for the same

2184

record will only yield 1 response.

2185

:param records: A list of (version_id, pos, len) entries

2186

:return: Yields (version_id, contents, digest) in the order

2187

read, not the order requested

2188

"""

2189

if not records:

2190

return

2191

2192

if self._cache:

2193

# Skip records we have alread seen

2194

yielded_records = set()

2195

needed_records = set()

2196

for record in records:

2197

if record[0] in self._cache:

2198

if record[0] in yielded_records:

2199

continue

2200

yielded_records.add(record[0])

2201

data = self._cache[record[0]]

2202

content, digest = self._parse_record(record[0], data)

2203

yield (record[0], content, digest)

2204

else:

2205

needed_records.add(record)

2206

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2207

else:

2208

needed_records = sorted(set(records), key=operator.itemgetter(1))

2209

2210

if not needed_records:

2211

return

2212

2213

# The transport optimizes the fetching as well

2214

# (ie, reads continuous ranges.)

2215

raw_data = self._access.get_raw_records(

2216

[index_memo for version_id, index_memo in needed_records])

2217

2218

for (version_id, index_memo), data in \

2219

izip(iter(needed_records), raw_data):

2220

content, digest = self._parse_record(version_id, data)

2221

if self._do_cache:

2222

self._cache[version_id] = data

2223

yield version_id, content, digest

2224

2225

def read_records(self, records):

2226

"""Read records into a dictionary."""

2227

components = {}

2228

for record_id, content, digest in \

2229

self.read_records_iter(records):

2230

components[record_id] = (content, digest)

2231

return components

2232

2233

2234

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def _copy_texts(self, pb, msg, version_ids, ignore_missing=False):
        """Copy texts to the target by extracting and adding them one by one.

        see join() for the parameter definitions.

        :return: The number of versions added to the target.
        """
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        source_graph = self.source.get_graph(version_ids)
        sorted_versions = topo_sort(source_graph.items())

        def size_of_content(content):
            # Size of a cached parent text: total length of its lines.
            byte_count = 0
            for line in content.text():
                byte_count += len(line)
            return byte_count

        # Cache at most 10MB of parent texts
        parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
                                              compute_size=size_of_content)
        # TODO: jam 20071116 It would be nice to have a streaming interface to
        #       get multiple texts from a source. The source could be smarter
        #       about how it handled intermediate stages.
        #       get_line_list() or make_mpdiffs() seem like a possibility, but
        #       at the moment they extract all full texts into memory, which
        #       causes us to store more than our 3x fulltext goal.
        #       Repository.iter_files_bytes() may be another possibility
        to_process = []
        for version in sorted_versions:
            if version not in self.target:
                to_process.append(version)
        total = len(to_process)
        # The progress bar passed in is not used; a nested one is created.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for position, version in enumerate(to_process):
                pb.update('Converting versioned data', position, total)
                version_parents = self.source.get_parents(version)
                version_lines = self.source.get_lines(version)
                sha1, num_bytes, parent_text = self.target.add_lines(
                    version, version_parents, version_lines,
                    parent_texts=parent_cache)
                parent_cache[version] = parent_text
        finally:
            pb.finished()
        return total

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly
        source_annotated = self.source.factory.annotated
        if source_annotated == self.target.factory.annotated:
            converter = None
        elif source_annotated:
            converter = self._anno_to_plain_converter
        else:
            # We're converting from a plain to an annotated knit. Copy them
            # across by full texts.
            return self._copy_texts(pb, msg, version_ids, ignore_missing)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            source_index = self.source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = source_index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for fetched, planned in izip(raw_stream, copy_queue):
                version_id, raw_data = fetched
                version_id2, options, parents = planned
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                        parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: Whatever the target's _record_to_data returns for the
            re-encoded lines.
        """
        source_factory = self.source.factory
        target_factory = self.target.factory
        data, digest = self.source._data._parse_record(version_id, raw_data)
        if 'fulltext' in options:
            content = source_factory.parse_fulltext(data, version_id)
            lines = target_factory.lower_fulltext(content)
        else:
            delta = source_factory.parse_line_delta(data, version_id,
                plain=True)
            lines = target_factory.lower_line_delta(delta)
        return self.target._data._record_to_data(version_id, digest, lines)

2384

2385

2386

# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

2387

2388

2389

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True for a Weave source and a KnitVersionedFile target."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # The progress bar passed in is not used; a nested one is created.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # do the join:
            total = len(version_list)
            for done, version_id in enumerate(version_list):
                pb.update("Converting to knit", done, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
            # Every entry of version_list was added, so the count is total.
            return total
        finally:
            pb.finished()

2445

2446

2447

# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

2448

2449

2450

# Deprecated, use PatienceSequenceMatcher instead

2451

# Alias binding the KnitSequenceMatcher name to
# patiencediff.PatienceSequenceMatcher.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2452

2453

2454

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to read ancestry, texts and deltas from.
    :param revision_id: The version whose annotation is wanted.
    :return: An iterator over the annotation computed for revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = knit.get_line_list(ancestry)
    fulltext = dict(zip(ancestry, texts))
    annotations = {}
    for candidate in ancestry:
        if candidate in annotations:
            continue
        parents = knit.get_parents(candidate)
        # Matching blocks are only derived for versions that have parents
        # and are stored as a line-delta; otherwise reannotate gets None.
        blocks = None
        if (len(parents) != 0
            and knit._index.get_method(candidate) == 'line-delta'):
            delta_parent, sha1, noeol, delta = knit.get_delta(candidate)
            blocks = KnitContent.get_line_delta_blocks(delta,
                fulltext[parents[0]], fulltext[candidate])
        parent_annotations = [annotations[p] for p in parents]
        annotations[candidate] = list(annotate.reannotate(
            parent_annotations, fulltext[candidate], candidate, blocks))
    return iter(annotations[revision_id])

2479

2480

2481

try:

2482

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2483

except ImportError:

2484

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »