/brz/remove-bazaar : revision 3031.3.1

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2007-11-27 00:44:20 UTC
mto: This revision was merged to the branch mainline in revision 3217.
Revision ID: robertc@robertcollins.net-20071127004420-ktt8r1716pm4xq80

Remove the unneeded ExperimentalBranch class.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/ftp_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/authentication_conf.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

lru_cache,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

100

KnitHeaderError,

101

RevisionNotPresent,

102

RevisionAlreadyPresent,

103

)

104

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

105

from bzrlib.osutils import (

106

contains_whitespace,

107

contains_linebreaks,

108

sha_string,

109

sha_strings,

110

)

111

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

112

from bzrlib.tsort import topo_sort

113

import bzrlib.ui

114

import bzrlib.weave

115

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

116

117

118

# TODO: Split out code specific to this format into an associated object.

119

120

# TODO: Can we put in some kind of value to check that the index and data

121

# files belong together?

122

123

# TODO: accommodate binaries, perhaps by storing a byte count

124

125

# TODO: function to check whole file

126

127

# TODO: atomically append data, then measure backwards from the cursor

128

# position after writing to work out where it was located. we may need to

129

# bypass python file buffering.

130

131

DATA_SUFFIX = '.knit'

132

INDEX_SUFFIX = '.kndx'

133

134

135

class KnitContent(object):
    """Base class for the content of a single knit version.

    Concrete subclasses are expected to provide ``annotate_iter()`` and
    ``text()`` and to override ``apply_delta()``; the helpers defined here
    are written purely in terms of those methods.
    """

    def annotate(self):
        """Return everything produced by annotate_iter() as a list."""
        annotated = self.annotate_iter()
        return list(annotated)

    def apply_delta(self, delta, new_version_id):
        """Turn this content into new_version_id by applying delta.

        Always raises NotImplementedError here; subclasses override it.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta that turns this content into new_lines.

        :param new_lines: Another content object; its ``text()`` is diffed
            against ours and the replacement lines are sliced out of its
            ``_lines`` attribute.
        :return: An iterator of (start, end, count, lines) tuples, one per
            non-'equal' opcode reported by the patience sequence matcher.
            start/end index into this content, count is the number of
            replacement lines taken from new_lines.
        """
        target_text = new_lines.text()
        source_text = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_text, target_text)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # old range start, old range end, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the output of line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher-style matching blocks from a knit delta.

        :param knit_delta: Iterable of (s_begin, s_end, t_len, new_text)
            hunks; new_text is not inspected.
        :param source: Indexable sequence of the lines the delta applies to.
        :param target: Indexable sequence of the lines the delta produces.
        :return: An iterator of (source_pos, target_pos, length) triples,
            always finishing with a zero-length triple.
        """
        target_len = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            # Number of lines left untouched in front of this hunk.
            gap = hunk_start - src_at
            if gap > 0:
                matched = gap
                # A knit delta cannot be trusted about whether the final
                # line of a file matches (eol handling), so compare the
                # last line of the run explicitly.
                if source[src_at + gap - 1] != target[tgt_at + gap - 1]:
                    matched = gap - 1
                if matched > 0:
                    yield src_at, tgt_at, matched
            tgt_at += inserted + gap
            src_at = hunk_end
        # Whatever follows the last hunk is treated the same way.
        tail = target_len - tgt_at
        if tail > 0:
            matched = tail
            if source[src_at + tail - 1] != target[tgt_at + tail - 1]:
                matched -= 1
            if matched > 0:
                yield src_at, tgt_at, matched
        # Closing zero-length block.
        yield src_at + (target_len - tgt_at), target_len, 0

185

186

187

class AnnotatedKnitContent(KnitContent):

188

"""Annotated content."""

189

190

def __init__(self, lines):

191

self._lines = lines

192

193

def annotate_iter(self):

194

"""Yield tuples of (origin, text) for each content line."""

195

return iter(self._lines)

196

197

def apply_delta(self, delta, new_version_id):

198

"""Apply delta to this object to become new_version_id."""

199

offset = 0

200

lines = self._lines

201

for start, end, count, delta_lines in delta:

202

lines[offset+start:offset+end] = delta_lines

203

offset = offset + (start - end) + count

204

205

def strip_last_line_newline(self):

206

line = self._lines[-1][1].rstrip('\n')

207

self._lines[-1] = (self._lines[-1][0], line)

208

209

def text(self):

210

try:

211

return [text for origin, text in self._lines]

212

except ValueError, e:

213

# most commonly (only?) caused by the internal form of the knit

214

# missing annotation information because of a bug - see thread

215

# around 20071015

216

raise KnitCorrupt(self,

217

"line in annotated knit missing annotation information: %s"

218

% (e,))

219

220

def copy(self):

221

return AnnotatedKnitContent(self._lines[:])

222

223

224

class PlainKnitContent(KnitContent):
    """Unannotated content.

    Lines are stored as plain strings together with a single version id.
    annotate[_iter] therefore reports that same version for every line,
    which generally makes it not useful on PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        self._version_id = version_id
        self._lines = lines

    def annotate_iter(self):
        """Yield (version_id, line) for each stored line."""
        for text in self._lines:
            yield self._version_id, text

    def apply_delta(self, delta, new_version_id):
        """Apply delta in place, then record new_version_id as the version.

        delta is an iterable of (start, end, count, lines) hunks whose
        positions refer to the content before any hunk was applied.
        """
        shift = 0
        target = self._lines
        for begin, finish, length, replacement in delta:
            target[shift + begin:shift + finish] = replacement
            shift = shift + (begin - finish) + length
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of the lines."""
        return PlainKnitContent(list(self._lines), self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newlines from the last line."""
        final = self._lines[-1]
        self._lines[-1] = final.rstrip('\n')

    def text(self):
        """Return the stored list of lines itself (not a copy)."""
        return self._lines

258

259

260

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return AnnotatedKnitContent attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        version_id is not used here; each line carries its own origin.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over parse_line_delta(lines, version_id).

        version_id is optional for backward compatibility with the old
        one-argument signature; parse_line_delta does not read it.
        """
        # The old body called parse_line_delta(lines) without version_id,
        # which always raised TypeError.
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e. (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        next = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next().split(' ', 1)[1] for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of knit's content for version_id."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

370

371

372

class KnitPlainFactory(object):
    """Factory for creating plain (unannotated) Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines and version_id in a PlainKnitContent."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        The stored lines need no conversion; they are handed to make()
        together with version_id.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) for each hunk in lines.

        lines must support len() and slicing. version_id is not used.
        """
        position = 0
        total = len(lines)
        while position < total:
            header = lines[position]
            position += 1
            start, end, count = map(int, header.split(','))
            yield start, end, count, lines[position:position + count]
            position += count

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        Hunk headers are consumed and dropped; only the content lines
        that follow each header are yielded.
        """
        stream = iter(lines)
        take = stream.next
        for header in stream:
            fields = header.split(',')
            remaining = int(fields[2])
            for _ in xrange(remaining):
                yield take()

    def lower_fulltext(self, content):
        """Return content.text(), the serialisable form of a fulltext."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta hunks as header lines followed by their content."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialised.append(header)
            serialised.extend(hunk_lines)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Delegate annotation of version_id to annotate_knit."""
        return annotate_knit(knit, version_id)

431

432

433

def make_empty_knit(transport, relpath):
    """Construct an empty, writable, unannotated knit at the given location.

    :param transport: Transport the knit files live on.
    :param relpath: Path of the knit relative to transport, without the
        data/index suffixes.
    :return: The newly constructed KnitVersionedFile.
    """
    # KnitVersionedFile takes (relpath, transport, file_mode, access_mode,
    # factory, ...). The old positional call passed the arguments in a
    # different order, so the access-mode assertion failed, and the knit
    # was never returned.
    # NOTE(review): create=True is assumed to be what "construct an empty
    # knit" means here -- confirm against callers.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

436

437

438

class KnitVersionedFile(VersionedFile):

439

"""Weave-like structure with faster random access.

440

441

A knit stores a number of texts and a summary of the relationships

442

between them. Texts are identified by a string version-id. Texts

443

are normally stored and retrieved as a series of lines, but can

444

also be passed as single strings.

445

446

Lines are stored with the trailing newline (if any) included, to

447

avoid special cases for files with no final newline. Lines are

448

composed of 8-bit characters, not unicode. The combination of

449

these approaches should mean any 'binary' file can be safely

450

stored and retrieved.

451

"""

452

453

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, delta=True, create=False, create_parent_dir=False,
             delay_create=False, dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base name of the knit; INDEX_SUFFIX and DATA_SUFFIX
        are appended to it for the index and data files. Stored as
        self.filename.
    :param transport: Transport handed to the index and data access
        objects. Stored as self.transport.
    :param file_mode: Passed through to the index and data access objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: Content factory; a KnitAnnotateFactory is created
        when this is not supplied.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data access objects.
    :param index: An index to use for the knit.
    :param access_method: A data access object to use instead of
        building a _KnitAccess.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on the number of parents _check_should_delta walks.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # len(self) is consulted below, so the index must be set up first.
    # NOTE(review): presumably len(self) counts indexed versions -- confirm.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    # Create the data file right away only for a new, empty knit whose
    # creation has not been delayed.
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

493

494

def __repr__(self):

495

return '%s(%s)' % (self.__class__.__name__,

496

self.transport.abspath(self.filename))

497

498

def _check_should_delta(self, first_parents):
    """Decide whether a new version should be stored as a delta.

    Follows the first parent of each version, starting from
    first_parents, for at most _max_delta_chain steps and adds up the
    sizes of the deltas passed on the way. The walk stops at the first
    fulltext.

    Return True if we should create a new delta, False if we should use a
    full text: True only when a fulltext was reached and it is larger
    than the accumulated deltas.
    """
    accumulated = 0
    candidates = first_parents
    for _ in xrange(self._max_delta_chain):
        ancestor = candidates[0]
        method = self._index.get_method(ancestor)
        _unused_index, _unused_pos, size = self._index.get_position(ancestor)
        if method == 'fulltext':
            return size > accumulated
        accumulated += size
        candidates = self._index.get_parents(ancestor)
    # We couldn't find a fulltext within the chain limit, so we must
    # create a new one.
    return False

526

527

def _add_raw_records(self, records, data):

528

"""Add all the records 'records' with data pre-joined in 'data'.

529

530

:param records: A list of tuples(version_id, options, parents, size).

531

:param data: The data for the records. When it is written, the records

532

are adjusted to have pos pointing into data by the sum of

533

the preceding records sizes.

534

"""

535

# write all the data

536

raw_record_sizes = [record[3] for record in records]

537

positions = self._data.add_raw_records(raw_record_sizes, data)

538

offset = 0

539

index_entries = []

540

for (version_id, options, parents, size), access_memo in zip(

541

records, positions):

542

index_entries.append((version_id, options, access_memo, parents))

543

if self._data._do_cache:

544

self._data._cache[version_id] = data[offset:offset+size]

545

offset += size

546

self._index.add_versions(index_entries)

547

548

def enable_cache(self):

549

"""Start caching data for this knit"""

550

self._data.enable_cache()

551

552

def clear_cache(self):

553

"""Clear the data cache only."""

554

self._data.clear_cache()

555

556

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    Copies this knit's index and data files to transport under name,
    with INDEX_SUFFIX and DATA_SUFFIX appended.

    :param name: Base name of the copy on the target transport.
    :param transport: Transport to copy to.
    """
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
            self.transport.get(self._index._filename))
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        # Close the source data file even if the copy fails.
        f.close()
    # move the copied index into place
    transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

570

571

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport with this knit's settings.

    The new knit uses this knit's factory and delta setting. mode is
    accepted but not used.
    """
    settings = {
        'factory': self.factory,
        'delta': self.delta,
        'create': True,
    }
    return KnitVersionedFile(name, transport, **settings)

574

575

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    :raises RevisionNotPresent: If any requested version is not in this
        knit.
    """
    if not isinstance(required_versions, set):
        required_versions = set(required_versions)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    version_list = []
    for version_id in self.versions():
        if version_id in required_versions:
            version_list.append(version_id)

    # create the list of version information for the result
    copy_queue_records = []
    # NOTE(review): copy_set is filled below but never read in this method.
    copy_set = set()
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        # Only the size element of the index memo is used here.
        none, data_pos, data_size = index_memo
        copy_set.add(version_id)
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    for (version_id, raw_data), \
            (version_id2, options, _, parents) in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 result_version_list):
        assert version_id == version_id2, 'logic error, inconsistent results'
        raw_datum.append(raw_data)
    # All raw records are joined into one in-memory file that the
    # returned callable reads from sequentially.
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        # length None means "read everything that is left".
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

635

636

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks for version_id derived from its stored delta.

    Returns None unless version_id is stored as a 'line-delta';
    otherwise returns KnitContent.get_line_delta_blocks() for that delta
    applied between source and target.
    """
    stored_as = self._index.get_method(version_id)
    if stored_as == 'line-delta':
        _parent, _sha1, _noeol, hunks = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(hunks, source, target)
    return None

641

642

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :param version_id: The version to describe.
    :return: (parent, sha1, noeol, delta). parent is the first parent of
        version_id, or None if it has no parents; sha1 is the digest
        read with the record; noeol is True when the index options
        contain 'no-eol'; delta is a list of hunks.
    """
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    else:
        parent = None
    index_memo = self._index.get_position(version_id)
    data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' == self._index.get_method(version_id):
        # Stored as a fulltext: synthesise a delta by diffing against the
        # first parent's text (or against nothing when there is no parent).
        new_content = self.factory.parse_fulltext(data, version_id)
        if parent is not None:
            reference_content = self._get_content(parent)
            old_texts = reference_content.text()
        else:
            old_texts = []
        new_texts = new_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                         new_texts)
        return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
    else:
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

667

668

def get_format_signature(self):

669

"""See VersionedFile.get_format_signature()."""

670

if self.factory.annotated:

671

annotated_part = "annotated"

672

else:

673

annotated_part = "plain"

674

return "knit-%s" % (annotated_part,)

675

676

def get_graph_with_ghosts(self):

677

"""See VersionedFile.get_graph_with_ghosts()."""

678

graph_items = self._index.get_graph()

679

return dict(graph_items)

680

681

def get_sha1(self, version_id):

682

return self.get_sha1s([version_id])[0]

683

684

def get_sha1s(self, version_ids):

685

"""See VersionedFile.get_sha1()."""

686

record_map = self._get_record_map(version_ids)

687

# record entry 2 is the 'digest'.

688

return [record_map[v][2] for v in version_ids]

689

690

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Returns a new list holding the data suffix followed by the index
    suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

694

695

def has_ghost(self, version_id):

696

"""True if there is a ghost reference in the file to version_id."""

697

# maybe we have it

698

if self.has_version(version_id):

699

return False

700

# optimisable if needed by memoising the _ghosts set.

701

items = self._index.get_graph()

702

for node, parents in items:

703

for parent in parents:

704

if parent not in self._index._cache:

705

if parent == version_id:

706

return True

707

return False

708

709

def insert_data_stream(self, (format, data_list, reader_callable)):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    The single argument is a (format, data_list, reader_callable) tuple
    as returned by get_data_stream: data_list holds
    (version_id, options, length, parents) entries and reader_callable
    is called with each length to obtain that record's raw bytes.

    :raises KnitDataStreamIncompatible: If format differs from this
        knit's format signature.
    :raises KnitCorrupt: If a record disagrees with stored data, or a
        line-delta's first parent is not in the index.

    :seealso: get_data_stream
    """
    if format != self.get_format_signature():
        trace.mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            format, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.
            # The record is still read from the stream here, even though
            # it will not be stored.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            df, rec = self._data._parse_record_header(version_id, raw_data)
            # NOTE(review): element 3 of the parsed header is taken to be
            # the sha-1 -- confirm against _parse_record_header.
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            if 'line-delta' in options:
                # Make sure that this knit record is actually useful: a
                # line-delta is no use unless we have its parent.
                # Fetching from a broken repository with this problem
                # shouldn't break the target repository.
                # NOTE(review): an empty parents list would raise
                # IndexError on parents[0] here rather than KnitCorrupt.
                if not self._index.has_version(parents[0]):
                    raise KnitCorrupt(
                        self.filename,
                        'line-delta from stream references '
                        'missing parent %s' % parents[0])
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

761

762

def versions(self):
    """See VersionedFile.versions.

    Returns the result of the index's get_versions().
    """
    # With the 'evil' debug flag set, log where this was called from,
    # because the call scales with the size of history.
    # NOTE(review): the 2 passed to mutter_callsite is presumably a
    # stack depth -- confirm before wrapping this call in a helper.
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

767

768

def has_version(self, version_id):
    """See VersionedFile.has_version.

    Returns the result of the index's has_version(version_id).
    """
    # With the 'evil' debug flag set, log where this was called from:
    # checking before acting is a look-before-you-leap pattern.
    # NOTE(review): the 2 passed to mutter_callsite is presumably a
    # stack depth -- confirm before wrapping this call in a helper.
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    return self._index.has_version(version_id)

773

774

__contains__ = has_version

775

776

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changes to the text.

    :param content: The new content; its _lines are updated in place
        when annotated is true.
    :param parents: Version ids of the parents to merge from.
    :param parent_texts: Passed through to _get_content for each parent;
        it is only read here, never modified.
    :param delta: If true, a line delta against parents[0] is returned.
    :param annotated: If true, matching lines take their (origin, text)
        pairs from the parents.
    :param left_matching_blocks: Optional precomputed matching blocks
        between parents[0] and content, used instead of running a diff.
    :return: The line delta when delta is true, otherwise None.
    """
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            # The prematched blocks only describe the first parent.
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

809

810

def _make_line_delta(self, delta_seq, new_content):

811

"""Generate a line delta from delta_seq and new_content."""

812

diff_hunks = []

813

for op in delta_seq.get_opcodes():

814

if op[0] == 'equal':

815

continue

816

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

817

return diff_hunks

818

819

def _get_components_positions(self, version_ids):

820

"""Produce a map of position data for the components of versions.

821

822

This data is intended to be used for retrieving the knit records.

823

824

A dict of version_id to (method, data_pos, data_size, next) is

825

returned.

826

method is the way referenced data should be applied.

827

data_pos is the position of the data in the knit.

828

data_size is the size of the data in the knit.

829

next is the build-parent of the version, or None for fulltexts.

830

"""

831

component_data = {}

832

for version_id in version_ids:

833

cursor = version_id

834

835

while cursor is not None and cursor not in component_data:

836

method = self._index.get_method(cursor)

837

if method == 'fulltext':

838

next = None

839

else:

840

next = self.get_parents_with_ghosts(cursor)[0]

841

index_memo = self._index.get_position(cursor)

842

component_data[cursor] = (method, index_memo, next)

843

cursor = next

844

return component_data

845

846

def _get_content(self, version_id, parent_texts={}):

847

"""Returns a content object that makes up the specified

848

version."""

849

cached_version = parent_texts.get(version_id, None)

850

if cached_version is not None:

851

if not self.has_version(version_id):

852

raise RevisionNotPresent(version_id, self.filename)

853

return cached_version

854

855

text_map, contents_map = self._get_content_maps([version_id])

856

return contents_map[version_id]

857

858

def _check_versions_present(self, version_ids):

859

"""Check that all specified versions are present."""

860

self._index.check_versions_present(version_ids)

861

862

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

863

nostore_sha, random_id, check_content):

864

"""See VersionedFile.add_lines_with_ghosts()."""

865

self._check_add(version_id, lines, random_id, check_content)

866

return self._add(version_id, lines, parents, self.delta,

867

parent_texts, None, nostore_sha, random_id)

868

869

def _add_lines(self, version_id, parents, lines, parent_texts,

870

left_matching_blocks, nostore_sha, random_id, check_content):

871

"""See VersionedFile.add_lines."""

872

self._check_add(version_id, lines, random_id, check_content)

873

self._check_versions_present(parents)

874

return self._add(version_id, lines[:], parents, self.delta,

875

parent_texts, left_matching_blocks, nostore_sha, random_id)

876

877

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If random_id is false and
        version_id is already in this knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id:
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

891

892

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: Id of the new version; must be a str.
    :param lines: The lines of the new text.
    :param parents: Parent version ids, recorded in the index as given.
    :param delta: Whether a line-delta may be stored instead of a
        fulltext.
    :param parent_texts: Optional dict of cached parent contents; None
        is treated as empty.
    :param left_matching_blocks: Optional precomputed matching blocks,
        passed on to _merge_annotations.
    :param nostore_sha: If equal to the digest of the new text, nothing
        is stored.
    :param random_id: Passed on to the index's add_versions.
    :return: (digest, text_length, content).
    :raises errors.ExistingContent: If nostore_sha matches the digest.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    for parent in parents:
        if self.has_version(parent):
            present_parents.append(parent)

    # can only compress against the left most present parent.
    if (delta and
        (len(present_parents) == 0 or
         present_parents[0] != parents[0])):
        delta = False

    # The digest and length describe the text as given, before any
    # newline is appended below.
    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
        size, bytes = self._data._record_to_data(version_id, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self.factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = self._data._record_to_data(version_id, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self.factory.lower_fulltext(content)
            size, bytes = self._data._record_to_data(version_id, digest,
                store_lines)

    # Write the record first, then index it under the returned memo.
    access_memo = self._data.add_raw_records([size], bytes)[0]
    self._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

970

971

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks: the body is empty and
    progress_bar is ignored.
    """

973

974

def _clone_text(self, new_version_id, old_version_id, parents):

975

"""See VersionedFile.clone_text()."""

976

# FIXME RBC 20060228 make fast by only inserting an index with null

977

# delta.

978

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

979

980

def get_lines(self, version_id):

981

"""See VersionedFile.get_lines()."""

982

return self.get_line_list([version_id])[0]

983

984

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is what the data object yields for the record.
    digest is the SHA1 digest of this version id after all steps are done
    next is the build-parent of the version, i.e. the leftmost ancestor.
    If the method is fulltext, next will be None.
    """
    positions = self._get_components_positions(version_ids)
    # Only the component id and its index memo are needed to read.
    wanted = []
    for component, (_method, memo, _next) in positions.iteritems():
        wanted.append((component, memo))
    result = {}
    for component, content, digest in self._data.read_records_iter(wanted):
        method, _memo, build_parent = positions[component]
        result[component] = (method, content, digest, build_parent)
    return result

1005

1006

def get_text(self, version_id):
    """See VersionedFile.get_text

    Delegates to get_texts() with a single-element request and returns the
    first entry of the result.
    """
    texts = self.get_texts([version_id])
    return texts[0]

1009

1010

def get_texts(self, version_ids):
    """Return one joined string per line list from get_line_list()."""
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

1012

1013

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Every requested id is first passed to check_not_reserved_id(); the
    texts are then looked up in the text map built by _get_content_maps()
    and returned in the order the ids were requested.
    """
    for candidate in version_ids:
        self.check_not_reserved_id(candidate)
    texts_by_version, contents_by_version = \
        self._get_content_maps(version_ids)
    ordered_texts = []
    for candidate in version_ids:
        ordered_texts.append(texts_by_version[candidate])
    return ordered_texts

_get_lf_split_line_list = get_line_list

1021

1022

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    For every requested version the chain of build components is followed
    through the record map (each record names its build parent via 'next')
    and then replayed from the oldest component forward, parsing the
    fulltext and applying each line-delta on top of it.

    :param version_ids: An iterable of the version ids to reconstruct.
    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.  The second dict holds
        the final content of each requested version, i.e. after the
        trailing newline has been stripped for 'no-eol' versions.
    :raises KnitCorrupt: if the sha of a reconstructed text differs from
        the digest read alongside its record.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children than need to apply delta's to it is > 1 or it is part of the
    # final output.
    version_ids = list(version_ids)
    # With exactly one requested version nothing is shared, so contents are
    # neither copied nor cached below.
    multiple_versions = len(version_ids) != 1
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # content_map caches the content built for each component so that later
    # versions sharing part of a build chain can start from it.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Walk from the requested version towards its fulltext, collecting
        # the components that must be applied.  The walk stops early at the
        # first component whose content is already cached (that component
        # is still appended so the replay below can pick it up).
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # Replay oldest-first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_id)
                elif method == 'line-delta':
                    delta = self.factory.parse_line_delta(data, version_id)
                    if multiple_versions:
                        # only doing this when we want multiple versions
                        # output avoids list copies - which reference and
                        # dereference many strings.
                        content = content.copy()
                    content.apply_delta(delta, version_id)
                if multiple_versions:
                    content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            if multiple_versions:
                # Copy first so the cached content in content_map keeps its
                # trailing newline.
                content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            raise KnitCorrupt(self.filename,
                              '\n sha-1 %s'
                              '\n of reconstructed text does not match'
                              '\n expected %s'
                              '\n for version %s' %
                              (actual_sha, digest, version_id))
        text_map[version_id] = text
    return text_map, final_content

1087

1088

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    This is a generator: the presence check and all reads happen once
    iteration starts.

    :param version_ids: The versions to walk; when None, every version
        returned by self.versions() is walked.
    :param pb: Optional progress bar; a progress.DummyProgress is used when
        None.
    :return: An iterator of (line, version_id) pairs, one per line produced
        by the factory for each stored record (fulltext or line-delta).
    :raises RevisionNotPresent: if a requested version is not in this knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need version_id, position, length
    version_id_records = []
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue:
    # (iterate self.versions() rather than the request so records are
    # visited in the knit's own order; duplicates in the request collapse
    # because requested_versions is a set)
    for version_id in self.versions():
        if version_id in requested_versions:
            index_memo = self._index.get_position(version_id)
            version_id_records.append((version_id, index_memo))

    total = len(version_id_records)
    for version_idx, (version_id, data, sha_value) in \
        enumerate(self._data.read_records_iter(version_id_records)):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        # XXX: It might be more efficient to yield (version_id,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in line_iterator:
            yield line, version_id

    # Final update marks the walk as complete.
    pb.update('Walking content.', total, total)

1128

1129

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    The work is delegated entirely to the index.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    index = self._index
    return index.iter_parents(version_ids)

1139

1140

def num_versions(self):
    """See VersionedFile.num_versions().

    The count is whatever the index reports.
    """
    count = self._index.num_versions()
    return count

__len__ = num_versions

1145

1146

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Annotation is performed by the content factory, which is handed this
    knit and the version id.
    """
    factory = self.factory
    return factory.annotate_iter(self, version_id)

1149

1150

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts! Ask the index directly and only translate the
    # KeyError on failure, rather than testing for presence first.
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1159

1160

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1166

1167

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    A single version id given as a string is wrapped in a list; an empty
    request yields an empty list without consulting the index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1174

1175

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    A single version id given as a string is wrapped in a list; an empty
    request yields an empty list without consulting the index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1182

1183

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the (unsorted) ancestry and the annotation of both versions
    and hands them to merge._plan_annotate_merge.
    """
    b_ancestry = set(self.get_ancestry(ver_b, topo_sorted=False))
    a_ancestry = set(self.get_ancestry(ver_a, topo_sorted=False))
    a_annotated = self.annotate(ver_a)
    b_annotated = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(a_annotated, b_annotated,
                                      a_ancestry, b_ancestry)
    return plan

1191

1192

1193

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Record where the file lives and how it should be created.

        Nothing is read or written here; creation is not pending until a
        subclass sets _need_to_create.
        """
        # Location of the file.
        self._transport = transport
        self._filename = filename
        # Access mode plus the permission settings used on creation.
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Validate the first line of fp against self.HEADER.

        :raises errors.NoSuchFile: if fp is empty.
        :raises KnitHeaderError: if the first line is not the header.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line == self.HEADER:
            return
        location = self._transport.abspath(self._filename)
        raise KnitHeaderError(badline=first_line, filename=location)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1222

1223

1224

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to a Index
    object.  Each cache entry is the tuple
    (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the size of the record in the
        data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write is interrupted to the index file, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result its normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            # Re-added version: keep the sequence number already assigned.
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Open the index at filename on transport, loading it into memory.

        If the file is missing it is only tolerated when mode is 'w' and
        create is True; in that case the header is either written
        immediately or, with delay_create, deferred until the first
        add_versions() call.  Otherwise NoSuchFile propagates.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        # idx[4] is the parents field of the cache entry.
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Parents that are not present in the index (ghosts) are dropped.

        :raises RevisionNotPresent: if a requested version is not in the
            index.
        """
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost parents are kept in the result as nodes without parents; the
        requested versions themselves must be present.
        """
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                yield version_id, tuple(self.get_parents(version_id))
            except KeyError:
                # version_id is not in the cache: skip it.
                pass

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted.

        The internal _history list itself is returned, not a copy.
        """
        return self._history

    def _version_list_to_index(self, versions):
        """Encode a list of parent versions for an index line.

        Versions present in the cache are written as their sequence number;
        anything else is written as '.' followed by the version id.  The
        encoded entries are joined with single spaces.
        """
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, index_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        The in-memory cache is updated as each line is prepared; if anything
        fails (including the write), the cache and history are restored to
        their state on entry and the exception is re-raised.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
                         The index element of the access memo is not used
                         here.
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.  (This
            implementation does not consult it.)
        """
        lines = []
        # Snapshots for rollback on failure.
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                # Deferred creation: write the header and the new records
                # in one go.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        return None, entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version.

        :return: 'fulltext' or 'line-delta'.
        :raises RevisionNotPresent: if version_id is not in the index.
        :raises errors.KnitIndexUnknownMethod: if the options name neither
            method.
        """
        try:
            options = self._cache[version_id][1]
        except KeyError:
            raise RevisionNotPresent(version_id, self._filename)
        if 'fulltext' in options:
            return 'fulltext'
        else:
            if 'line-delta' not in options:
                raise errors.KnitIndexUnknownMethod(self._full_path(), options)
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options recorded for version_id.

        The value is returned exactly as cached; add_versions() joins it
        with ',' when writing, e.g. foo,bar on disk.
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first version not in the index.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

1508

1509

1510

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Version ids are mapped to 1-tuple index keys.  Index nodes are handled
    as (index, key, value, node_refs) tuples, where value is a flag
    character ('N' for no-eol, otherwise a space) followed by
    "<pos> <size>", node_refs[0] holds the parent keys and, when deltas are
    enabled, node_refs[1] holds the compression parent (empty for a
    fulltext).
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record 
            parents.
        :raises KnitCorrupt: if deltas is requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator; for a parentless index each node is padded
        with an empty reference tuple so callers always see 4-tuples.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once all
            found entries have been yielded if any requested key was
            missing.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of keys that have an entry in the index.

        Despite the parameter name, the values are passed straight to
        _get_entries() and so must already be index keys.
        """
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        :raises RevisionNotPresent: if a requested version is not in the
            index.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            # Anything asked for in this round but not returned is a ghost.
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost ancestors are included as nodes without parents.

        :raises RevisionNotPresent: if a requested version is not in the
            index.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            # A missing key is only an error when it was explicitly
            # requested; otherwise it is a ghost ancestor.
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # Only parents not already known to be present need a lookup.
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Return a tuple holding the first element of each key."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        # Skip the leading flag character, then split "<pos> <size>".
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Return 'line-delta' for a non-empty reference list, else 'fulltext'."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the index node for version_id.

        :raises RevisionNotPresent: if the index has no entry for it.
        """
        try:
            return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return the list of options for version_id.

        The list holds the compression method ('fulltext' or 'line-delta'),
        followed by 'no-eol' when the stored value starts with 'N'.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.
        
        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: if a line-delta is added to a non-delta index,
            parents are given for a parentless index, or an already present
            version is re-added with different details.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # The first parent is the compression parent.
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        if not random_id:
            # Versions already in the index must match exactly and are then
            # dropped from the set of nodes to add.
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Return the set of 1-tuple index keys for version_ids."""
        return set((version_id, ) for version_id in version_ids)

1828

1829

1830

class _KnitAccess(object):

1831

"""Access to knit records in a .knit file."""

1832

1833

def __init__(self, transport, filename, _file_mode, _dir_mode,
             _need_to_create, _create_parent_dir):
    """Create a _KnitAccess for accessing and inserting data.

    :param transport: The transport the .knit is located on.
    :param filename: The filename of the .knit.

    The remaining arguments are stored unchanged on the instance under
    the same names.
    """
    # Where the data lives.
    self._transport = transport
    self._filename = filename
    # Creation state and the settings used when the file is created.
    self._need_to_create = _need_to_create
    self._create_parent_dir = _create_parent_dir
    self._file_mode = _file_mode
    self._dir_mode = _dir_mode

1846

1847

def add_raw_records(self, sizes, raw_data):

1848

"""Add raw knit bytes to a storage area.

1849

1850

The data is spooled to whereever the access method is storing data.

1851

1852

:param sizes: An iterable containing the size of each raw data segment.

1853

:param raw_data: A bytestring containing the data.

1854

:return: A list of memos to retrieve the record later. Each memo is a

1855

tuple - (index, pos, length), where the index field is always None

1856

for the .knit access method.

1857

"""

1858

assert type(raw_data) == str, \

1859

'data must be plain bytes was %s' % type(raw_data)

1860

if not self._need_to_create:

1861

base = self._transport.append_bytes(self._filename, raw_data)

1862

else:

1863

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1864

create_parent_dir=self._create_parent_dir,

1865

mode=self._file_mode,

1866

dir_mode=self._dir_mode)

1867

self._need_to_create = False

1868

base = 0

1869

result = []

1870

for size in sizes:

1871

result.append((None, base, size))

1872

base += size

1873

return result

1874

1875

def create(self):

1876

"""IFF this data access has its own storage area, initialise it.

1877

1878

:return: None.

1879

"""

1880

self._transport.put_bytes_non_atomic(self._filename, '',

1881

mode=self._file_mode)

1882

1883

def open_file(self):

1884

"""IFF this data access can be represented as a single file, open it.

1885

1886

For knits that are not mapped to a single file on disk this will

1887

always return None.

1888

1889

:return: None or a file handle.

1890

"""

1891

try:

1892

return self._transport.get(self._filename)

1893

except NoSuchFile:

1894

pass

1895

return None

1896

1897

def get_raw_records(self, memos_for_retrieval):

1898

"""Get the raw bytes for a records.

1899

1900

:param memos_for_retrieval: An iterable containing the (index, pos,

1901

length) memo for retrieving the bytes. The .knit method ignores

1902

the index as there is always only a single file.

1903

:return: An iterator over the bytes of the records.

1904

"""

1905

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1906

for pos, data in self._transport.readv(self._filename, read_vector):

1907

yield data

1908

1909

1910

class _PackAccess(object):

1911

"""Access to knit records via a collection of packs."""

1912

1913

def __init__(self, index_to_packs, writer=None):

1914

"""Create a _PackAccess object.

1915

1916

:param index_to_packs: A dict mapping index objects to the transport

1917

and file names for obtaining data.

1918

:param writer: A tuple (pack.ContainerWriter, write_index) which

1919

contains the pack to write, and the index that reads from it will

1920

be associated with.

1921

"""

1922

if writer:

1923

self.container_writer = writer[0]

1924

self.write_index = writer[1]

1925

else:

1926

self.container_writer = None

1927

self.write_index = None

1928

self.indices = index_to_packs

1929

1930

def add_raw_records(self, sizes, raw_data):

1931

"""Add raw knit bytes to a storage area.

1932

1933

The data is spooled to the container writer in one bytes-record per

1934

raw data item.

1935

1936

:param sizes: An iterable containing the size of each raw data segment.

1937

:param raw_data: A bytestring containing the data.

1938

:return: A list of memos to retrieve the record later. Each memo is a

1939

tuple - (index, pos, length), where the index field is the

1940

write_index object supplied to the PackAccess object.

1941

"""

1942

assert type(raw_data) == str, \

1943

'data must be plain bytes was %s' % type(raw_data)

1944

result = []

1945

offset = 0

1946

for size in sizes:

1947

p_offset, p_length = self.container_writer.add_bytes_record(

1948

raw_data[offset:offset+size], [])

1949

offset += size

1950

result.append((self.write_index, p_offset, p_length))

1951

return result

1952

1953

def create(self):

1954

"""Pack based knits do not get individually created."""

1955

1956

def get_raw_records(self, memos_for_retrieval):

1957

"""Get the raw bytes for a records.

1958

1959

:param memos_for_retrieval: An iterable containing the (index, pos,

1960

length) memo for retrieving the bytes. The Pack access method

1961

looks up the pack to use for a given record in its index_to_pack

1962

map.

1963

:return: An iterator over the bytes of the records.

1964

"""

1965

# first pass, group into same-index requests

1966

request_lists = []

1967

current_index = None

1968

for (index, offset, length) in memos_for_retrieval:

1969

if current_index == index:

1970

current_list.append((offset, length))

1971

else:

1972

if current_index is not None:

1973

request_lists.append((current_index, current_list))

1974

current_index = index

1975

current_list = [(offset, length)]

1976

# handle the last entry

1977

if current_index is not None:

1978

request_lists.append((current_index, current_list))

1979

for index, offsets in request_lists:

1980

transport, path = self.indices[index]

1981

reader = pack.make_readv_reader(transport, path, offsets)

1982

for names, read_func in reader.iter_records():

1983

yield read_func(None)

1984

1985

def open_file(self):

1986

"""Pack based knits have no single file."""

1987

return None

1988

1989

def set_writer(self, writer, index, (transport, packname)):

1990

"""Set a writer to use for adding data."""

1991

if index is not None:

1992

self.indices[index] = (transport, packname)

1993

self.container_writer = writer

1994

self.write_index = index

1995

1996

1997

class _KnitData(object):

1998

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1999

2000

The KnitData class provides the logic for parsing and using knit records,

2001

making use of an access method for the low level read and write operations.

2002

"""

2003

2004

def __init__(self, access):

2005

"""Create a KnitData object.

2006

2007

:param access: The access method to use. Access methods such as

2008

_KnitAccess manage the insertion of raw records and the subsequent

2009

retrieval of the same.

2010

"""

2011

self._access = access

2012

self._checked = False

2013

# TODO: jam 20060713 conceptually, this could spill to disk

2014

# if the cached size gets larger than a certain amount

2015

# but it complicates the model a bit, so for now just use

2016

# a simple dictionary

2017

self._cache = {}

2018

self._do_cache = False

2019

2020

def enable_cache(self):

2021

"""Enable caching of reads."""

2022

self._do_cache = True

2023

2024

def clear_cache(self):

2025

"""Clear the record cache."""

2026

self._do_cache = False

2027

self._cache = {}

2028

2029

def _open_file(self):

2030

return self._access.open_file()

2031

2032

def _record_to_data(self, version_id, digest, lines, dense_lines=None):

2033

"""Convert version_id, digest, lines into a raw data block.

2034

2035

:param dense_lines: The bytes of lines but in a denser form. For

2036

instance, if lines is a list of 1000 bytestrings each ending in \n,

2037

dense_lines may be a list with one line in it, containing all the

2038

1000's lines and their \n's. Using dense_lines if it is already

2039

known is a win because the string join to create bytes in this

2040

function spends less time resizing the final string.

2041

:return: (len, a StringIO instance with the raw data ready to read.)

2042

"""

2043

# Note: using a string copy here increases memory pressure with e.g.

2044

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

2045

# when doing the initial commit of a mozilla tree. RBC 20070921

2046

bytes = ''.join(chain(

2047

["version %s %d %s\n" % (version_id,

2048

len(lines),

2049

digest)],

2050

dense_lines or lines,

2051

["end %s\n" % version_id]))

2052

assert bytes.__class__ == str

2053

compressed_bytes = bytes_to_gzip(bytes)

2054

return len(compressed_bytes), compressed_bytes

2055

2056

def add_raw_records(self, sizes, raw_data):

2057

"""Append a prepared record to the data file.

2058

2059

:param sizes: An iterable containing the size of each raw data segment.

2060

:param raw_data: A bytestring containing the data.

2061

:return: a list of index data for the way the data was stored.

2062

See the access method add_raw_records documentation for more

2063

details.

2064

"""

2065

return self._access.add_raw_records(sizes, raw_data)

2066

2067

def _parse_record_header(self, version_id, raw_data):

2068

"""Parse a record header for consistency.

2069

2070

:return: the header and the decompressor stream.

2071

as (stream, header_record)

2072

"""

2073

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2074

try:

2075

rec = self._check_header(version_id, df.readline())

2076

except Exception, e:

2077

raise KnitCorrupt(self._access,

2078

"While reading {%s} got %s(%s)"

2079

% (version_id, e.__class__.__name__, str(e)))

2080

return df, rec

2081

2082

def _check_header(self, version_id, line):

2083

rec = line.split()

2084

if len(rec) != 4:

2085

raise KnitCorrupt(self._access,

2086

'unexpected number of elements in record header')

2087

if rec[1] != version_id:

2088

raise KnitCorrupt(self._access,

2089

'unexpected version, wanted %r, got %r'

2090

% (version_id, rec[1]))

2091

return rec

2092

2093

def _parse_record(self, version_id, data):

2094

# profiling notes:

2095

# 4168 calls in 2880 217 internal

2096

# 4168 calls to _parse_record_header in 2121

2097

# 4168 calls to readlines in 330

2098

df = GzipFile(mode='rb', fileobj=StringIO(data))

2099

2100

try:

2101

record_contents = df.readlines()

2102

except Exception, e:

2103

raise KnitCorrupt(self._access,

2104

"While reading {%s} got %s(%s)"

2105

% (version_id, e.__class__.__name__, str(e)))

2106

header = record_contents.pop(0)

2107

rec = self._check_header(version_id, header)

2108

2109

last_line = record_contents.pop()

2110

if len(record_contents) != int(rec[2]):

2111

raise KnitCorrupt(self._access,

2112

'incorrect number of lines %s != %s'

2113

' for version {%s}'

2114

% (len(record_contents), int(rec[2]),

2115

version_id))

2116

if last_line != 'end %s\n' % rec[1]:

2117

raise KnitCorrupt(self._access,

2118

'unexpected version end line %r, wanted %r'

2119

% (last_line, version_id))

2120

df.close()

2121

return record_contents, rec[3]

2122

2123

def read_records_iter_raw(self, records):

2124

"""Read text records from data file and yield raw data.

2125

2126

This unpacks enough of the text record to validate the id is

2127

as expected but thats all.

2128

"""

2129

# setup an iterator of the external records:

2130

# uses readv so nice and fast we hope.

2131

if len(records):

2132

# grab the disk data needed.

2133

if self._cache:

2134

# Don't check _cache if it is empty

2135

needed_offsets = [index_memo for version_id, index_memo

2136

in records

2137

if version_id not in self._cache]

2138

else:

2139

needed_offsets = [index_memo for version_id, index_memo

2140

in records]

2141

2142

raw_records = self._access.get_raw_records(needed_offsets)

2143

2144

for version_id, index_memo in records:

2145

if version_id in self._cache:

2146

# This data has already been validated

2147

data = self._cache[version_id]

2148

else:

2149

data = raw_records.next()

2150

if self._do_cache:

2151

self._cache[version_id] = data

2152

2153

# validate the header

2154

df, rec = self._parse_record_header(version_id, data)

2155

df.close()

2156

yield version_id, data

2157

2158

def read_records_iter(self, records):

2159

"""Read text records from data file and yield result.

2160

2161

The result will be returned in whatever is the fastest to read.

2162

Not by the order requested. Also, multiple requests for the same

2163

record will only yield 1 response.

2164

:param records: A list of (version_id, pos, len) entries

2165

:return: Yields (version_id, contents, digest) in the order

2166

read, not the order requested

2167

"""

2168

if not records:

2169

return

2170

2171

if self._cache:

2172

# Skip records we have alread seen

2173

yielded_records = set()

2174

needed_records = set()

2175

for record in records:

2176

if record[0] in self._cache:

2177

if record[0] in yielded_records:

2178

continue

2179

yielded_records.add(record[0])

2180

data = self._cache[record[0]]

2181

content, digest = self._parse_record(record[0], data)

2182

yield (record[0], content, digest)

2183

else:

2184

needed_records.add(record)

2185

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2186

else:

2187

needed_records = sorted(set(records), key=operator.itemgetter(1))

2188

2189

if not needed_records:

2190

return

2191

2192

# The transport optimizes the fetching as well

2193

# (ie, reads continuous ranges.)

2194

raw_data = self._access.get_raw_records(

2195

[index_memo for version_id, index_memo in needed_records])

2196

2197

for (version_id, index_memo), data in \

2198

izip(iter(needed_records), raw_data):

2199

content, digest = self._parse_record(version_id, data)

2200

if self._do_cache:

2201

self._cache[version_id] = data

2202

yield version_id, content, digest

2203

2204

def read_records(self, records):

2205

"""Read records into a dictionary."""

2206

components = {}

2207

for record_id, content, digest in \

2208

self.read_records_iter(records):

2209

components[record_id] = (content, digest)

2210

return components

2211

2212

2213

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def _copy_texts(self, pb, msg, version_ids, ignore_missing=False):
        """Copy texts to the target by extracting and adding them one by one.

        see join() for the parameter definitions.

        :return: The number of versions that were added to the target.
        """
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        graph = self.source.get_graph(version_ids)
        order = topo_sort(graph.items())

        def size_of_content(content):
            # Size of a cached entry is the summed length of its text lines.
            size = 0
            for line in content.text():
                size += len(line)
            return size

        # Cache at most 10MB of parent texts
        parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
                                              compute_size=size_of_content)
        # TODO: jam 20071116 It would be nice to have a streaming interface to
        #       get multiple texts from a source. The source could be smarter
        #       about how it handled intermediate stages.
        #       get_line_list() or make_mpdiffs() seem like a possibility, but
        #       at the moment they extract all full texts into memory, which
        #       causes us to store more than our 3x fulltext goal.
        #       Repository.iter_files_bytes() may be another possibility
        to_process = []
        for version in order:
            if version not in self.target:
                to_process.append(version)
        total = len(to_process)
        # The pb argument is not used; a nested progress bar replaces it.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for index, version in enumerate(to_process):
                pb.update('Converting versioned data', index, total)
                source_parents = self.source.get_parents(version)
                source_lines = self.source.get_lines(version)
                sha1, num_bytes, parent_text = self.target.add_lines(
                    version, source_parents, source_lines,
                    parent_texts=parent_cache)
                parent_cache[version] = parent_text
        finally:
            pb.finished()
        return total

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly
        converter = None
        if not (self.source.factory.annotated ==
                self.target.factory.annotated):
            if not self.source.factory.annotated:
                # We're converting from a plain to an annotated knit. Copy
                # them across by full texts.
                return self._copy_texts(pb, msg, version_ids, ignore_missing)
            converter = self._anno_to_plain_converter

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                        and candidate in needed_versions):
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(
                    version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for (version_id, raw_data), (planned_id, options, parents) in \
                    izip(raw_stream, copy_queue):
                assert version_id == planned_id, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                                               parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: Whatever the target's _record_to_data returns for the
            lowered lines.
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        if 'fulltext' not in options:
            delta = self.source.factory.parse_line_delta(data, version_id,
                                                         plain=True)
            lines = self.target.factory.lower_line_delta(delta)
        else:
            content = self.source.factory.parse_fulltext(data, version_id)
            lines = self.target.factory.lower_fulltext(content)
        return self.target._data._record_to_data(version_id, digest, lines)

2363

2364

2365

# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

2366

2367

2368

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a Weave and target a KnitVersionedFile."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                        and candidate in needed_versions):
                    version_list.append(candidate)

            # do the join:
            total = len(version_list)
            for done, version_id in enumerate(version_list):
                pb.update("Converting to knit", done, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
            # Every listed version was added, so the count equals the total.
            return total
        finally:
            pb.finished()

2424

2425

2426

# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

2427

2428

2429

# Deprecated, use PatienceSequenceMatcher instead

2430

# Alias: KnitSequenceMatcher is the same object as
# patiencediff.PatienceSequenceMatcher.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2431

2432

2433

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to read ancestry, texts, parents and deltas from.
    :param revision_id: The revision whose annotation is wanted.
    :return: An iterator over the annotation computed for revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = knit.get_line_list(ancestry)
    fulltext = dict(zip(ancestry, texts))
    annotations = {}
    for candidate in ancestry:
        if candidate in annotations:
            continue
        parents = knit.get_parents(candidate)
        # Matching blocks are only derived for a line-delta with at least
        # one parent; the storage method is not looked up for parentless
        # versions.
        if (len(parents) != 0
                and knit._index.get_method(candidate) == 'line-delta'):
            _parent, _sha1, _noeol, delta = knit.get_delta(candidate)
            blocks = KnitContent.get_line_delta_blocks(
                delta, fulltext[parents[0]], fulltext[candidate])
        else:
            blocks = None
        # Indexing annotations[p] requires each parent to appear in ancestry
        # before the candidate.
        parent_annotations = [annotations[p] for p in parents]
        annotations[candidate] = list(annotate.reannotate(
            parent_annotations, fulltext[candidate], candidate, blocks))
    return iter(annotations[revision_id])

2458

2459

2460

try:

2461

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2462

except ImportError:

2463

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »