/brz/remove-bazaar : revision 2696.1.1

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2007-08-15 04:33:34 UTC
mto: (2701.1.2 remove-should-cache)
mto: This revision was merged to the branch mainline in revision 2710.
Revision ID: mbp@sourcefrog.net-20070815043334-01dx9emb0vjiy29v

Remove things deprecated in 0.11 and earlier

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/bug_trackers.txt

doc/centralized_workflow.txt

doc/configuration.txt

doc/conflicts.txt

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/scratch.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/shared_repository_layouts.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

pack,

)

""")

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

100

from bzrlib.osutils import (

101

contains_whitespace,

102

contains_linebreaks,

103

sha_strings,

104

)

105

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

106

from bzrlib.tsort import topo_sort

107

import bzrlib.ui

108

import bzrlib.weave

109

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

110

111

112

# TODO: Split out code specific to this format into an associated object.

113

114

# TODO: Can we put in some kind of value to check that the index and data

115

# files belong together?

116

117

# TODO: accommodate binaries, perhaps by storing a byte count

118

119

# TODO: function to check whole file

120

121

# TODO: atomically append data, then measure backwards from the cursor

122

# position after writing to work out where it was located. we may need to

123

# bypass python file buffering.

124

125

# Suffix string for knit data files. Where it is appended is not visible in
# this part of the file -- presumably to the knit's relative path; confirm
# against the code that opens the data file.
DATA_SUFFIX = '.knit'

126

# Suffix string for knit index files. Where it is appended is not visible in
# this part of the file -- presumably to the knit's relative path; confirm
# against the code that opens the index file.
INDEX_SUFFIX = '.kndx'

127

128

129

class KnitContent(object):
    """Lines of a single knit version, against which deltas can be applied.

    The content is kept in ``self._lines`` as a sequence of (origin, text)
    pairs, one pair per line.
    """

    def __init__(self, lines):
        # The sequence is stored by reference; it is not copied here.
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the (origin, text) pair of each line."""
        return iter(self._lines)

    def annotate(self):
        """Return every (origin, text) pair in a newly built list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta that turns this content into new_lines.

        :param new_lines: another content object (it must provide ``text()``
            and ``_lines``).
        :return: a generator of ``(old_start, old_end, new_count, new_pairs)``
            tuples, one for every opcode that is not 'equal'.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # Layout: source start, source end, inserted length, data.
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the result of line_delta_iter(new_lines) as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    def text(self):
        """Return only the text of each line, dropping the origins."""
        return [line_text for _origin, line_text in self._lines]

    def copy(self):
        """Return a new KnitContent holding a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher.get_matching_blocks() data from a knit delta.

        :param knit_delta: iterable of ``(s_begin, s_end, t_len, new_text)``
            hunks.
        :param source: the lines the delta applies to.
        :param target: the lines produced by applying the delta.
        :return: a generator of ``(source_pos, target_pos, length)`` blocks,
            finishing with a zero-length block.
        """
        total = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_at
            matched = gap
            # A knit delta cannot be trusted about whether the final line of
            # a run really matches (eol handling), so compare it explicitly.
            if (matched > 0 and
                source[src_at + matched - 1] != target[tgt_at + matched - 1]):
                matched -= 1
            if matched > 0:
                yield src_at, tgt_at, matched
            tgt_at += inserted + gap
            src_at = hunk_end
        # Whatever remains after the last hunk is an unchanged tail.
        remaining = total - tgt_at
        if remaining > 0:
            if source[src_at + remaining - 1] != target[tgt_at + remaining - 1]:
                remaining -= 1
            if remaining > 0:
                yield src_at, tgt_at, remaining
        yield src_at + (total - tgt_at), total, 0

188

189

190

class _KnitFactory(object):
    """Common base of the factories that build content objects."""

    def make(self, lines, version_id):
        """Build a KnitContent in which every line has origin version_id.

        :param lines: a sized sequence of text lines.
        :param version_id: the origin paired with each line.
        """
        origins = [version_id] * len(lines)
        return KnitContent(zip(origins, lines))

196

197

198

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    In the annotated serialised form every content line is prefixed with its
    origin revision id and a single space: ``revid(utf8) text\\n``.
    """

    annotated = True

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: iterable of serialised fulltext lines.
        :param version_id: not used by this method; each line carries its
            own origin.
        :return: a KnitContent wrapping a list of (revid, plaintext) tuples.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the parsed hunks of a line delta.

        :param lines: iterable of serialised line-delta lines.
        :param version_id: optional; passed through to parse_line_delta,
            which does not use it. It defaults to None so callers that pass
            only ``lines`` keep working.
        """
        # parse_line_delta requires version_id; calling it with ``lines``
        # alone raised TypeError.
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: iterable of serialised line-delta lines.
        :param version_id: not used by this method; each line carries its
            own origin.
        :return: a list of (start, end, count, contents) tuples.
        """
        result = []
        lines = iter(lines)
        # Bound once so the content lines following each header can be
        # pulled from the same iterator that the for loop is walking.
        next = lines.next

        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext.

        :return: a generator of the text after the first space of each line.
        """
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            # The third header field is the number of content lines.
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.

        :param content: an object whose ``_lines`` holds (origin, text) pairs.
        :return: a list of 'origin text' strings.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.

        :param delta: iterable of (start, end, count, lines) tuples, where
            lines holds (origin, text) pairs.
        :return: a flat list: one 'start,end,count\\n' header per hunk,
            followed by that hunk's 'origin text' lines.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

288

289

290

class KnitPlainFactory(_KnitFactory):
    """Factory producing Content objects from unannotated knit data."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        This is not a no-op: the internal representation still pairs each
        line with a version id, it is simply the same version id throughout.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, contents) for each hunk in lines.

        :param lines: Indexable sequence of serialised delta lines.
        :param version_id: Origin paired with every content line.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            payload = lines[first:first + count]
            yield start, end, count, zip([version_id] * count, payload)
            position = first + count

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Return an iterator over the content lines of a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines stored in a line delta.

        The hunk headers are consumed but not returned.
        """
        line_iter = iter(lines)
        fetch = line_iter.next
        for header in line_iter:
            # The third header field is the number of content lines.
            remaining = int(header.split(',')[2])
            while remaining > 0:
                yield fetch()
                remaining -= 1

    def lower_fulltext(self, content):
        """Serialise a fulltext: just the text lines of content."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta as header lines followed by bare text lines."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            for origin, text in hunk_lines:
                serialised.append(text)
        return serialised

343

344

345

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: Transport the knit files live on.
    :param relpath: Path of the knit relative to transport.
    :return: The new, writable KnitVersionedFile using a KnitPlainFactory.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a factory
    # *instance*; the previous positional call swapped the first two
    # arguments, passed the factory class as access_mode and dropped the
    # result.
    # NOTE(review): create=True is assumed to be the intent of "construct";
    # confirm against callers.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

348

349

350

class KnitVersionedFile(VersionedFile):

351

"""Weave-like structure with faster random access.

352

353

A knit stores a number of texts and a summary of the relationships

354

between them. Texts are identified by a string version-id. Texts

355

are normally stored and retrieved as a series of lines, but can

356

also be passed as single strings.

357

358

Lines are stored with the trailing newline (if any) included, to

359

avoid special cases for files with no final newline. Lines are

360

composed of 8-bit characters, not unicode. The combination of

361

these approaches should mean any 'binary' file can be safely

362

stored and retrieved.

363

"""

364

365

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base name of the knit; INDEX_SUFFIX and DATA_SUFFIX
        are appended to it for the default index and data access.
    :param transport: Transport the knit files are accessed through.
    :param file_mode: Passed to the default index and data access.
    :param access_mode: 'r' or 'w'; None means 'w'.
    :param factory: Content factory; defaults to KnitAnnotateFactory().
    :param basis_knit: Deprecated; passing it only emits a warning.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails.  (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed to the default index and data access.
    :param index: An index to use for the knit.
    :param access_method: Data access object to use instead of a
        _KnitAccess built from transport and relpath.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    # A brand new (empty) knit gets its data file created right away unless
    # the caller asked for creation to be delayed.
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

410

411

def __repr__(self):
    """Show the class name together with the knit's absolute location."""
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (self.__class__.__name__, location)

414

415

def _check_should_delta(self, first_parents):
    """Decide between storing a new delta and storing a new fulltext.

    Starting with the first entry of first_parents, follow first parents
    back for at most _max_delta_chain steps, adding up the sizes of the
    deltas passed on the way.  When a fulltext is reached its size is
    compared with that total.

    :return: True if a delta should be created (the fulltext is larger
        than the deltas leading up to it), False if a fulltext should be
        used, which includes the case where no fulltext was found in time.
    """
    chain_size = 0
    candidates = first_parents
    steps_left = self._max_delta_chain
    while steps_left > 0:
        steps_left -= 1
        ancestor = candidates[0]
        kind = self._index.get_method(ancestor)
        index, pos, size = self._index.get_position(ancestor)
        if kind == 'fulltext':
            return size > chain_size
        chain_size += size
        candidates = self._index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

443

444

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored as a line-delta record only when delta_parent is
    given and _check_should_delta() approves; otherwise the work is handed
    to the base class implementation.

    :param version_id: Id of the version being added.
    :param parents: Parent version ids; all must be present in this knit.
    :param delta_parent: Version the delta applies to, or None.
    :param sha1: Digest recorded with the stored data.
    :param noeol: If true, the 'no-eol' option is recorded.
    :param delta: Delta hunks in the factory's internal representation.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        store_as_delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        # The window was changed to a maximum of 200 deltas, but also added
        # was a check that the total compressed size of the deltas is
        # smaller than the compressed size of the fulltext.
        store_as_delta = self._check_should_delta([delta_parent])

    if not store_as_delta:
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

498

499

def _add_raw_records(self, records, data):
    """Store pre-joined record data and index every record in it.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The data for the records, joined in record order.  Each
        record's slice of data is found by summing the sizes of the
        records before it; the slices are only used to fill the data
        cache when caching is on.
    """
    sizes = [entry[3] for entry in records]
    # write all the data
    memos = self._data.add_raw_records(sizes, data)
    start = 0
    entries = []
    for record, memo in zip(records, memos):
        version_id, options, parents, size = record
        entries.append((version_id, options, memo, parents))
        stop = start + size
        if self._data._do_cache:
            self._data._cache[version_id] = data[start:stop]
        start = stop
    self._index.add_versions(entries)

519

520

def enable_cache(self):
    """Turn on caching in this knit's data object."""
    data = self._data
    data.enable_cache()

523

524

def clear_cache(self):
    """Empty the data cache; nothing else is touched here."""
    data = self._data
    data.clear_cache()

527

528

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; the index and data suffixes are
        appended to it.
    :param transport: Transport the copy is written to.
    """
    temp_index = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(temp_index, index_file)
    finally:
        # The handle used to be left open; close it like the data file.
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(temp_index, name + INDEX_SUFFIX)

542

543

def create_empty(self, name, transport, mode=None):
    """Create a new knit that shares this knit's factory and delta setting.

    :param name: Relative path handed to the new KnitVersionedFile.
    :param transport: Transport handed to the new KnitVersionedFile.
    :param mode: Accepted but not used by this implementation.
    """
    # NOTE(review): mode is never forwarded -- confirm that is intended.
    new_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)
    return new_knit

546

547

def _fix_parents(self, version_id, new_parents):
    """Replace the recorded parents of a version.

    A new entry for version_id is added to the index.  It reuses fields
    1, 2 and 3 of the existing cached entry and differs only in its
    parents list.  new_parents must be a superset of the current parents.
    """
    entry = self._index._cache[version_id]
    dropped = set(entry[4]).difference(set(new_parents))
    assert dropped == set()
    memo = (None, entry[2], entry[3])
    self._index.add_version(version_id, entry[1], memo, new_parents)

561

562

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from a stored line delta.

    :return: None unless version_id is stored as a 'line-delta'; otherwise
        the result of KnitContent.get_line_delta_blocks() for its delta.
    """
    if self._index.get_method(version_id) == 'line-delta':
        parent, sha1, noeol, hunks = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(hunks, source, target)
    return None

567

568

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id or None.  A stored line-delta is parsed and returned
        as is; a stored fulltext is diffed against the parent's text (or
        against an empty text when there is no parent).
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if self._index.get_method(version_id) != 'fulltext':
        hunks = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, hunks

    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    matcher = KnitSequenceMatcher(None, old_texts, new_content.text())
    return parent, sha1, noeol, self._make_line_delta(matcher, new_content)

596

597

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the items the index's get_graph() gives.
    """
    return dict(self._index.get_graph())

601

602

def get_sha1(self, version_id):
    """Return the digest of one version, using get_sha1s()."""
    digests = self.get_sha1s([version_id])
    return digests[0]

604

605

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: A list of digests, one per entry of version_ids, in order.
    """
    version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(version_ids)
    digests = []
    for version_id in version_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

611

612

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list: the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

616

617

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost here is a version that is named as a parent in the index graph
    but is not itself in the index cache.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent == version_id and parent not in self._index._cache:
                return True
    return False

631

632

def versions(self):
    """See VersionedFile.versions.

    :return: Whatever the index's get_versions() returns.
    """
    index = self._index
    return index.get_versions()

635

636

def has_version(self, version_id):
    """See VersionedFile.has_version.

    The id is passed through osutils.safe_revision_id() before the index
    is asked.
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

640

641

__contains__ = has_version

642

643

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: Content object for the new text; when annotated is
        true its _lines are updated in place.
    :param parents: Parent version ids to take annotations from.  The
        first one is the reference for the returned delta.
    :param parent_texts: Optional dict of version id to content, handed to
        _get_content() as a cache.
    :param delta: If true, a line delta against the first parent is
        returned.
    :param annotated: If true, (origin, text) pairs are copied from the
        parents for every matching run of lines.
    :return: The line delta when delta is true, otherwise None.
    """
    if parent_texts is None:
        # A fresh dict per call instead of one shared default instance.
        parent_texts = {}
    delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            seq = patiencediff.PatienceSequenceMatcher(
                               None, merge_content.text(), content.text())
            if delta_seq is None:
                # setup a delta seq to reuse.
                delta_seq = seq
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the new
                # content for any line that matches the last-checked parent.
                # FIXME: save the sequence control data for delta compression
                # against the most relevant parent rather than rediffing.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if not delta:
        return None
    if not annotated:
        # No matcher was built above, so diff against the first parent now.
        reference_content = self._get_content(parents[0], parent_texts)
        new_texts = content.text()
        old_texts = reference_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(
                                         None, old_texts, new_texts)
    return self._make_line_delta(delta_seq, content)

673

674

def _make_line_delta(self, delta_seq, new_content):
    """Build line delta hunks from a sequence matcher and the new content.

    Every opcode except 'equal' becomes one hunk of the form
    (old_start, old_end, number_of_new_lines, new_lines), where new_lines
    is the matching slice of new_content._lines.
    """
    return [(old_start, old_end, new_end - new_start,
             new_content._lines[new_start:new_end])
            for tag, old_start, old_end, new_start, new_end
            in delta_seq.get_opcodes()
            if tag != 'equal']

682

683

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, index_memo, next) is returned.
    method is the way referenced data should be applied.
    index_memo is what the index's get_position() returns for the
    version.
    next is the build-parent of the version, or None for fulltexts.

    The dict covers every requested version plus every component reached
    by following build-parents from it.
    """
    component_data = {}
    for version_id in version_ids:
        cursor = version_id

        # Stop at a fulltext, or as soon as the chain joins one that has
        # already been recorded.
        while cursor is not None and cursor not in component_data:
            method = self._index.get_method(cursor)
            if method == 'fulltext':
                build_parent = None
            else:
                build_parent = self.get_parents(cursor)[0]
            index_memo = self._index.get_position(cursor)
            component_data[cursor] = (method, index_memo, build_parent)
            cursor = build_parent
    return component_data

709

710

def _get_content(self, version_id, parent_texts=None):
    """Returns a content object that makes up the specified
    version.

    :param version_id: Version to return the content of.
    :param parent_texts: Optional dict of version id to content object; a
        non-None entry for version_id is returned directly.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    # None replaces the former shared mutable default ({}).
    if parent_texts is not None:
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

722

723

def _check_versions_present(self, version_ids):
    """Ask the index to check that every given version is present."""
    index = self._index
    index.check_versions_present(version_ids)

726

727

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines_with_ghosts().

    Unlike _add_lines(), the parents are not checked for presence.  _add()
    is given a copy of lines.
    """
    self._check_add(version_id, lines)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

731

732

def _add_lines(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines.

    The version and lines are checked, every parent must be present, and
    _add() is given a copy of lines.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

737

738

def _check_add(self, version_id, lines):
    """Verify that version_id and lines may be added to this knit.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_present = self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

749

750

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: Id of the new version; must be a str.
    :param lines: List of text lines.  It is modified in place when the
        last line lacks a trailing newline, so callers pass a copy.
    :param parents: Parent version ids, present or not.
    :param delta: Whether a line-delta is wanted; it is overridden when
        there is no present parent or _check_should_delta() says no.
    :param parent_texts: Optional dict of version id to content object.
    :return: The content object created for the new version.
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    if parent_texts is None:
        parent_texts = {}
    # Parents missing from this knit are ghosts; only the present ones can
    # serve as a delta or annotation basis.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not len(present_parents):
        delta = False

    # The digest is taken before any newline is appended below.
    digest = sha_strings(lines)
    options = []
    if lines:
        # endswith() also copes with an empty final string, where indexing
        # the last character raised IndexError.
        if not lines[-1].endswith('\n'):
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,
                                              delta, self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return lines

813

814

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation has no body: nothing is verified and
    progress_bar is not used.
    """

816

817

def _clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text().

    The lines of old_version_id are read and added again under
    new_version_id with the given parents.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)

822

823

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    :return: The first (only) entry of get_line_list([version_id]).
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

826

827

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are component ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is the record content yielded by the data object.
    digest is the digest yielded with that content.
    next is the build-parent of the version; None for fulltexts.

    Records are read for every component listed by
    _get_components_positions().
    """
    position_map = self._get_components_positions(version_ids)
    # each value of position_map is (method, index_memo, next)
    wanted = [(component, details[1])
              for component, details in position_map.items()]
    record_map = {}
    for component_id, content, digest in \
            self._data.read_records_iter(wanted):
        method, index_memo, build_parent = position_map[component_id]
        record_map[component_id] = (method, content, digest, build_parent)

    return record_map

848

849

def get_text(self, version_id):
    """See VersionedFile.get_text

    :return: The first (only) entry of get_texts([version_id]).
    """
    texts = self.get_texts([version_id])
    return texts[0]

852

853

def get_texts(self, version_ids):
    """Return each listed version as one string of its joined lines."""
    texts = []
    for version_lines in self.get_line_list(version_ids):
        texts.append(''.join(version_lines))
    return texts

855

856

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    :return: One entry per requested version, in request order, taken
        from the text map of _get_content_maps().
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

863

864

_get_lf_split_line_list = get_line_list

865

866

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Contents built so far, keyed by component id and shared between the
    # requested versions.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain from the requested version back to its
        # fulltext, stopping early at a component that is already built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = build_parent

        # Replay the chain oldest component first.
        # NOTE(review): the parse calls receive the requested version_id,
        # not component_id -- confirm this is intended.
        content = None
        chain.reverse()
        for component_id, method, data, digest in chain:
            if component_id in content_map:
                content = content_map[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the shared content_map entry is untouched.
            content = content.copy()
            stripped = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], stripped)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

920

921

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    This is a generator: the content lines stored in the record of each
    requested version are yielded, records being visited in the order of
    self.versions().

    :param version_ids: Versions to walk; None means all versions.
    :param pb: Progress bar; a DummyProgress is used when None.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue of (version_id, index_memo):
    version_id_records = [
        (version_id, self._index.get_position(version_id))
        for version_id in self.versions()
        if version_id in requested_versions]

    total = len(version_id_records)
    records = self._data.read_records_iter(version_id_records)
    for version_idx, (version_id, data, sha_value) in enumerate(records):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract = self.factory.get_fulltext_content
        else:
            extract = self.factory.get_linedelta_content
        for line in extract(data):
            yield line

    pb.update('Walking content.', total, total)

960

961

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

973

974

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: Whatever the index's num_versions() returns.
    """
    index = self._index
    return index.num_versions()

977

978

__len__ = num_versions

979

980

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields the (origin, text) pairs of the version's content object.
    """
    safe_id = osutils.safe_revision_id(version_id)
    annotated_lines = self._get_content(safe_id).annotate_iter()
    for origin, text in annotated_lines:
        yield origin, text

986

987

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

997

998

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1005

1006

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id string or a list of version ids.
    :param topo_sorted: Passed through to the index.
    :return: [] when versions is empty, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(safe_ids, topo_sorted)
    return []

1014

1015

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or a list of version ids.
    :return: [] when versions is empty, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(safe_ids)
    return []

1023

1024

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Both versions are annotated and their unsorted ancestries collected,
    then everything is handed to merge._plan_annotate_merge().
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    ancestry_b = self.get_ancestry(ver_b, topo_sorted=False)
    ancestors_b = set(ancestry_b)

    ancestry_a = self.get_ancestry(ver_a, topo_sorted=False)
    ancestors_a = set(ancestry_a)
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)
    return plan

1035

1036

1037

class _KnitComponentFile(object):
    """One of the files used to implement a knit database.

    check_header() compares against self.HEADER, which is not defined in
    this class.
    """

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it is to be accessed.

        Every argument is stored on a private attribute of the same name;
        _need_to_create starts out False.
        """
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the transport's base joined directly with the filename."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Check that the first line read from fp is this file's header.

        :raises errors.NoSuchFile: if nothing could be read.
        :raises KnitHeaderError: if the line differs from self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line == self.HEADER:
            return
        raise KnitHeaderError(badline=first_line,
                          filename=self._transport.abspath(self._filename))

    def __repr__(self):
        """Show the class name and the file name."""
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1066

1067

1068

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache maps a version id to its index record, a tuple of
    (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record in the
        data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it is normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        cache = self._cache
        # Only the first entry seen for a version id gets a slot in
        # _history; later duplicates reuse that sequence number.
        try:
            sequence = cache[version_id][5]
        except KeyError:
            sequence = len(self._history)
            self._history.append(version_id)
        cache[version_id] = (version_id, options, pos, size, parents,
                             sequence)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from transport, optionally creating it.

        :param create: With mode 'w', allow a missing (or empty) index to
            be created instead of raising NoSuchFile.
        :param delay_create: When creating, only flag the file as needing
            creation; the header is then written by the first add_versions.
        The remaining parameters are passed to _KnitComponentFile.__init__.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            index_file = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, index_file)
            finally:
                index_file.close()
        except NoSuchFile:
            if not (mode == 'w' and create):
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        cache = self._cache
        graph = []
        for version_id in cache:
            graph.append((version_id, cache[version_id][4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                recorded_parents = cache[version][4]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in recorded_parents if p in cache]
            # queue whatever has not been completed yet
            pending.update(p for p in parents if p not in graph)
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            if version in cache:
                parents = cache[version][4]
                # if not completed
                pending.update(p for p in parents if p not in graph)
                graph[version] = parents
            else:
                # ghost, fake it
                graph[version] = []
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                yield version_id, tuple(self.get_parents(version_id))
            except KeyError:
                # not in the index: skip it
                continue

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references for an index line.

        Versions present in the index are written as their sequence
        number, all others as '.' followed by the version id; the
        references are joined with single spaces.
        """
        cache = self._cache
        encoded = []
        append = encoded.append
        for version in versions:
            if version not in cache:
                append('.' + version)
                continue
            # -- inlined lookup() --
            append(str(cache[version][5]))
            # -- end lookup () --
        return ' '.join(encoded)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: an iterable of tuples:
                         (version_id, options, (index, pos, size), parents).
                         The index element of the memo is not used here.

        If anything goes wrong the in-memory history and cache are put
        back to their previous values before the error propagates.
        """
        lines = []
        saved_history = list(self._history)
        saved_cache = dict(self._cache)

        try:
            for version_id, options, memo, parents in versions:
                _unused_index, pos, size = memo
                flags = ','.join(options)
                parent_refs = self._version_list_to_index(parents)
                line = "\n%s %s %s %s %s :" % (version_id, flags, pos, size,
                                               parent_refs)
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # First write to a delayed-create index: emit the header
                # and the new records in one go.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(
                    self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename, ''.join(lines))
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        record = self._cache[version_id]
        return (None, record[2], record[3])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options recorded for version_id.

        The value is returned exactly as it was cached; it is serialised
        on disk as a comma separated string, e.g. foo,bar
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        present = []
        for parent in cache[version_id][4]:
            if parent in cache:
                present.append(parent)
        return present

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first id not in the index.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1347

1348

1349

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas are requested without parent tracking.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            entries are exhausted and some requested key was not found.
        :return: A generator of (index, key, value, references) nodes.
        """
        keys = set(keys)
        seen_keys = set()
        track_parents = self._parents
        for node in self._graph_index.iter_entries(keys):
            if track_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            seen_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(seen_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that have an index entry."""
        present = set()
        for node in self._get_entries(version_ids):
            present.add(node[1])
        return present

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        version_ids = self._keys_to_version_ids(present_keys)
        return list(version_ids)

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            batch = pending
            pending = set()
            found = set()
            for (_index, key, _value, node_refs) in self._get_entries(batch):
                # dont ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                live_parents = [parent for parent in node_refs[0]
                                if parent not in ghosts]
                graph[key] = live_parents
                # queue parents, but dont examine known nodes again
                pending.update([parent for parent in live_parents
                                if parent not in graph])
                found.add(key)
            ghosts.update(batch.difference(found))
        missing = versions.difference(graph)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        if topo_sorted:
            ordered_keys = topo_sort(graph.items())
        else:
            ordered_keys = graph
        return [key[0] for key in ordered_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            batch = pending
            pending = set()
            for (_index, key, _value, node_refs) in self._get_entries(batch):
                node_parents = node_refs[0]
                graph[key] = node_parents
                # queue parents, but dont examine known nodes again
                pending.update([parent for parent in node_parents
                                if parent not in graph])
            absent = batch.difference(graph)
            absent_but_requested = versions.intersection(absent)
            if absent_but_requested:
                raise RevisionNotPresent(absent_but_requested.pop(), self)
            for ghost in absent:
                # add a key, no parents
                graph[ghost] = []
                pending.discard(ghost) # don't look for it
        ordered_keys = topo_sort(graph.items())
        return [key[0] for key in ordered_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        return [(key[0], tuple([ref[0] for ref in refs[0]]))
                for _index, key, _value, refs
                in self._graph_index.iter_all_entries()]

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        requested = self._version_ids_to_keys(version_ids)
        if not self._parents:
            for node in self._get_entries(requested):
                yield node[1][0], ()
            return
        all_nodes = set(self._get_entries(requested))
        referenced = set()
        present = set()
        for node in all_nodes:
            referenced.update(node[3][0])
            # any node we are querying must be present
            present.add(node[1])
        # Only parents not already known to exist need a lookup.
        unknown = referenced.difference(present)
        present.update(self._present_keys(unknown))
        for node in all_nodes:
            parents = tuple([parent[0] for parent in node[3][0]
                             if parent in present])
            yield node[1][0], parents

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        count = 0
        for _node in self._graph_index.iter_all_entries():
            count += 1
        return count

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        versions = []
        for node in self._graph_index.iter_all_entries():
            versions.append(node[1][0])
        return versions

    def has_version(self, version_id):
        """True if the version is in the index."""
        keys = self._version_ids_to_keys([version_id])
        return len(self._present_keys(keys)) == 1

    def _keys_to_version_ids(self, keys):
        """Strip the 1-tuple wrapping from each key."""
        return tuple([key[0] for key in keys])

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        # The value is a one character flag followed by "pos size".
        fields = node[2][1:].split(' ')
        position = int(fields[0])
        size = int(fields[1])
        return node[0], position, size

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if self._deltas:
            node = self._get_node(version_id)
            return self._parent_compression(node[3][1])
        return 'fulltext'

    def _parent_compression(self, reference_list):
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list) == 0:
            return 'fulltext'
        return 'line-delta'

    def _get_node(self, version_id):
        """Return the first index node found for version_id."""
        keys = self._version_ids_to_keys([version_id])
        return list(self._get_entries(keys))[0]

    def get_options(self, version_id):
        """Return the list of option strings for version_id.

        The list holds the compression method, followed by 'no-eol' when
        the stored value carries the 'N' flag.
        """
        node = self._get_node(version_id)
        if self._deltas:
            options = [self._parent_compression(node[3][1])]
        else:
            options = ['fulltext']
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        found = list(self.iter_parents([version_id]))
        if found:
            return found[0][1]
        # missing key
        raise errors.RevisionNotPresent(version_id, self)

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        keys = self._version_ids_to_keys([version_id])
        nodes = list(self._get_entries(keys, check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        missing = keys.difference(self._present_keys(keys))
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, access_memo, parents)
        return self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: an iterable of tuples:
                         (version_id, options, (index, pos, size), parents).
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: if a record does not fit this index's
            configuration or contradicts an entry that is already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        prepared = {}
        for (version_id, options, access_memo, parents) in versions:
            _index, pos, size = access_memo
            key = (version_id, )
            parent_keys = tuple([(parent, ) for parent in parents])
            if 'no-eol' in options:
                flag = 'N'
            else:
                flag = ' '
            value = flag + "%d %d" % (pos, size)
            is_delta = 'line-delta' in options
            if is_delta and not self._deltas:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if not self._parents:
                if parent_keys:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            elif not self._deltas:
                node_refs = (parent_keys, )
            elif is_delta:
                # The second reference list names the compression parent.
                node_refs = (parent_keys, (parent_keys[0],))
            else:
                node_refs = (parent_keys, ())
            prepared[key] = (value, node_refs)
        # Entries that already exist must match exactly and are then dropped
        # from the batch.
        for (_index, key, value, node_refs) in self._get_entries(prepared):
            if (value, node_refs) != prepared[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), prepared[key]))
            del prepared[key]
        result = []
        with_refs = self._parents
        for key in prepared:
            value, node_refs = prepared[key]
            if with_refs:
                result.append((key, value, node_refs))
            else:
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Wrap each version id in a 1-tuple and return them as a set."""
        return set([(version_id, ) for version_id in version_ids])

1661

1662

1663

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Mode passed to the transport when writing the file.
        :param _dir_mode: Mode passed along when the file is first created.
        :param _need_to_create: True if the first write must create the
            file rather than append to it.
        :param _create_parent_dir: Passed to the transport on that first
            write.
        """
        self._transport = transport
        self._filename = filename
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # First write: create the file, so the data starts at offset 0.
            self._transport.put_bytes_non_atomic(
                self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            position = 0
        else:
            # append_bytes reports where the appended data begins.
            position = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, position, size))
            position += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for some records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = []
        for (_index, pos, size) in memos_for_retrieval:
            ranges.append((pos, size))
        for _pos, data in self._transport.readv(self._filename, ranges):
            yield data

1741

1742

1743

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def _group_memos(self, memos_for_retrieval):
        """Group consecutive memos that share an index.

        :param memos_for_retrieval: An iterable of (index, pos, length).
        :return: A list of (index, [(pos, length), ...]) in request order;
            a new group starts every time the index changes.
        """
        request_lists = []
        current_index = None
        # current_list, not current_index, marks "no group open yet", so an
        # index that happens to be None is grouped like any other value
        # instead of touching an unassigned list.
        current_list = None
        for (index, offset, length) in memos_for_retrieval:
            if current_list is not None and current_index == index:
                current_list.append((offset, length))
            else:
                if current_list is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_list is not None:
            request_lists.append((current_index, current_list))
        return request_lists

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for some records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = self._group_memos(memos_for_retrieval)
        # second pass, one readv per group
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer new records are added to.
        :param index: The index object that will read what writer writes.
        :param transport_packname: A (transport, packname) tuple locating
            the pack; it is recorded in self.indices under index.
        """
        # Unpacked here rather than in the signature: tuple parameters are
        # not valid syntax on newer Pythons, and callers pass the pair
        # positionally either way.
        transport, packname = transport_packname
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

1827

1828

1829

class _KnitData(object):

1830

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1831

1832

The KnitData class provides the logic for parsing and using knit records,

1833

making use of an access method for the low level read and write operations.

1834

"""

1835

1836

def __init__(self, access):
    """Create a KnitData object.

    :param access: The access method to use. Access methods such as
        _KnitAccess manage the insertion of raw records and the subsequent
        retrieval of the same.
    """
    self._access = access
    self._checked = False
    # Caching of raw records is off until enable_cache() is called.
    self._do_cache = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = dict()

1851

1852

def enable_cache(self):
    """Turn on caching of records; clear_cache() turns it off again."""
    self._do_cache = True

1855

1856

def clear_cache(self):
    """Drop every cached record and switch caching off."""
    self._cache = {}
    self._do_cache = False

1860

1861

def _open_file(self):
    """Return whatever the access method's open_file() returns."""
    access = self._access
    return access.open_file()

1863

1864

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream holding a 'version' header line, the
    content lines and a closing 'end' line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    buf = StringIO()
    compressor = GzipFile(None, mode='wb', fileobj=buf)

    assert isinstance(version_id, str)
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    compressor.writelines(chain([header], lines, [footer]))
    compressor.close()
    # Once the gzip stream is closed the write cursor sits at the end of
    # the compressed data, so its position is the record length.
    length = buf.tell()

    buf.seek(0)
    return length, buf

1884

1885

def add_raw_records(self, sizes, raw_data):
    """Append prepared records to the data file.

    :param sizes: An iterable containing the size of each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: a list of index data for the way the data was stored.
        See the access method add_raw_records documentation for more
        details.
    """
    access = self._access
    return access.add_raw_records(sizes, raw_data)

1895

1896

def add_record(self, version_id, digest, lines):
    """Write new text record to disk.

    The compressed record is also kept in the cache when caching is
    enabled.

    Returns index data for retrieving it later, as per add_raw_records.
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    raw_bytes = sio.getvalue()
    memos = self.add_raw_records([size], raw_bytes)
    if self._do_cache:
        self._cache[version_id] = raw_bytes
    return memos[0]

1906

1907

def _parse_record_header(self, version_id, raw_data):

1908

"""Parse a record header for consistency.

1909

1910

:return: the header and the decompressor stream.

1911

as (stream, header_record)

1912

"""

1913

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1914

try:

1915

rec = self._check_header(version_id, df.readline())

1916

except Exception, e:

1917

raise KnitCorrupt(self._access,

1918

"While reading {%s} got %s(%s)"

1919

% (version_id, e.__class__.__name__, str(e)))

1920

return df, rec

1921

1922

def _check_header(self, version_id, line):

1923

rec = line.split()

1924

if len(rec) != 4:

1925

raise KnitCorrupt(self._access,

1926

'unexpected number of elements in record header')

1927

if rec[1] != version_id:

1928

raise KnitCorrupt(self._access,

1929

'unexpected version, wanted %r, got %r'

1930

% (version_id, rec[1]))

1931

return rec

1932

1933

def _parse_record(self, version_id, data):
    """Decompress one record and validate its framing.

    A record is a header line, the content lines, and a closing
    ``end <version>`` line.  The header is validated with
    ``_check_header``; its third field must equal the number of content
    lines and its fourth field is returned as the digest.

    :param version_id: The version the record is expected to describe.
    :param data: The gzip-compressed bytes of a single record.
    :return: ``(content_lines, digest)``.
    :raises KnitCorrupt: if the data cannot be decompressed, the record is
        empty or truncated, the header is invalid, the line count does not
        match, or the end line is wrong.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df = GzipFile(mode='rb', fileobj=StringIO(data))
    try:
        record_contents = df.readlines()
    except Exception as e:
        raise KnitCorrupt(self._access,
                          "While reading {%s} got %s(%s)"
                          % (version_id, e.__class__.__name__, str(e)))
    finally:
        # Everything has been read (or reading failed); the stream is not
        # needed for any of the checks below.
        df.close()

    # Report structurally broken records as corruption rather than letting
    # list.pop() raise a bare IndexError.
    if not record_contents:
        raise KnitCorrupt(self._access,
                          'empty record for version {%s}' % version_id)
    header = record_contents.pop(0)
    rec = self._check_header(version_id, header)

    if not record_contents:
        raise KnitCorrupt(self._access,
                          'missing end line for version {%s}' % version_id)
    last_line = record_contents.pop()

    try:
        expected_count = int(rec[2])
    except ValueError:
        raise KnitCorrupt(self._access,
                          'invalid line count %r for version {%s}'
                          % (rec[2], version_id))
    if len(record_contents) != expected_count:
        raise KnitCorrupt(self._access,
                          'incorrect number of lines %s != %s'
                          ' for version {%s}'
                          % (len(record_contents), expected_count,
                             version_id))
    if last_line != 'end %s\n' % rec[1]:
        raise KnitCorrupt(self._access,
                          'unexpected version end line %r, wanted %r'
                          % (last_line, version_id))
    return record_contents, rec[3]

1962

1963

def read_records_iter_raw(self, records):
    """Read text records from the data file and yield their raw data.

    Only the header of each freshly read record is unpacked, to validate
    that the stored version id is the expected one.  Records served from
    ``self._cache`` are not validated again.

    :param records: A sized, re-iterable collection of
        (version_id, index_memo) pairs.
    :return: Yields (version_id, raw_data) in the order requested.
    """
    if not len(records):
        return

    # The cache and the caching flag are read once so that the fetch plan
    # built below and the loop that consumes it agree with each other.
    cache = self._cache
    do_cache = self._do_cache

    # Work out exactly which records the second loop will pull from the
    # access layer.  With caching enabled, a version requested more than
    # once is fetched only for its first occurrence (later occurrences are
    # served from the cache); requesting it again would leave an extra item
    # in raw_records and pair every later version with the wrong data.
    needed_offsets = []
    scheduled = set()
    for version_id, index_memo in records:
        if version_id in cache:
            continue
        if do_cache:
            if version_id in scheduled:
                continue
            scheduled.add(version_id)
        needed_offsets.append(index_memo)

    # uses readv so nice and fast we hope.
    raw_records = self._access.get_raw_records(needed_offsets)

    for version_id, index_memo in records:
        if version_id in cache:
            # This data has already been validated
            data = cache[version_id]
        else:
            data = next(raw_records)
            # validate the header before the data is allowed into the
            # cache, so a corrupt record is never cached as trusted.
            df, rec = self._parse_record_header(version_id, data)
            df.close()
            if do_cache:
                cache[version_id] = data
        yield version_id, data

1997

1998

def read_records_iter(self, records):
    """Read text records from the data file and yield the parsed results.

    Results come back in whatever order is fastest to read, not the order
    requested, and a record requested several times is yielded only once.
    When the cache is non-empty, records found in it are yielded first;
    the remainder are fetched sorted by their index memo.

    :param records: A list of (version_id, index_memo) entries.
    :return: Yields (version_id, contents, digest) in the order read,
        not the order requested.
    """
    if not records:
        return

    by_index_memo = operator.itemgetter(1)
    if not self._cache:
        pending = sorted(set(records), key=by_index_memo)
    else:
        # Serve what we can straight from the cache, once per version,
        # and collect everything else for a single fetch.
        served = set()
        uncached = set()
        for record in records:
            version_id = record[0]
            if version_id not in self._cache:
                uncached.add(record)
            elif version_id not in served:
                served.add(version_id)
                content, digest = self._parse_record(
                    version_id, self._cache[version_id])
                yield (version_id, content, digest)
        pending = sorted(uncached, key=by_index_memo)

    if not pending:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for version_id, index_memo in pending]
    raw_data = self._access.get_raw_records(memos)

    for (version_id, index_memo), data in izip(iter(pending), raw_data):
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

2043

2044

def read_records(self, records):
    """Read records into a dictionary keyed by record id.

    :param records: Passed straight through to ``read_records_iter``.
    :return: A dict mapping each record id yielded by
        ``read_records_iter`` to its (content, digest) pair.
    """
    return dict(
        (record_id, (content, digest))
        for record_id, content, digest in self.read_records_iter(records))

2051

2052

2053

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    # Both ends of the operation are knits.
    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.

        :return: True only when both source and target are
            KnitVersionedFile instances.
        """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of every needed version from the source
        knit into the target knit, then merges the parent lists of versions
        present on both sides whose parents differ.

        NOTE(review): the ``pb`` argument is replaced by a nested progress
        bar before it is ever read, and ``msg`` is not referenced in this
        method.

        :return: The number of raw records copied (0 when there was
            nothing to do).
        :raises errors.GraphCycleError: if adopting the source's parents
            for a shared version would introduce a cycle.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # The caller-supplied pb is discarded here.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions to copy, and versions present on both sides whose
            # parent lists must be compared.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            # Copy in topological order of the source graph.
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            # copy_queue and copy_queue_records are built in step so they
            # can be zipped together below.
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            # read_records_iter_raw is relied on to yield in the order
            # requested; the assert below checks that.
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            # All copied records are written to the target in one call.
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


# Register the optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

2168

2169

2170

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    # NOTE(review): the source factory is WeaveFile, while is_compatible and
    # join check against bzrlib.weave.Weave -- presumably WeaveFile is a
    # Weave subclass; confirm.
    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :return: True only when source is a Weave and target is a
            KnitVersionedFile.
        """
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds every needed version from the source weave to the target knit
        with ``add_lines``, then merges the parent lists of versions present
        on both sides where the source has parents the target lacks.

        NOTE(review): the ``pb`` argument is replaced by a nested progress
        bar before it is ever read, and ``msg`` is not referenced in this
        method.

        :return: The number of versions added (0 when there was nothing
            to do).
        :raises errors.GraphCycleError: if adopting the source's parents
            for a shared version would introduce a cycle.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # The caller-supplied pb is discarded here.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions to convert, and versions present on both sides whose
            # parent lists must be compared.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            # Convert in topological order of the source graph.
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                # Progress is reported before the version is added, so the
                # bar shows the number of versions already completed.
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


# Register the optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

2261

2262

2263

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # nothing = []
        # NOTE(review): b2jget is never used below; it is left over from
        # the b2j.get(a[i], nothing) form described in the profiling notes.
        b2jget = b2j.get
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            # 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)
            # +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>
            # +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>
            # to
            # 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)
            # +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>
            # +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                # a[i] does not occur in b2j: nothing can match here.
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            # NOTE(review): stopping here assumes the
                            # indices in js are in increasing order.
                            break
                        # Extend the match that ended at a[i-1], b[j-1].
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize

2395

2396

2397

try:

2398

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2399

except ImportError:

2400

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »