/brz/remove-bazaar : revision 2670.3.4

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Andrew Bennetts
Date: 2007-08-30 08:11:54 UTC
mfrom: (2766 +trunk)
mto: (2535.3.55 repo-refactor)
mto: This revision was merged to the branch mainline in revision 2772.
Revision ID: andrew.bennetts@canonical.com-20070830081154-16hebp2xwr15x2hc

Merge from bzr.dev.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). The delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

RevisionNotPresent,

100

RevisionAlreadyPresent,

101

)

102

from bzrlib.tuned_gzip import GzipFile

103

from bzrlib.osutils import (

104

contains_whitespace,

105

contains_linebreaks,

106

sha_strings,

107

)

108

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

109

from bzrlib.tsort import topo_sort

110

import bzrlib.ui

111

from bzrlib.util import bencode

112

import bzrlib.weave

113

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

114

115

116

# TODO: Split out code specific to this format into an associated object.

117

118

# TODO: Can we put in some kind of value to check that the index and data

119

# files belong together?

120

121

# TODO: accommodate binaries, perhaps by storing a byte count

122

123

# TODO: function to check whole file

124

125

# TODO: atomically append data, then measure backwards from the cursor

126

# position after writing to work out where it was located. we may need to

127

# bypass python file buffering.

128

129

# File-name suffixes used by this module.
# NOTE(review): presumably '.knit' names the delta-record data file and
# '.kndx' its index, as described in the module docstring -- confirm at the
# points of use.
DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'

131

132

133

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The content is kept in ``self._lines`` as a sequence of
    ``(origin, text)`` pairs, one pair per line.
    """

    def __init__(self, lines):
        """Wrap ``lines``, a sequence of (origin, text) pairs.

        The sequence is stored as given; no copy is made.
        """
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        return iter(self._lines)

    def annotate(self):
        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        ``new_lines`` must offer ``text()`` and ``_lines`` the way this class
        does.  For every opcode that is not 'equal', a tuple
        ``(old_start, old_end, new_count, new_pairs)`` is yielded, where
        ``new_pairs`` is the matching slice of ``new_lines._lines``.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = KnitSequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == 'equal':
                continue
            # ofrom, oto, length, data
            yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return the result of line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    def text(self):
        """Return the text of every line, dropping the origins."""
        return [text for origin, text in self._lines]

    def copy(self):
        """Return a new KnitContent holding a shallow copy of the lines."""
        return KnitContent(self._lines[:])

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        ``knit_delta`` is an iterable of
        ``(s_begin, s_end, t_len, new_text)`` hunks; ``source`` and
        ``target`` are the indexable line sequences the delta maps between.
        Yields ``(source_pos, target_pos, length)`` triples for the runs
        that lie between hunks, followed by a zero-length terminator.
        """
        target_len = len(target)
        s_pos = 0
        t_pos = 0
        for s_begin, s_end, t_len, new_text in knit_delta:
            true_n = s_begin - s_pos
            n = true_n
            if n > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[s_pos + n - 1] != target[t_pos + n - 1]:
                    n -= 1
                if n > 0:
                    yield s_pos, t_pos, n
            # Advance by the untrimmed run length so positions stay aligned
            # even when the last line was dropped from the yielded block.
            t_pos += t_len + true_n
            s_pos = s_end
        # Whatever remains of the target after the last hunk.
        n = target_len - t_pos
        if n > 0:
            if source[s_pos + n - 1] != target[t_pos + n - 1]:
                n -= 1
            if n > 0:
                yield s_pos, t_pos, n
        # Zero-length block marking the end of the sequence.
        yield s_pos + (target_len - t_pos), target_len, 0

192

193

194

class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version_id):
        """Return a KnitContent in which every line has origin version_id.

        ``lines`` may be any iterable of line texts.  The pairs are built as
        a real list so the resulting content can be sliced and copied; a
        bare ``zip()`` result is only a list on Python 2.
        """
        return KnitContent([(version_id, line) for line in lines])

200

201

202

class KnitAnnotateFactory(_KnitFactory):

203

"""Factory for creating annotated Content objects."""

204

205

annotated = True

206

207

def parse_fulltext(self, content, version_id):
    """Convert fulltext to internal representation.

    Each fulltext line has the form "revid(utf8) plaintext" and is split on
    its first space into a (revid, plaintext) tuple.  ``version_id`` is
    accepted but not used here.

    Returns a KnitContent wrapping the list of tuples.
    """
    # TODO: jam 20070209 The tests expect this to be returned as tuples,
    #       but the code itself doesn't really depend on that.
    #       Figure out a way to not require the overhead of turning the
    #       list back into tuples.
    annotated = [tuple(line.split(' ', 1)) for line in content]
    return KnitContent(annotated)

221

222

def parse_line_delta_iter(self, lines):

223

return iter(self.parse_line_delta(lines))

224

225

def parse_line_delta(self, lines, version_id):

226

"""Convert a line based delta into internal representation.

227

228

line delta is in the form of:

229

intstart intend intcount

230

1..count lines:

231

revid(utf8) newline\n

232

internal representation is

233

(start, end, count, [1..count tuples (revid, newline)])

234

"""

235

result = []

236

lines = iter(lines)

237

next = lines.next

238

239

cache = {}

240

def cache_and_return(line):

241

origin, text = line.split(' ', 1)

242

return cache.setdefault(origin, origin), text

243

244

# walk through the lines parsing.

245

for header in lines:

246

start, end, count = [int(n) for n in header.split(',')]

247

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

248

result.append((start, end, count, contents))

249

return result

250

251

def get_fulltext_content(self, lines):

252

"""Extract just the content lines from a fulltext."""

253

return (line.split(' ', 1)[1] for line in lines)

254

255

def get_linedelta_content(self, lines):

256

"""Extract just the content from a line delta.

257

258

This doesn't return all of the extra information stored in a delta.

259

Only the actual content lines.

260

"""

261

lines = iter(lines)

262

next = lines.next

263

for header in lines:

264

header = header.split(',')

265

count = int(header[2])

266

for i in xrange(count):

267

origin, text = next().split(' ', 1)

268

yield text

269

270

def lower_fulltext(self, content):

271

"""convert a fulltext content record into a serializable form.

272

273

see parse_fulltext which this inverts.

274

"""

275

# TODO: jam 20070209 We only do the caching thing to make sure that

276

# the origin is a valid utf-8 line, eventually we could remove it

277

return ['%s %s' % (o, t) for o, t in content._lines]

278

279

def lower_line_delta(self, delta):

280

"""convert a delta into a serializable form.

281

282

See parse_line_delta which this inverts.

283

"""

284

# TODO: jam 20070209 We only do the caching thing to make sure that

285

# the origin is a valid utf-8 line, eventually we could remove it

286

out = []

287

for start, end, c, lines in delta:

288

out.append('%d,%d,%d\n' % (start, end, c))

289

out.extend(origin + ' ' + text

290

for origin, text in lines)

291

return out

292

293

294

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, annotated_lines) for each delta hunk.

        lines must support len() and slicing. Every content line of a hunk
        is paired with version_id.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            position += 1
            start, end, count = [int(field) for field in fields]
            body_end = position + count
            yield (start, end, count,
                   zip([version_id] * count, lines[position:body_end]))
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        fetch = line_iter.next
        for header in line_iter:
            # The third header field is the number of content lines.
            remaining = int(header.split(',')[2])
            while remaining > 0:
                yield fetch()
                remaining -= 1

    def lower_fulltext(self, content):
        """Serialise a fulltext: the plain text lines of content."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta as header lines followed by plain text lines."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            for _origin, text in hunk_lines:
                serialised.append(text)
        return serialised

347

348

349

def make_empty_knit(transport, relpath):
    """Construct a empty knit at the specified location.

    :param transport: the transport the knit lives on.
    :param relpath: the path of the knit relative to transport.
    :return: the newly created, writable, unannotated KnitVersionedFile.
    """
    # KnitVersionedFile takes (relpath, transport, ...). Everything else is
    # passed by keyword so that the mode and factory cannot land in the
    # wrong positional slots, and the factory is passed as an instance.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

352

353

354

class KnitVersionedFile(VersionedFile):

355

"""Weave-like structure with faster random access.

356

357

A knit stores a number of texts and a summary of the relationships

358

between them. Texts are identified by a string version-id. Texts

359

are normally stored and retrieved as a series of lines, but can

360

also be passed as single strings.

361

362

Lines are stored with the trailing newline (if any) included, to

363

avoid special cases for files with no final newline. Lines are

364

composed of 8-bit characters, not unicode. The combination of

365

these approaches should mean any 'binary' file can be safely

366

stored and retrieved.

367

"""

368

369

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: content factory; a KnitAnnotateFactory is used when
        none (or a false value) is given.
    :param basis_knit: deprecated; passing it only triggers a warning.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.
    :param access_method: An access object to wrap in _KnitData; when
        None a _KnitAccess for relpath + DATA_SUFFIX is constructed.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
             " parameter is deprecated as of bzr 0.9.",
             DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # len(self) is evaluated below, so the index must already be in place.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

414

415

def __repr__(self):

416

return '%s(%s)' % (self.__class__.__name__,

417

self.transport.abspath(self.filename))

418

419

def _check_should_delta(self, first_parents):

420

"""Iterate back through the parent listing, looking for a fulltext.

421

422

This is used when we want to decide whether to add a delta or a new

423

fulltext. It searches for _max_delta_chain parents. When it finds a

424

fulltext parent, it sees if the total size of the deltas leading up to

425

it is large enough to indicate that we want a new full text anyway.

426

427

Return True if we should create a new delta, False if we should use a

428

full text.

429

"""

430

delta_size = 0

431

fulltext_size = None

432

delta_parents = first_parents

433

for count in xrange(self._max_delta_chain):

434

parent = delta_parents[0]

435

method = self._index.get_method(parent)

436

index, pos, size = self._index.get_position(parent)

437

if method == 'fulltext':

438

fulltext_size = size

439

break

440

delta_size += size

441

delta_parents = self._index.get_parents(parent)

442

else:

443

# We couldn't find a fulltext, so we must create a new one

444

return False

445

446

return fulltext_size > delta_size

447

448

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored directly as a line-delta record when delta_parent
    is given and _check_should_delta() approves; otherwise the work is
    handed to the base class implementation.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # determine the current delta chain length.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    # sha1 is recorded as the digest of the stored record.
    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

502

503

def _add_raw_records(self, records, data):

504

"""Add all the records 'records' with data pre-joined in 'data'.

505

506

:param records: A list of tuples(version_id, options, parents, size).

507

:param data: The data for the records. When it is written, the records

508

are adjusted to have pos pointing into data by the sum of

509

the preceding records sizes.

510

"""

511

# write all the data

512

raw_record_sizes = [record[3] for record in records]

513

positions = self._data.add_raw_records(raw_record_sizes, data)

514

offset = 0

515

index_entries = []

516

for (version_id, options, parents, size), access_memo in zip(

517

records, positions):

518

index_entries.append((version_id, options, access_memo, parents))

519

if self._data._do_cache:

520

self._data._cache[version_id] = data[offset:offset+size]

521

offset += size

522

self._index.add_versions(index_entries)

523

524

def enable_cache(self):
    """Start caching data for this knit

    Delegates to enable_cache() on the knit's data object.
    """
    data_store = self._data
    data_store.enable_cache()

527

528

def clear_cache(self):
    """Clear the data cache only.

    Delegates to clear_cache() on the knit's data object.
    """
    data_store = self._data
    data_store.clear_cache()

531

532

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: base name of the copy; INDEX_SUFFIX and DATA_SUFFIX are
        appended for the two files.
    :param transport: the transport to write the copy to.
    """
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                                      index_file)
    finally:
        # Release the source index handle even if the copy fails.
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

546

547

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name that shares this knit's settings.

    The new knit uses the same factory and delta flag as this one and is
    opened with create=True. The mode argument is accepted but not used
    here.
    """
    new_knit = KnitVersionedFile(name, transport, create=True,
                                 delta=self.delta, factory=self.factory)
    return new_knit

550

551

def _fix_parents(self, version_id, new_parents):

552

"""Fix the parents list for version.

553

554

This is done by appending a new version to the index

555

with identical data except for the parents list.

556

the parents list must be a superset of the current

557

list.

558

"""

559

current_values = self._index._cache[version_id]

560

assert set(current_values[4]).difference(set(new_parents)) == set()

561

self._index.add_version(version_id,

562

current_values[1],

563

(None, current_values[2], current_values[3]),

564

new_parents)

565

566

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: the exact set of versions to be returned, i.e.
        not a transitive closure.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    :raises RevisionNotPresent: if a required version is not in this knit.
    """
    required_versions = set([osutils.safe_revision_id(v) for v in
        required_versions])
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    version_list = [candidate for candidate in self.versions()
                    if candidate in required_versions]

    # create the list of version information for the result
    copy_queue_records = []
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        _unused_index, _unused_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    chunks = []
    raw_records = self._data.read_records_iter_raw(copy_queue_records)
    for (version_id, raw_data), expected in izip(raw_records,
                                                 result_version_list):
        assert version_id == expected[0], 'logic error, inconsistent results'
        chunks.append(raw_data)
    pseudo_file = StringIO(''.join(chunks))

    def read(length):
        # None means "everything that is left".
        if length is None:
            return pseudo_file.read()
        return pseudo_file.read(length)

    return (self.get_format_signature(), result_version_list, read)

625

626

def get_stream_as_bytes(self, required_versions):
    """Generate a serialised data stream.

    The format is a bencoding of a list.  The first element of the list is a
    string of the format signature, then each subsequent element is a list
    corresponding to a record.  Those lists contain:

      * a version id
      * a list of options
      * a list of parents
      * the bytes

    :returns: a bencoded list.
    """
    format_signature, data_list, read_bytes = \
        self.get_data_stream(required_versions)
    payload = [format_signature]
    for version, options, length, parents in data_list:
        record_bytes = read_bytes(length)
        payload.append([version, options, parents, record_bytes])
    return bencode.bencode(payload)

647

648

def _extract_blocks(self, version_id, source, target):

649

if self._index.get_method(version_id) != 'line-delta':

650

return None

651

parent, sha1, noeol, delta = self.get_delta(version_id)

652

return KnitContent.get_line_delta_blocks(delta, source, target)

653

654

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id, or None if it has no parents.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Already stored as a delta: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    # Stored as a fulltext: compute a delta against the first parent, or
    # against an empty text when there is none.
    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        reference_content = self._get_content(parent)
        old_texts = reference_content.text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

682

683

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" when the factory is annotated, otherwise
        "knit-plain".
    """
    annotated_part = "plain"
    if self.factory.annotated:
        annotated_part = "annotated"
    return "knit-%s" % (annotated_part,)

690

691

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: a dict built from the items of the index graph.
    """
    return dict(self._index.get_graph())

695

696

def get_sha1(self, version_id):
    """Return the SHA1 recorded for version_id, via get_sha1s()."""
    digests = self.get_sha1s([version_id])
    return digests[0]

698

699

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: a list of digests in the same order as version_ids.
    """
    version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(version_ids)
    digests = []
    for version_id in version_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

705

706

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: a new list holding the data suffix then the index suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

710

711

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id."""
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            # A ghost is a parent that is referenced but has no index
            # entry of its own.
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

725

726

def insert_data_stream(self, (format, data_list, reader_callable)):

727

"""Insert knit records from a data stream into this knit.

728

729

If a version in the stream is already present in this knit, it will not

730

be inserted a second time. It will be checked for consistency with the

731

stored version however, and may cause a KnitCorrupt error to be raised

732

if the data in the stream disagrees with the already stored data.

733

734

:seealso: get_data_stream

735

"""

736

if format != self.get_format_signature():

737

mutter('incompatible format signature inserting to %r', self)

738

raise KnitDataStreamIncompatible(

739

format, self.get_format_signature())

740

741

for version_id, options, length, parents in data_list:

742

if self.has_version(version_id):

743

# First check: the list of parents.

744

my_parents = self.get_parents_with_ghosts(version_id)

745

if my_parents != parents:

746

# XXX: KnitCorrupt is not quite the right exception here.

747

raise KnitCorrupt(

748

self.filename,

749

'parents list %r from data stream does not match '

750

'already recorded parents %r for %s'

751

% (parents, my_parents, version_id))

752

753

# Also check the SHA-1 of the fulltext this content will

754

# produce.

755

raw_data = reader_callable(length)

756

my_fulltext_sha1 = self.get_sha1(version_id)

757

df, rec = self._data._parse_record_header(version_id, raw_data)

758

stream_fulltext_sha1 = rec[3]

759

if my_fulltext_sha1 != stream_fulltext_sha1:

760

# Actually, we don't know if it's this knit that's corrupt,

761

# or the data stream we're trying to insert.

762

raise KnitCorrupt(

763

self.filename, 'sha-1 does not match %s' % version_id)

764

else:

765

self._add_raw_records(

766

[(version_id, options, parents, length)],

767

reader_callable(length))

768

769

def versions(self):
    """See VersionedFile.versions.

    When the 'evil' debug flag is set the call site is logged first.
    """
    evil_requested = 'evil' in debug.debug_flags
    if evil_requested:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

774

775

def has_version(self, version_id):
    """See VersionedFile.has_version.

    When the 'evil' debug flag is set the call site is logged first.
    """
    evil_requested = 'evil' in debug.debug_flags
    if evil_requested:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

# "version_id in knit" is answered by has_version.
__contains__ = has_version

783

784

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: the content being added; its _lines are updated in
        place when annotated is true.
    :param parents: parent version ids; parents[0] is the delta basis.
    :param parent_texts: optional mapping of version id to content, passed
        on to _get_content(); None means no pre-supplied contents.
    :param delta: if true, return a line delta against parents[0].
    :param annotated: if true, copy matching annotations from the parents.
    :param left_matching_blocks: optional pre-computed matching blocks
        between parents[0] and content.
    :return: the line delta when delta is true, otherwise None.
    """
    if parent_texts is None:
        # Avoid sharing one default dict between calls.
        parent_texts = {}
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

817

818

def _make_line_delta(self, delta_seq, new_content):

819

"""Generate a line delta from delta_seq and new_content."""

820

diff_hunks = []

821

for op in delta_seq.get_opcodes():

822

if op[0] == 'equal':

823

continue

824

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

825

return diff_hunks

826

827

def _get_components_positions(self, version_ids):

828

"""Produce a map of position data for the components of versions.

829

830

This data is intended to be used for retrieving the knit records.

831

832

A dict of version_id to (method, data_pos, data_size, next) is

833

returned.

834

method is the way referenced data should be applied.

835

data_pos is the position of the data in the knit.

836

data_size is the size of the data in the knit.

837

next is the build-parent of the version, or None for fulltexts.

838

"""

839

component_data = {}

840

for version_id in version_ids:

841

cursor = version_id

842

843

while cursor is not None and cursor not in component_data:

844

method = self._index.get_method(cursor)

845

if method == 'fulltext':

846

next = None

847

else:

848

next = self.get_parents(cursor)[0]

849

index_memo = self._index.get_position(cursor)

850

component_data[cursor] = (method, index_memo, next)

851

cursor = next

852

return component_data

853

854

def _get_content(self, version_id, parent_texts={}):

855

"""Returns a content object that makes up the specified

856

version."""

857

if not self.has_version(version_id):

858

raise RevisionNotPresent(version_id, self.filename)

859

860

cached_version = parent_texts.get(version_id, None)

861

if cached_version is not None:

862

return cached_version

863

864

text_map, contents_map = self._get_content_maps([version_id])

865

return contents_map[version_id]

866

867

def _check_versions_present(self, version_ids):

868

"""Check that all specified versions are present."""

869

self._index.check_versions_present(version_ids)

870

871

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

872

"""See VersionedFile.add_lines_with_ghosts()."""

873

self._check_add(version_id, lines)

874

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

875

876

def _add_lines(self, version_id, parents, lines, parent_texts,

877

left_matching_blocks=None):

878

"""See VersionedFile.add_lines."""

879

self._check_add(version_id, lines)

880

self._check_versions_present(parents)

881

return self._add(version_id, lines[:], parents, self.delta,

882

parent_texts, left_matching_blocks)

883

884

def _check_add(self, version_id, lines):
    """check that version_id and lines are safe to add.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    has_whitespace = contains_whitespace(version_id)
    if has_whitespace:
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_present = self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    for validate in (self._check_lines_not_unicode,
                     self._check_lines_are_lines):
        validate(lines)

895

896

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks=None):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: the new text; the last entry is modified in place when
        it lacks a trailing newline, so callers pass a copy.
    :param parent_texts: optional mapping of version id to content, or
        None.
    :param left_matching_blocks: optional pre-computed matching blocks
        against the first parent, passed on to _merge_annotations().
    :return: the content object created for the new version.
    """
    if parent_texts is None:
        parent_texts = {}
    # Only parents that exist in this knit can serve as a delta basis or
    # as a source of annotations.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not len(present_parents):
        delta = False

    # The digest is taken before the no-eol newline is appended.
    digest = sha_strings(lines)
    options = []
    # endswith() also copes with an empty final line, where indexing the
    # last character raises IndexError.
    if lines and not lines[-1].endswith('\n'):
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return lines

961

962

def check(self, progress_bar=None):
    """See VersionedFile.check().

    NOTE: this override has no statements beyond its docstring, so calling
    it performs no verification here and returns None; progress_bar is not
    used.
    """

964

965

def _clone_text(self, new_version_id, old_version_id, parents):

966

"""See VersionedFile.clone_text()."""

967

# FIXME RBC 20060228 make fast by only inserting an index with null

968

# delta.

969

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

970

971

def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

974

975

def _get_record_map(self, version_ids):

976

"""Produce a dictionary of knit records.

977

978

The keys are version_ids, the values are tuples of (method, content,

979

digest, next).

980

method is the way the content should be applied.

981

content is a KnitContent object.

982

digest is the SHA1 digest of this version id after all steps are done

983

next is the build-parent of the version, i.e. the leftmost ancestor.

984

If the method is fulltext, next will be None.

985

"""

986

position_map = self._get_components_positions(version_ids)

987

# c = component_id, m = method, i_m = index_memo, n = next

988

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

989

record_map = {}

990

for component_id, content, digest in \

991

self._data.read_records_iter(records):

992

method, index_memo, next = position_map[component_id]

993

record_map[component_id] = method, content, digest, next

994

995

return record_map

996

997

def get_text(self, version_id):
    """See VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

1000

1001

def get_texts(self, version_ids):
    """Return each requested version as a single joined string."""
    texts = []
    for version_lines in self.get_line_list(version_ids):
        texts.append(''.join(version_lines))
    return texts

1003

1004

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings."""
    safe_ids = []
    for raw_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(raw_id))
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

# Second name for the same function.
_get_lf_split_line_list = get_line_list

1013

1014

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :param version_ids: the versions to build.
    :return: (text_map, content_map) where text_map contains the texts for
    the requested versions and content_map contains the KnitContents.
    Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if the sha-1 of a built text does not match the
        digest of its record.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # content_map holds the content built for every component so far (before
    # any no-eol adjustment), so later versions can reuse shared ancestors.
    content_map = {}
    # final_content holds the content of each requested version, after the
    # no-eol adjustment; this is the dict that is returned.
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at a fulltext
        # (next is None) or at a component that has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # Replay the chain oldest first.  After this loop 'digest' is the
        # one belonging to version_id itself, the last component applied.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_id)
                elif method == 'line-delta':
                    delta = self.factory.parse_line_delta(data, version_id)
                    # Copy first so the parent's cached content is not
                    # rebound to the new lines.
                    content = content.copy()
                    content._lines = self._apply_delta(content._lines,
                                                       delta)
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the entry kept in content_map retains its
            # trailing newline.
            content = content.copy()
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1068

1069

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: The versions whose records should be walked, or
        None to walk every version reported by self.versions().
    :param pb: Optional progress bar; a DummyProgress is used when None.
    :return: An iterator over the lines of each selected record.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    """
    if version_ids is not None:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    else:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # Inclusions are the caller's business; all that is needed here is the
    # list of (version_id, index_memo) records to visit.
    wanted = set(version_ids)
    # Refuse to continue if anything requested is unavailable.
    for wanted_id in wanted:
        if not self.has_version(wanted_id):
            raise RevisionNotPresent(wanted_id, self.filename)
    # Queue the records in the order self.versions() lists them.
    records = [(candidate, self._index.get_position(candidate))
               for candidate in self.versions() if candidate in wanted]

    total = len(records)
    record_stream = self._data.read_records_iter(records)
    for position, (version_id, data, sha_value) in enumerate(record_stream):
        pb.update('Walking content.', position, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method != 'fulltext':
            lines = self.factory.get_linedelta_content(data)
        else:
            lines = self.factory.get_fulltext_content(data)
        for line in lines:
            yield line

    pb.update('Walking content.', total, total)

1108

1109

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    Each id is normalised with osutils.safe_revision_id and the lookup is
    then delegated to the index.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

1121

1122

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: Whatever the index reports from its own num_versions().
    """
    index = self._index
    return index.num_versions()

# len(knit) is an alias for num_versions().
__len__ = num_versions

1127

1128

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    :param version_id: The version to annotate; normalised with
        osutils.safe_revision_id before use.
    :return: An iterator of (origin, text) pairs.
    """
    safe_id = osutils.safe_revision_id(version_id)
    annotated = self._get_content(safe_id).annotate_iter()
    for origin, text in annotated:
        yield origin, text

1134

1135

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: If the index raises KeyError for
        version_id.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1145

1146

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: If the index raises KeyError for
        version_id.
    """
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1153

1154

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id (a string) or a sequence of them.
    :param topo_sorted: Passed through to the index.
    :return: [] when no versions are given, otherwise the result of the
        index's get_ancestry.
    """
    if isinstance(versions, basestring):
        # a lone version id is shorthand for a one-element list
        versions = [versions]
    if versions:
        safe_versions = []
        for version in versions:
            safe_versions.append(osutils.safe_revision_id(version))
        return self._index.get_ancestry(safe_versions, topo_sorted)
    return []

1162

1163

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id (a string) or a sequence of them.
    :return: [] when no versions are given, otherwise the result of the
        index's get_ancestry_with_ghosts.
    """
    if isinstance(versions, basestring):
        # a lone version id is shorthand for a one-element list
        versions = [versions]
    if versions:
        safe_versions = []
        for version in versions:
            safe_versions.append(osutils.safe_revision_id(version))
        return self._index.get_ancestry_with_ghosts(safe_versions)
    return []

1171

1172

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the (unsorted) ancestry and the annotation of both versions
    and hands them to merge._plan_annotate_merge.
    """
    rev_a = osutils.safe_revision_id(ver_a)
    rev_b = osutils.safe_revision_id(ver_b)
    # Topological order is not needed: the ancestries are only used as sets.
    b_ancestry = set(self.get_ancestry(rev_b, topo_sorted=False))

    a_ancestry = set(self.get_ancestry(rev_a, topo_sorted=False))
    a_annotated = self.annotate(rev_a)
    b_annotated = self.annotate(rev_b)
    return merge._plan_annotate_merge(a_annotated, b_annotated,
                                      a_ancestry, b_ancestry)

1183

1184

1185

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it should be created.

        Nothing is read or written here; the arguments are only stored.
        """
        # location
        self._transport = transport
        self._filename = filename
        # access / creation modes
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # starts False; this class never sets it to True itself
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Read the first line of fp and verify it is self.HEADER.

        HEADER is not defined on this class; it is read from self.

        :raises errors.NoSuchFile: If the first line is empty.
        :raises KnitHeaderError: If the first line is not the header.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file
            # doesn't exist yet.
            raise errors.NoSuchFile(self._full_path())
        elif first_line != self.HEADER:
            raise KnitHeaderError(
                badline=first_line,
                filename=self._transport.abspath(self._filename))

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1214

1215

1216

class _KnitIndex(_KnitComponentFile):

1217

"""Manages knit index file.

1218

1219

The index is already kept in memory and read on startup, to enable

1220

fast lookups of revision information. The cursor of the index

1221

file is always pointing to the end, making it easy to append

1222

entries.

1223

1224

_cache is a cache for fast mapping from version id to a Index

1225

object.

1226

1227

_history is a cache for fast mapping from indexes to version ids.

1228

1229

The index data format is dictionary compressed when it comes to

1230

parent references; an index entry may only have parents with a

1231

lower index number. As a result, the index is topologically sorted.

1232

1233

Duplicate entries may be written to the index for a single version id

1234

if this is done then the latter one completely replaces the former:

1235

this allows updates to correct version and parent information.

1236

Note that the two entries may share the delta, and that successive

1237

annotations and references MUST point to the first entry.

1238

1239

The index file on disc contains a header, followed by one line per knit

1240

record. The same revision can be present in an index file more than once.

1241

The first occurrence gets assigned a sequence number starting from 0.

1242

1243

The format of a single line is

1244

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1245

REVISION_ID is a utf8-encoded revision id

1246

FLAGS is a comma separated list of flags about the record. Values include

1247

no-eol, line-delta, fulltext.

1248

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1249

that the compressed data starts at.

1250

LENGTH is the ascii representation of the length of the data file.

1251

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1252

REVISION_ID.

1253

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1254

revision id already in the knit that is a parent of REVISION_ID.

1255

The ' :' marker is the end of record marker.

1256

1257

partial writes:

1258

when a write is interrupted to the index file, it will result in a line

1259

that does not end in ' :'. If the ' :' is not present at the end of a line,

1260

or at the end of the file, then the record that is missing it will be

1261

ignored by the parser.

1262

1263

When writing new records to the index file, the data is preceded by '\n'

1264

to ensure that records always start on new lines even if the last write was

1265

interrupted. As a result it's normal for the last line in the index to be

1266

missing a trailing newline. One can be added with no harmful effects.

1267

"""

1268

1269

HEADER = "# bzr knit index 8\n"

1270

1271

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1272

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1273

1274

def _cache_version(self, version_id, options, pos, size, parents):
    """Cache a version record in the history array and index cache.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
    indexes).
    """
    cache = self._cache
    # The _history position always belongs to the FIRST entry seen for a
    # version_id; a later duplicate only replaces the cached details.
    if version_id in cache:
        sequence = cache[version_id][5]
    else:
        sequence = len(self._history)
        self._history.append(version_id)
    cache[version_id] = (version_id, options, pos, size, parents, sequence)

1294

1295

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Open the index file and load it, or arrange for it to be created.

    :param transport: Transport the index file is read from / written to.
    :param filename: Name of the index file on that transport.
    :param mode: Access mode; a missing file is only tolerated when this
        is 'w' and create is true.
    :param create: Whether a missing file may be created.
    :param file_mode: Mode passed to the transport when writing the file.
    :param create_parent_dir: Stored by the base class for later writes.
    :param delay_create: If true, a missing file is not written now;
        _need_to_create is set instead.
    :param dir_mode: Stored by the base class for later writes.
    :raises NoSuchFile: If the file is missing (or _load_data reports it
        as such) and creating it is not permitted.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    try:
        fp = self._transport.get(self._filename)
        try:
            # _load_data may raise NoSuchFile if the target knit is
            # completely empty.
            _load_data(self, fp)
        finally:
            # always release the file handle, even if loading failed
            fp.close()
    except NoSuchFile:
        if mode != 'w' or not create:
            raise
        elif delay_create:
            # defer writing the header until the first record is added
            self._need_to_create = True
        else:
            # create the file now, containing just the header
            self._transport.put_bytes_non_atomic(
                self._filename, self.HEADER, mode=self._file_mode)

1323

1324

def get_graph(self):
    """Return a list of the node:parents lists from this knit index.

    :return: A list of (version_id, parents) pairs, one per cached entry.
    """
    graph = []
    for version_id, entry in self._cache.iteritems():
        graph.append((version_id, entry[4]))
    return graph

1327

1328

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    Parents that are not in the cache (ghosts) are left out.

    :raises RevisionNotPresent: If a version being expanded is not in the
        cache.
    """
    known = self._cache
    # Build a graph of everything reachable from the requested versions.
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            entry = known[current]
        except KeyError:
            raise RevisionNotPresent(current, self._filename)
        # trim ghosts
        present = []
        for candidate in entry[4]:
            if candidate in known:
                present.append(candidate)
        # queue only the parents that have not been completed yet
        for candidate in present:
            if candidate not in ancestry:
                todo.add(candidate)
        ancestry[current] = present
    if topo_sorted:
        return topo_sort(ancestry.items())
    return ancestry.keys()

1347

1348

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    The requested versions themselves must be present; parents that are
    missing from the cache (ghosts) are included with no parents.
    """
    self.check_versions_present(versions)
    known = self._cache
    # Build a graph of everything reachable from the requested versions.
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        if current in known:
            parents = known[current][4]
            # queue only the parents that have not been completed yet
            for parent in parents:
                if parent not in ancestry:
                    todo.add(parent)
            ancestry[current] = parents
        else:
            # ghost, fake it
            ancestry[current] = []
    return topo_sort(ancestry.items())

1367

1368

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    for version_id in version_ids:
        try:
            parents = tuple(self.get_parents(version_id))
        except KeyError:
            # not in the index: skip it silently
            continue
        yield version_id, parents

1382

1383

def num_versions(self):
    """Return the number of entries in the _history list."""
    history = self._history
    return len(history)

# len(index) is an alias for num_versions().
__len__ = num_versions

1387

1388

def get_versions(self):
    """Get all the versions in the file. not topologically sorted.

    :return: The _history list itself (not a copy).
    """
    history = self._history
    return history

1391

1392

def _version_list_to_index(self, versions):
    """Encode a list of version ids as a space separated string.

    A version found in the cache is written as its sequence number; any
    other version is written as the version id prefixed by '.'.
    """
    cache = self._cache
    encoded = []
    for version in versions:
        if version not in cache:
            encoded.append('.' + version)
        else:
            # -- inlined lookup() --
            encoded.append(str(cache[version][5]))
            # -- end lookup () --
    return ' '.join(encoded)

1403

1404

def add_version(self, version_id, options, index_memo, parents):
    """Add a version record to the index.

    Wraps the single record in a one-element tuple and passes it to
    add_versions.
    """
    record = (version_id, options, index_memo, parents)
    self.add_versions((record,))

1407

1408

def add_versions(self, versions):
    """Add multiple versions to the index.

    The new records are cached in memory and written to the index file in
    one go. If anything fails, _history and _cache are restored to the
    state they had on entry and the error is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, (index, pos, size), parents).
                     The index element of the memo is not used here.
    """
    lines = []
    # Snapshots used to roll back the in-memory state on failure.
    orig_history = self._history[:]
    orig_cache = self._cache.copy()

    try:
        for version_id, options, (index, pos, size), parents in versions:
            # Each record starts with '\n' and ends with ' :'.
            line = "\n%s %s %s %s %s :" % (version_id,
                                           ','.join(options),
                                           pos,
                                           size,
                                           self._version_list_to_index(parents))
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if not self._need_to_create:
            self._transport.append_bytes(self._filename, ''.join(lines))
        else:
            # Creation was delayed: write the header and the new records
            # together as the initial file content.
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = orig_history
        self._cache = orig_cache
        raise

1446

1447

def has_version(self, version_id):
    """True if the version is in the index."""
    cache = self._cache
    return version_id in cache

1450

1451

def get_position(self, version_id):
    """Return details needed to access the version.

    .kndx indices do not support split-out data, so return None for the
    index field.

    :return: a tuple (None, data position, size) to hand to the access
        logic to get the record.
    """
    entry = self._cache[version_id]
    position = entry[2]
    size = entry[3]
    return None, position, size

1462

1463

def get_method(self, version_id):
    """Return compression method of specified version.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: If neither option is recorded.
    """
    options = self._cache[version_id][1]
    if 'fulltext' in options:
        return 'fulltext'
    if 'line-delta' in options:
        return 'line-delta'
    raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1472

1473

def get_options(self, version_id):
    """Return the options recorded for version_id.

    The value is returned exactly as it is stored in the cache entry
    (e.g. the options foo and bar).
    """
    entry = self._cache[version_id]
    return entry[1]

1479

1480

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    A ghost here is a recorded parent that is not itself in the cache.
    """
    present = []
    for parent in self._cache[version_id][4]:
        if parent in self._cache:
            present.append(parent)
    return present

1484

1485

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    The parents are returned exactly as stored in the cache entry.
    """
    entry = self._cache[version_id]
    return entry[4]

1488

1489

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: For the first version_id that is not in
        the cache.
    """
    known = self._cache
    for candidate in version_ids:
        if candidate in known:
            continue
        raise RevisionNotPresent(candidate, self._filename)

1495

1496

1497

class KnitGraphIndex(object):

1498

"""A knit index that builds on GraphIndex."""

1499

1500

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):

1501

"""Construct a KnitGraphIndex on a graph_index.

1502

1503

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1504

:param deltas: Allow delta-compressed records.

1505

:param add_callback: If not None, allow additions to the index and call

1506

this callback with a list of added GraphIndex nodes:

1507

[(node, value, node_refs), ...]

1508

:param parents: If True, record knits parents, if not do not record

1509

parents.

1510

"""

1511

self._graph_index = graph_index

1512

self._deltas = deltas

1513

self._add_callback = add_callback

1514

self._parents = parents

1515

if deltas and not parents:

1516

raise KnitCorrupt(self, "Cannot do delta compression without "

1517

"parent tracking.")

1518

1519

def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    :param keys: An iterable of index keys, - 1-tuples.
    :param check_present: If True, raise RevisionNotPresent once the
        entries are exhausted if any requested key was not found.
    :return: An iterator of 4-tuples. For a parentless index the fourth
        element is always ().
    """
    keys = set(keys)
    seen = set()
    has_parents = self._parents
    for node in self._graph_index.iter_entries(keys):
        if has_parents:
            yield node
        else:
            # adapt parentless index to the rest of the code.
            yield node[0], node[1], node[2], ()
        seen.add(node[1])
    if check_present:
        absent = keys.difference(seen)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

1539

1540

def _present_keys(self, version_ids):
    """Return the set of keys for which _get_entries yields an entry."""
    present = set()
    for node in self._get_entries(version_ids):
        present.add(node[1])
    return present

1543

1544

def _parentless_ancestry(self, versions):
    """Honour the get_ancestry API for parentless knit indices.

    :return: A list of the requested version ids.
    :raises RevisionNotPresent: If any requested version is absent.
    """
    wanted = self._version_ids_to_keys(versions)
    present = self._present_keys(wanted)
    absent = set(wanted).difference(present)
    if absent:
        raise RevisionNotPresent(absent.pop(), self)
    version_ids = self._keys_to_version_ids(present)
    return list(version_ids)

1552

1553

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: The version ids whose ancestry is wanted.
    :param topo_sorted: If True the result is ordered by topo_sort,
        otherwise it follows the graph dict's key order.
    :return: A list of version ids.
    :raises RevisionNotPresent: If a requested version is not found.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        found = set()
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            # don't ask for ghosts - otherwise
            # we can end up looping with pending
            # being entirely ghosted.
            graph[key] = [parent for parent in node_refs[0]
                if parent not in ghosts]
            # queue parents
            for parent in graph[key]:
                # don't examine known nodes again
                if parent in graph:
                    continue
                pending.add(parent)
            found.add(key)
        # anything asked for in this round but not returned is a ghost
        ghosts.update(this_iteration.difference(found))
    if versions.difference(graph):
        raise RevisionNotPresent(versions.difference(graph).pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        result_keys = graph.iterkeys()
    # keys are 1-tuples; unwrap them back into version ids
    return [key[0] for key in result_keys]

1591

1592

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: The version ids whose ancestry is wanted.
    :return: A list of version ids ordered by topo_sort. Parents missing
        from the index are included, with no parents of their own.
    :raises RevisionNotPresent: If a requested version is not found.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            graph[key] = node_refs[0]
            # queue parents
            for parent in graph[key]:
                # don't examine known nodes again
                if parent in graph:
                    continue
                pending.add(parent)
        missing_versions = this_iteration.difference(graph)
        # a missing key is only an error if the caller asked for it
        missing_needed = versions.intersection(missing_versions)
        if missing_needed:
            raise RevisionNotPresent(missing_needed.pop(), self)
        for missing_version in missing_versions:
            # add a key, no parents
            graph[missing_version] = []
            pending.discard(missing_version) # don't look for it
    result_keys = topo_sort(graph.items())
    # keys are 1-tuples; unwrap them back into version ids
    return [key[0] for key in result_keys]

1625

1626

def get_graph(self):
    """Return a list of the node:parents lists from this knit index.

    :return: A list of (version_id, parents) pairs; parents is always ()
        for a parentless index.
    """
    if self._parents:
        graph = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            # unwrap the 1-tuple keys into plain version ids
            parent_ids = tuple([ref[0] for ref in refs[0]])
            graph.append((key[0], parent_ids))
        return graph
    return [(key, ()) for key in self.get_versions()]

1634

1635

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    if not self._parents:
        # parentless index: every present version has no parents
        for node in self._get_entries(self._version_ids_to_keys(version_ids)):
            yield node[1][0], ()
        return
    nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
    referenced = set()
    present = set()
    for node in nodes:
        referenced.update(node[3][0])
        # any node we are querying must be present
        present.add(node[1])
    # one extra lookup settles which of the remaining parents exist
    unknown = referenced.difference(present)
    present.update(self._present_keys(unknown))
    for node in nodes:
        parents = [ref[0] for ref in node[3][0] if ref in present]
        yield node[1][0], tuple(parents)

1663

1664

def num_versions(self):
    """Return the number of entries in the graph index.

    Every entry is read in order to count them.
    """
    entries = list(self._graph_index.iter_all_entries())
    return len(entries)

# len(index) is an alias for num_versions().
__len__ = num_versions

1668

1669

def get_versions(self):
    """Get all the versions in the file. not topologically sorted."""
    versions = []
    for node in self._graph_index.iter_all_entries():
        # node[1] is the 1-tuple key; its only element is the version id
        versions.append(node[1][0])
    return versions

1672

1673

def has_version(self, version_id):
    """True if the version is in the index."""
    wanted = self._version_ids_to_keys([version_id])
    found = self._present_keys(wanted)
    return len(found) == 1

1676

1677

def _keys_to_version_ids(self, keys):
    """Return a tuple holding the first element of every key."""
    return tuple([key[0] for key in keys])

1679

1680

def get_position(self, version_id):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    node = self._get_node(version_id)
    value = node[2]
    # the first character of the value is a flag; the remainder is
    # "<pos> <size>"
    fields = value[1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

1689

1690

def get_method(self, version_id):
    """Return compression method of specified version.

    An index that does not allow deltas always answers 'fulltext' without
    looking the version up.
    """
    if self._deltas:
        node = self._get_node(version_id)
        return self._parent_compression(node[3][1])
    return 'fulltext'

1695

1696

def _parent_compression(self, reference_list):
    """Return 'line-delta' for a non-empty reference list, else 'fulltext'."""
    # use the second reference list to decide if this is delta'd or not.
    if not len(reference_list):
        return 'fulltext'
    return 'line-delta'

1702

1703

def _get_node(self, version_id):
    """Return the first entry found for version_id.

    Indexing the empty result raises IndexError when nothing is found.
    """
    wanted = self._version_ids_to_keys([version_id])
    entries = list(self._get_entries(wanted))
    return entries[0]

1705

1706

def get_options(self, version_id):
    """Return the list of options for version_id.

    The list holds the compression method ('fulltext' or 'line-delta'),
    followed by 'no-eol' when the node value starts with 'N'.
    """
    node = self._get_node(version_id)
    if self._deltas:
        options = [self._parent_compression(node[3][1])]
    else:
        options = ['fulltext']
    no_eol = node[2][0] == 'N'
    if no_eol:
        options.append('no-eol')
    return options

1719

1720

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :raises errors.RevisionNotPresent: If iter_parents yields nothing for
        version_id.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        return found[0][1]
    # missing key
    raise errors.RevisionNotPresent(version_id, self)

1727

1728

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    The entry is fetched with check_present=True even for a parentless
    index, for which the result is always ().
    """
    wanted = self._version_ids_to_keys([version_id])
    nodes = list(self._get_entries(wanted, check_present=True))
    if self._parents:
        return self._keys_to_version_ids(nodes[0][3][0])
    return ()

1735

1736

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: Naming one of the absent versions' keys.
    """
    wanted = self._version_ids_to_keys(version_ids)
    absent = wanted.difference(self._present_keys(wanted))
    if not absent:
        return
    raise RevisionNotPresent(absent.pop(), self)

1743

1744

def add_version(self, version_id, options, access_memo, parents):
    """Add a version record to the index.

    :return: Whatever add_versions returns for the one-record batch.
    """
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))

1747

1748

def add_versions(self, versions):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param versions: a list of tuples:
                     (version_id, options, access_memo, parents)
                     where access_memo is an (index, pos, size) tuple.
                     The index element of the memo is not used here.
    :raises errors.ReadOnlyError: If no add_callback was supplied.
    :raises KnitCorrupt: If a record is not valid for this index
        configuration, or conflicts with an entry already in the index.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.
    # check for dups

    # keys maps each 1-tuple key to the (value, node_refs) to be added.
    keys = {}
    for (version_id, options, access_memo, parents) in versions:
        index, pos, size = access_memo
        key = (version_id, )
        parents = tuple((parent, ) for parent in parents)
        # value is a one character flag ('N' for no-eol, ' ' otherwise)
        # followed by "<pos> <size>".
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        if not self._deltas:
            if 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                # the second reference list names the compression parent;
                # it is empty for a fulltext
                if 'line-delta' in options:
                    node_refs = (parents, (parents[0],))
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    present_nodes = self._get_entries(keys)
    for (index, key, value, node_refs) in present_nodes:
        # An entry already in the index must match exactly; if it does it
        # is dropped from the batch rather than added again.
        if (value, node_refs) != keys[key]:
            raise KnitCorrupt(self, "inconsistent details in add_versions"
                ": %s %s" % ((value, node_refs), keys[key]))
        del keys[key]
    result = []
    if self._parents:
        for key, (value, node_refs) in keys.iteritems():
            result.append((key, value, node_refs))
    else:
        # a parentless index gets nodes without a references element
        for key, (value, node_refs) in keys.iteritems():
            result.append((key, value))
    self._add_callback(result)

1806

1807

def _version_ids_to_keys(self, version_ids):
    """Return the set of 1-tuple keys for the given version ids."""
    return set([(version_id, ) for version_id in version_ids])

1809

1810

1811

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        # location
        self._transport = transport
        self._filename = filename
        # creation settings, used when the file is first written
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # first write: the data becomes the whole file, so it starts
            # at offset 0
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            start = 0
        else:
            # append_bytes reports where the appended data starts
            start = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for length in sizes:
            memos.append((None, start, length))
            start += length
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = []
        for index, pos, size in memos_for_retrieval:
            read_vector.append((pos, size))
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data

1889

1890

1891

class _PackAccess(object):

1892

"""Access to knit records via a collection of packs."""

1893

1894

def __init__(self, index_to_packs, writer=None):

1895

"""Create a _PackAccess object.

1896

1897

:param index_to_packs: A dict mapping index objects to the transport

1898

and file names for obtaining data.

1899

:param writer: A tuple (pack.ContainerWriter, write_index) which

1900

contains the pack to write, and the index that reads from it will

1901

be associated with.

1902

"""

1903

if writer:

1904

self.container_writer = writer[0]

1905

self.write_index = writer[1]

1906

else:

1907

self.container_writer = None

1908

self.write_index = None

1909

self.indices = index_to_packs

1910

1911

def add_raw_records(self, sizes, raw_data):

1912

"""Add raw knit bytes to a storage area.

1913

1914

The data is spooled to the container writer in one bytes-record per

1915

raw data item.

1916

1917

:param sizes: An iterable containing the size of each raw data segment.

1918

:param raw_data: A bytestring containing the data.

1919

:return: A list of memos to retrieve the record later. Each memo is a

1920

tuple - (index, pos, length), where the index field is the

1921

write_index object supplied to the PackAccess object.

1922

"""

1923

assert type(raw_data) == str, \

1924

'data must be plain bytes was %s' % type(raw_data)

1925

result = []

1926

offset = 0

1927

for size in sizes:

1928

p_offset, p_length = self.container_writer.add_bytes_record(

1929

raw_data[offset:offset+size], [])

1930

offset += size

1931

result.append((self.write_index, p_offset, p_length))

1932

return result

1933

1934

def create(self):

1935

"""Pack based knits do not get individually created."""

1936

1937

def get_raw_records(self, memos_for_retrieval):

1938

"""Get the raw bytes for a records.

1939

1940

:param memos_for_retrieval: An iterable containing the (index, pos,

1941

length) memo for retrieving the bytes. The Pack access method

1942

looks up the pack to use for a given record in its index_to_pack

1943

map.

1944

:return: An iterator over the bytes of the records.

1945

"""

1946

# first pass, group into same-index requests

1947

request_lists = []

1948

current_index = None

1949

for (index, offset, length) in memos_for_retrieval:

1950

if current_index == index:

1951

current_list.append((offset, length))

1952

else:

1953

if current_index is not None:

1954

request_lists.append((current_index, current_list))

1955

current_index = index

1956

current_list = [(offset, length)]

1957

# handle the last entry

1958

if current_index is not None:

1959

request_lists.append((current_index, current_list))

1960

for index, offsets in request_lists:

1961

transport, path = self.indices[index]

1962

reader = pack.make_readv_reader(transport, path, offsets)

1963

for names, read_func in reader.iter_records():

1964

yield read_func(None)

1965

1966

def open_file(self):

1967

"""Pack based knits have no single file."""

1968

return None

1969

1970

def set_writer(self, writer, index, (transport, packname)):

1971

"""Set a writer to use for adding data."""

1972

self.indices[index] = (transport, packname)

1973

self.container_writer = writer

1974

self.write_index = index

1975

1976

1977

class _KnitData(object):

1978

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1979

1980

The KnitData class provides the logic for parsing and using knit records,

1981

making use of an access method for the low level read and write operations.

1982

"""

1983

1984

def __init__(self, access):

1985

"""Create a KnitData object.

1986

1987

:param access: The access method to use. Access methods such as

1988

_KnitAccess manage the insertion of raw records and the subsequent

1989

retrieval of the same.

1990

"""

1991

self._access = access

1992

self._checked = False

1993

# TODO: jam 20060713 conceptually, this could spill to disk

1994

# if the cached size gets larger than a certain amount

1995

# but it complicates the model a bit, so for now just use

1996

# a simple dictionary

1997

self._cache = {}

1998

self._do_cache = False

1999

2000

def enable_cache(self):

2001

"""Enable caching of reads."""

2002

self._do_cache = True

2003

2004

def clear_cache(self):

2005

"""Clear the record cache."""

2006

self._do_cache = False

2007

self._cache = {}

2008

2009

def _open_file(self):

2010

return self._access.open_file()

2011

2012

def _record_to_data(self, version_id, digest, lines):

2013

"""Convert version_id, digest, lines into a raw data block.

2014

2015

:return: (len, a StringIO instance with the raw data ready to read.)

2016

"""

2017

sio = StringIO()

2018

data_file = GzipFile(None, mode='wb', fileobj=sio)

2019

2020

assert isinstance(version_id, str)

2021

data_file.writelines(chain(

2022

["version %s %d %s\n" % (version_id,

2023

len(lines),

2024

digest)],

2025

lines,

2026

["end %s\n" % version_id]))

2027

data_file.close()

2028

length= sio.tell()

2029

2030

sio.seek(0)

2031

return length, sio

2032

2033

def add_raw_records(self, sizes, raw_data):

2034

"""Append a prepared record to the data file.

2035

2036

:param sizes: An iterable containing the size of each raw data segment.

2037

:param raw_data: A bytestring containing the data.

2038

:return: a list of index data for the way the data was stored.

2039

See the access method add_raw_records documentation for more

2040

details.

2041

"""

2042

return self._access.add_raw_records(sizes, raw_data)

2043

2044

def add_record(self, version_id, digest, lines):

2045

"""Write new text record to disk.

2046

2047

Returns index data for retrieving it later, as per add_raw_records.

2048

"""

2049

size, sio = self._record_to_data(version_id, digest, lines)

2050

result = self.add_raw_records([size], sio.getvalue())

2051

if self._do_cache:

2052

self._cache[version_id] = sio.getvalue()

2053

return result[0]

2054

2055

def _parse_record_header(self, version_id, raw_data):

2056

"""Parse a record header for consistency.

2057

2058

:return: the header and the decompressor stream.

2059

as (stream, header_record)

2060

"""

2061

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2062

try:

2063

rec = self._check_header(version_id, df.readline())

2064

except Exception, e:

2065

raise KnitCorrupt(self._access,

2066

"While reading {%s} got %s(%s)"

2067

% (version_id, e.__class__.__name__, str(e)))

2068

return df, rec

2069

2070

def _check_header(self, version_id, line):

2071

rec = line.split()

2072

if len(rec) != 4:

2073

raise KnitCorrupt(self._access,

2074

'unexpected number of elements in record header')

2075

if rec[1] != version_id:

2076

raise KnitCorrupt(self._access,

2077

'unexpected version, wanted %r, got %r'

2078

% (version_id, rec[1]))

2079

return rec

2080

2081

def _parse_record(self, version_id, data):

2082

# profiling notes:

2083

# 4168 calls in 2880 217 internal

2084

# 4168 calls to _parse_record_header in 2121

2085

# 4168 calls to readlines in 330

2086

df = GzipFile(mode='rb', fileobj=StringIO(data))

2087

2088

try:

2089

record_contents = df.readlines()

2090

except Exception, e:

2091

raise KnitCorrupt(self._access,

2092

"While reading {%s} got %s(%s)"

2093

% (version_id, e.__class__.__name__, str(e)))

2094

header = record_contents.pop(0)

2095

rec = self._check_header(version_id, header)

2096

2097

last_line = record_contents.pop()

2098

if len(record_contents) != int(rec[2]):

2099

raise KnitCorrupt(self._access,

2100

'incorrect number of lines %s != %s'

2101

' for version {%s}'

2102

% (len(record_contents), int(rec[2]),

2103

version_id))

2104

if last_line != 'end %s\n' % rec[1]:

2105

raise KnitCorrupt(self._access,

2106

'unexpected version end line %r, wanted %r'

2107

% (last_line, version_id))

2108

df.close()

2109

return record_contents, rec[3]

2110

2111

def read_records_iter_raw(self, records):

2112

"""Read text records from data file and yield raw data.

2113

2114

This unpacks enough of the text record to validate the id is

2115

as expected but thats all.

2116

"""

2117

# setup an iterator of the external records:

2118

# uses readv so nice and fast we hope.

2119

if len(records):

2120

# grab the disk data needed.

2121

if self._cache:

2122

# Don't check _cache if it is empty

2123

needed_offsets = [index_memo for version_id, index_memo

2124

in records

2125

if version_id not in self._cache]

2126

else:

2127

needed_offsets = [index_memo for version_id, index_memo

2128

in records]

2129

2130

raw_records = self._access.get_raw_records(needed_offsets)

2131

2132

for version_id, index_memo in records:

2133

if version_id in self._cache:

2134

# This data has already been validated

2135

data = self._cache[version_id]

2136

else:

2137

data = raw_records.next()

2138

if self._do_cache:

2139

self._cache[version_id] = data

2140

2141

# validate the header

2142

df, rec = self._parse_record_header(version_id, data)

2143

df.close()

2144

yield version_id, data

2145

2146

def read_records_iter(self, records):

2147

"""Read text records from data file and yield result.

2148

2149

The result will be returned in whatever is the fastest to read.

2150

Not by the order requested. Also, multiple requests for the same

2151

record will only yield 1 response.

2152

:param records: A list of (version_id, pos, len) entries

2153

:return: Yields (version_id, contents, digest) in the order

2154

read, not the order requested

2155

"""

2156

if not records:

2157

return

2158

2159

if self._cache:

2160

# Skip records we have alread seen

2161

yielded_records = set()

2162

needed_records = set()

2163

for record in records:

2164

if record[0] in self._cache:

2165

if record[0] in yielded_records:

2166

continue

2167

yielded_records.add(record[0])

2168

data = self._cache[record[0]]

2169

content, digest = self._parse_record(record[0], data)

2170

yield (record[0], content, digest)

2171

else:

2172

needed_records.add(record)

2173

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2174

else:

2175

needed_records = sorted(set(records), key=operator.itemgetter(1))

2176

2177

if not needed_records:

2178

return

2179

2180

# The transport optimizes the fetching as well

2181

# (ie, reads continuous ranges.)

2182

raw_data = self._access.get_raw_records(

2183

[index_memo for version_id, index_memo in needed_records])

2184

2185

for (version_id, index_memo), data in \

2186

izip(iter(needed_records), raw_data):

2187

content, digest = self._parse_record(version_id, data)

2188

if self._do_cache:

2189

self._cache[version_id] = data

2190

yield version_id, content, digest

2191

2192

def read_records(self, records):

2193

"""Read records into a dictionary."""

2194

components = {}

2195

for record_id, content, digest in \

2196

self.read_records_iter(records):

2197

components[record_id] = (content, digest)

2198

return components

2199

2200

2201

class InterKnit(InterVersionedFile):
    """Knit to knit specialisation of InterVersionedFile."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only when both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # The pb argument is replaced by a freshly nested progress bar.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            to_fetch = self.source_ancestry - target_versions
            shared = self.source_ancestry.intersection(target_versions)
            parent_mismatches = set()
            for version in shared:
                # scan to include needed parents.
                target_parents = set(
                    self.target.get_parents_with_ghosts(version))
                source_parents = set(
                    self.source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-side parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in self.source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure the parents only the source knows about will be
                # available later.
                extra_parents = source_parents.difference(target_parents)
                to_fetch.update(extra_parents.difference(target_versions))
                parent_mismatches.add(version)

            if not (to_fetch or parent_mismatches):
                return 0
            ordered = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in ordered:
                if self.target.has_version(candidate):
                    continue
                if candidate in to_fetch:
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            scheduled = set()
            source_index = self.source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in scheduled or
                            not self.source.has_version(parent))
                copy_queue_records.append(
                    (version_id, source_index.get_position(version_id)))
                copy_queue.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_chunks = []
            raw_records = []
            record_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for fetched, planned in izip(record_stream, copy_queue):
                version_id, raw_data = fetched
                version_id2, options, parents = planned
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append(
                    (version_id, options, parents, len(raw_data)))
                raw_chunks.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_chunks))

            for version in parent_mismatches:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(
                    self.target.get_parents_with_ghosts(version))
                source_parents = set(
                    self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                combined = (self.target.get_parents_with_ghosts(version)
                            + list(source_parents.difference(target_parents)))
                self.target.fix_parents(version, combined)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

2316

2317

2318

class WeaveToKnit(InterVersionedFile):
    """Weave to knit specialisation of InterVersionedFile."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only for a weave source and a knit target."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # The pb argument is replaced by a freshly nested progress bar.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            to_fetch = self.source_ancestry - target_versions
            shared = self.source_ancestry.intersection(target_versions)
            parent_mismatches = set()
            for version in shared:
                # scan to include needed parents.
                target_parents = set(
                    self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents(version))
                # if every source parent is already a target parent, its fine.
                if not source_parents.difference(target_parents):
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-side parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in self.source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure the parents only the source knows about will be
                # available later.
                extra_parents = source_parents.difference(target_parents)
                to_fetch.update(extra_parents.difference(target_versions))
                parent_mismatches.add(version)

            if not (to_fetch or parent_mismatches):
                return 0
            ordered = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in ordered:
                if self.target.has_version(candidate):
                    continue
                if candidate in to_fetch:
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                text_lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, text_lines)
                count += 1

            for version in parent_mismatches:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(
                    self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                combined = (self.target.get_parents_with_ghosts(version)
                            + list(source_parents.difference(target_parents)))
                self.target.fix_parents(version, combined)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2409

2410

2411

class KnitSequenceMatcher(difflib.SequenceMatcher):

2412

"""Knit tuned sequence matcher.

2413

2414

This is based on profiling of difflib which indicated some improvements

2415

for our usage pattern.

2416

"""

2417

2418

def find_longest_match(self, alo, ahi, blo, bhi):

2419

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

2420

2421

If isjunk is not defined:

2422

2423

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

2424

alo <= i <= i+k <= ahi

2425

blo <= j <= j+k <= bhi

2426

and for all (i',j',k') meeting those conditions,

2427

k >= k'

2428

i <= i'

2429

and if i == i', j <= j'

2430

2431

In other words, of all maximal matching blocks, return one that

2432

starts earliest in a, and of all those maximal matching blocks that

2433

start earliest in a, return the one that starts earliest in b.

2434

2435

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

2436

>>> s.find_longest_match(0, 5, 0, 9)

2437

(0, 4, 5)

2438

2439

If isjunk is defined, first the longest matching block is

2440

determined as above, but with the additional restriction that no

2441

junk element appears in the block. Then that block is extended as

2442

far as possible by matching (only) junk elements on both sides. So

2443

the resulting block never matches on junk except as identical junk

2444

happens to be adjacent to an "interesting" match.

2445

2446

Here's the same example as before, but considering blanks to be

2447

junk. That prevents " abcd" from matching the " abcd" at the tail

2448

end of the second sequence directly. Instead only the "abcd" can

2449

match, and matches the leftmost "abcd" in the second sequence:

2450

2451

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

2452

>>> s.find_longest_match(0, 5, 0, 9)

2453

(1, 0, 4)

2454

2455

If no blocks match, return (alo, blo, 0).

2456

2457

>>> s = SequenceMatcher(None, "ab", "c")

2458

>>> s.find_longest_match(0, 2, 0, 1)

2459

(0, 0, 0)

2460

"""

2461

2462

# CAUTION: stripping common prefix or suffix would be incorrect.

2463

# E.g.,

2464

# ab

2465

# acab

2466

# Longest matching block is "ab", but if common prefix is

2467

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

2468

# strip, so ends up claiming that ab is changed to acab by

2469

# inserting "ca" in the middle. That's minimal but unintuitive:

2470

# "it's obvious" that someone inserted "ac" at the front.

2471

# Windiff ends up at the same place as diff, but by pairing up

2472

# the unique 'b's and then matching the first two 'a's.

2473

2474

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

2475

besti, bestj, bestsize = alo, blo, 0

2476

# find longest junk-free match

2477

# during an iteration of the loop, j2len[j] = length of longest

2478

# junk-free match ending with a[i-1] and b[j]

2479

j2len = {}

2480

# nothing = []

2481

b2jget = b2j.get

2482

for i in xrange(alo, ahi):

2483

# look at all instances of a[i] in b; note that because

2484

# b2j has no junk keys, the loop is skipped if a[i] is junk

2485

j2lenget = j2len.get

2486

newj2len = {}

2487

2488

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

2489

# following improvement

2490

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

2491

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

2492

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

2493

# to

2494

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

2495

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

2496

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

2497

2498

try:

2499

js = b2j[a[i]]

2500

except KeyError:

2501

pass

2502

else:

2503

for j in js:

2504

# a[i] matches b[j]

2505

if j >= blo:

2506

if j >= bhi:

2507

break

2508

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

2509

if k > bestsize:

2510

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

2511

j2len = newj2len

2512

2513

# Extend the best by non-junk elements on each end. In particular,

2514

# "popular" non-junk elements aren't in b2j, which greatly speeds

2515

# the inner loop above, but also means "the best" match so far

2516

# doesn't contain any junk *or* popular non-junk elements.

2517

while besti > alo and bestj > blo and \

2518

not isbjunk(b[bestj-1]) and \

2519

a[besti-1] == b[bestj-1]:

2520

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

2521

while besti+bestsize < ahi and bestj+bestsize < bhi and \

2522

not isbjunk(b[bestj+bestsize]) and \

2523

a[besti+bestsize] == b[bestj+bestsize]:

2524

bestsize += 1

2525

2526

# Now that we have a wholly interesting match (albeit possibly

2527

# empty!), we may as well suck up the matching junk on each

2528

# side of it too. Can't think of a good reason not to, and it

2529

# saves post-processing the (possibly considerable) expense of

2530

# figuring out what to do with it. In the case of an empty

2531

# interesting match, this is clearly the right thing to do,

2532

# because no other kind of match is possible in the regions.

2533

while besti > alo and bestj > blo and \

2534

isbjunk(b[bestj-1]) and \

2535

a[besti-1] == b[bestj-1]:

2536

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

2537

while besti+bestsize < ahi and bestj+bestsize < bhi and \

2538

isbjunk(b[bestj+bestsize]) and \

2539

a[besti+bestsize] == b[bestj+bestsize]:

2540

bestsize = bestsize + 1

2541

2542

return besti, bestj, bestsize

2543

2544

2545

try:

2546

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2547

except ImportError:

2548

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »