/brz/remove-bazaar : revision 2840.1.1

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Ian Clatworthy
Date: 2007-09-21 00:22:35 UTC
mfrom: (2825.5.2 record-entry-returns-status)
mto: This revision was merged to the branch mainline in revision 2842.
Revision ID: ian.clatworthy@internode.on.net-20070921002235-u5lbs3wog6na1qxg

faster pointless commit detection (Robert Collins)

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_strings,

108

)

109

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

110

from bzrlib.tsort import topo_sort

111

import bzrlib.ui

112

import bzrlib.weave

113

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

114

115

116

# TODO: Split out code specific to this format into an associated object.

117

118

# TODO: Can we put in some kind of value to check that the index and data

119

# files belong together?

120

121

# TODO: accommodate binaries, perhaps by storing a byte count

122

123

# TODO: function to check whole file

124

125

# TODO: atomically append data, then measure backwards from the cursor

126

# position after writing to work out where it was located. we may need to

127

# bypass python file buffering.

128

129

# File-name suffixes used by this module. Judging by the names and the module
# docstring, '.knit' is for the knit data file and '.kndx' for its index;
# the code that uses them is outside this view -- confirm against callers.
DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'

131

132

133

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The methods here call ``self.annotate_iter()`` and ``self.text()`` and
    read ``new_lines._lines``; none of those are defined on this class, so
    concrete content classes are expected to supply them.
    """

    def annotate(self):
        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: a content object providing ``text()`` and
            ``_lines``.
        :return: a generator of ``(old_start, old_end, new_length, data)``
            tuples, one per non-'equal' opcode reported by the patience
            sequence matcher. ``data`` is the matching slice of
            ``new_lines._lines``.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in s.get_opcodes():
            if tag == 'equal':
                continue
            # ofrom, oto, length, data
            yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return the result of line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        :param knit_delta: iterable of ``(s_begin, s_end, t_len, new_text)``
            hunks; ``new_text`` is not inspected here.
        :param source: indexable sequence of lines the delta starts from.
        :param target: indexable sequence of lines the delta produces.
        :return: a generator of ``(source_pos, target_pos, length)`` triples
            for the unchanged runs between hunks, always finishing with a
            zero-length triple positioned at the end of ``target``.
        """
        target_len = len(target)
        s_pos = 0
        t_pos = 0
        for s_begin, s_end, t_len, new_text in knit_delta:
            # Lines between the previous hunk and this one are nominally
            # unchanged.
            true_n = s_begin - s_pos
            n = true_n
            if n > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[s_pos + n - 1] != target[t_pos + n - 1]:
                    n -= 1
                if n > 0:
                    yield s_pos, t_pos, n
            # Advance by the full nominal run (true_n), not the trimmed n.
            t_pos += t_len + true_n
            s_pos = s_end
        # Whatever remains of target after the last hunk is the final run;
        # apply the same last-line check to it.
        n = target_len - t_pos
        if n > 0:
            if source[s_pos + n - 1] != target[t_pos + n - 1]:
                n -= 1
            if n > 0:
                yield s_pos, t_pos, n
        # Terminating zero-length block.
        yield s_pos + (target_len - t_pos), target_len, 0

179

180

181

class AnnotatedKnitContent(KnitContent):
    """Annotated content.

    ``self._lines`` holds the lines as a list of (origin, text) pairs.
    """

    def __init__(self, lines):
        """Store ``lines`` (a list of (origin, text) pairs) without copying."""
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        return iter(self._lines)

    def strip_last_line_newline(self):
        """Remove trailing newline characters from the text of the last line.

        The last entry is replaced with a new (origin, text) tuple. Raises
        IndexError if there are no lines.
        """
        last = self._lines[-1]
        self._lines[-1] = (last[0], last[1].rstrip('\n'))

    def text(self):
        """Return a new list holding only the text of each line."""
        return [text for origin, text in self._lines]

    def copy(self):
        """Return a new AnnotatedKnitContent over a shallow copy of the lines."""
        return AnnotatedKnitContent(self._lines[:])

200

201

202

class PlainKnitContent(KnitContent):
    """Knit content stored as bare text lines, without per-line origins.

    Because no origin is recorded per line, annotate[_iter] attributes
    every line to the single version_id given at construction time, so
    annotating a PlainKnitContent is generally not useful.
    """

    def __init__(self, lines, version_id):
        # lines: list of text lines, kept by reference.
        self._lines = lines
        # version_id: the origin reported for every line by annotate_iter.
        self._version_id = version_id

    def annotate_iter(self):
        """Yield (version_id, text) for each line, always the same version."""
        for text_line in self._lines:
            yield self._version_id, text_line

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return PlainKnitContent(duplicate, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newline characters from the final line."""
        last = self._lines[-1]
        self._lines[-1] = last.rstrip('\n')

    def text(self):
        """Return the stored list of lines itself (not a copy)."""
        return self._lines

227

228

229

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    # Marks this factory as producing annotated content; read by the knit
    # to build its format signature and decide whether to merge annotations.
    annotated = True

    def make(self, lines, version_id):
        """Build annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: Iterable of serialized fulltext lines.
        :param version_id: Unused here; the origin comes from each line.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks parsed by parse_line_delta.

        :param lines: Iterable of serialized line-delta lines.
        :param version_id: Forwarded to parse_line_delta. It defaults to
            None so the historical one-argument call form keeps working;
            the signature now matches KnitPlainFactory.parse_line_delta_iter.
        """
        # Previously this called parse_line_delta(lines) without the
        # required version_id argument and therefore always raised TypeError.
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Unused here; the origin comes from each line.
        """
        result = []
        lines = iter(lines)
        # Bound method used to pull the content lines that follow a header
        # from the same iterator the outer loop is walking.
        next = lines.next

        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            # The third header field is the number of content lines.
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of knit's content for version_id."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

327

328

329

class KnitPlainFactory(object):
    """Factory producing plain (unannotated) Content objects."""

    # Marks this factory as producing content without per-line origins.
    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent tagged with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        This is not quite a no-op: the lines are wrapped in a content
        object that reports version_id as the origin of every line.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) for each delta hunk.

        :param lines: Indexable sequence of serialized delta lines; each
            hunk is a 'start,end,count' header followed by count lines.
        :param version_id: Unused.
        """
        position = 0
        limit = len(lines)
        while position < limit:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            # Advance past the header and this hunk's content lines.
            position = body_start + count
            yield start, end, count, lines[body_start:position]

    def parse_line_delta(self, lines, version_id):
        """Return the hunks from parse_line_delta_iter as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Iterate over the content lines of a fulltext (the lines as-is)."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a serialized line delta.

        The hunk headers (and the positions they carry) are dropped.
        """
        lines = iter(lines)
        pull = lines.next
        for header in lines:
            # The third header field is the number of content lines.
            remaining = int(header.split(',')[2])
            for _unused in xrange(remaining):
                yield pull()

    def lower_fulltext(self, content):
        """Serialize fulltext content: simply its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta as header lines each followed by hunk lines."""
        serialized = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialized.append(header)
            serialized.extend(hunk_lines)
        return serialized

    def annotate_iter(self, knit, version_id):
        """Annotate version_id by delegating to annotate_knit()."""
        return annotate_knit(knit, version_id)

388

389

390

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: Transport the knit files are created on.
    :param relpath: Path of the knit relative to transport.
    """
    # KnitVersionedFile takes (relpath, transport, ...). The previous call
    # passed the two swapped, put 'w' into file_mode and the factory class
    # into access_mode, and never asked for creation.
    KnitVersionedFile(relpath, transport, access_mode='w',
                      factory=KnitPlainFactory(), create=True)

393

394

395

class KnitVersionedFile(VersionedFile):

396

"""Weave-like structure with faster random access.

397

398

A knit stores a number of texts and a summary of the relationships

399

between them. Texts are identified by a string version-id. Texts

400

are normally stored and retrieved as a series of lines, but can

401

also be passed as single strings.

402

403

Lines are stored with the trailing newline (if any) included, to

404

avoid special cases for files with no final newline. Lines are

405

composed of 8-bit characters, not unicode. The combination of

406

these approaches should mean any 'binary' file can be safely

407

stored and retrieved.

408

"""

409

410

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base name of the knit; stored as self.filename and
        suffixed to locate the index and data files.
    :param transport: Transport used for the index and data files.
    :param file_mode: Passed through to the index and data access objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: Content factory; defaults to a KnitAnnotateFactory.
    :param basis_knit: Deprecated; passing it only emits a warning.
    :param delta: Stored as self.delta (whether adds may use line deltas).
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data access objects.
    :param index: An index to use for the knit.
    :param access_method: A data access object to use instead of building
        a _KnitAccess.
    """
    if deprecated_passed(basis_knit):
        # The message used to read "KnitVersionedFile.__()", dropping the
        # method name.
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta walks back.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    if access_method is None:
        # len(self) is consulted so creation is only delayed for a knit
        # that has no versions yet.
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

455

456

def __repr__(self):

457

return '%s(%s)' % (self.__class__.__name__,

458

self.transport.abspath(self.filename))

459

460

def _check_should_delta(self, first_parents):
    """Decide whether a new text should be stored as a delta.

    Walks back along the leftmost-parent chain starting at
    first_parents[0], for at most _max_delta_chain steps, summing the
    stored sizes of the deltas met on the way until a fulltext is found.

    :param first_parents: Parents of the version about to be added.
    :return: True if a delta should be created (the fulltext found is
        larger than the deltas leading up to it); False if a fulltext
        should be stored, including when no fulltext is found in range.
    """
    accumulated_delta = 0
    chain_parents = first_parents
    for _step in xrange(self._max_delta_chain):
        ancestor = chain_parents[0]
        method = self._index.get_method(ancestor)
        _index, _pos, stored_size = self._index.get_position(ancestor)
        if method == 'fulltext':
            # Found the base text: a delta is worthwhile only while the
            # chain is still smaller than that fulltext.
            return stored_size > accumulated_delta
        accumulated_delta += stored_size
        chain_parents = self._index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

488

489

def _add_raw_records(self, records, data):

490

"""Add all the records 'records' with data pre-joined in 'data'.

491

492

:param records: A list of tuples(version_id, options, parents, size).

493

:param data: The data for the records. When it is written, the records

494

are adjusted to have pos pointing into data by the sum of

495

the preceding records sizes.

496

"""

497

# write all the data

498

raw_record_sizes = [record[3] for record in records]

499

positions = self._data.add_raw_records(raw_record_sizes, data)

500

offset = 0

501

index_entries = []

502

for (version_id, options, parents, size), access_memo in zip(

503

records, positions):

504

index_entries.append((version_id, options, access_memo, parents))

505

if self._data._do_cache:

506

self._data._cache[version_id] = data[offset:offset+size]

507

offset += size

508

self._index.add_versions(index_entries)

509

510

def enable_cache(self):
    """Turn on caching in this knit's data object."""
    data = self._data
    data.enable_cache()

513

514

def clear_cache(self):
    """Clear the cache of this knit's data object (the index is untouched)."""
    data = self._data
    data.clear_cache()

517

518

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    Copies the index to a temporary name, then the data file, and only
    then moves the copied index into its final place.
    """
    final_index = name + INDEX_SUFFIX
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
        self.transport.get(self._index._filename))
    # copy the data file, always closing the source handle
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(name + INDEX_SUFFIX + '.tmp', final_index)

532

533

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name that shares this knit's factory and delta.

    :param mode: Accepted but not used.
    """
    empty_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                   delta=self.delta, create=True)
    return empty_knit

536

537

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    :raises RevisionNotPresent: if any requested version is not in this
        knit.
    """
    required_versions = set([osutils.safe_revision_id(v) for v in
        required_versions])
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    version_list = []
    for version_id in self.versions():
        if version_id in required_versions:
            version_list.append(version_id)

    # create the list of version information for the result
    copy_queue_records = []
    # NOTE(review): copy_set is filled below but never read in this method.
    copy_set = set()
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        # Only the size component of the index memo is used here.
        none, data_pos, data_size = index_memo
        copy_set.add(version_id)
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    for (version_id, raw_data), \
        (version_id2, options, _, parents) in \
        izip(self._data.read_records_iter_raw(copy_queue_records),
             result_version_list):
        # The raw reader is expected to yield records in the order queued.
        assert version_id == version_id2, 'logic error, inconsistent results'
        raw_datum.append(raw_data)
    # All raw records are joined into one in-memory file-like object.
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        # length of None means "read everything that is left".
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

597

598

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks for version_id, or None if not a line-delta.

    :param source: The lines of the text the delta is applied to.
    :param target: The lines of the text the delta produces.
    """
    stored_as_delta = self._index.get_method(version_id) == 'line-delta'
    if stored_as_delta:
        _parent, _sha1, _noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

603

604

def get_delta(self, version_id):
    """Get a delta for constructing version_id from its first parent.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        (or None when there are no parents), sha1 is the digest read with
        the record, noeol says whether 'no-eol' is among the version's
        options, and delta is a list of line-delta hunks. For a version
        stored as a fulltext the delta is computed against the parent's
        text (or against an empty text when there is no parent).
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta
    # Stored as a fulltext: diff it against the parent text.
    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    matcher = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                   new_texts)
    return parent, sha1, noeol, self._make_line_delta(matcher, new_content)

630

631

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" when the factory is annotated, otherwise
        "knit-plain".
    """
    flavour = "plain"
    if self.factory.annotated:
        flavour = "annotated"
    return "knit-%s" % (flavour,)

638

639

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the pairs returned by the index's
        get_graph().
    """
    return dict(self._index.get_graph())

643

644

def get_sha1(self, version_id):
    """Return the sha1 of a single version (see get_sha1s)."""
    digests = self.get_sha1s([version_id])
    return digests[0]

646

647

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: The recorded digests, in the same order as version_ids.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for safe_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[safe_id][2])
    return digests

653

654

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A list of the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

658

659

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version named as a parent in the graph but absent from
    the index cache. A version that is present is never a ghost.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent in self._index._cache:
                continue
            if parent == version_id:
                return True
    return False

673

674

def insert_data_stream(self, stream):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    :param stream: A (format, data_list, reader_callable) tuple as
        returned by get_data_stream. data_list holds
        (version_id, options, length, parents) entries and
        reader_callable(length) returns the next length bytes of raw data.
    :raises KnitDataStreamIncompatible: if the stream's format signature
        differs from this knit's.
    :raises KnitCorrupt: if an already-present version disagrees with the
        stream on parents or sha-1.
    :seealso: get_data_stream
    """
    # Unpacked here rather than in the signature: tuple parameters are not
    # valid syntax on Python 3 (PEP 3113), and the first element used to
    # shadow the builtin 'format'.
    stream_format, data_list, reader_callable = stream
    if stream_format != self.get_format_signature():
        trace.mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            stream_format, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.  The record must be read even though it is not
            # stored, so the reader stays aligned for the next entry.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            df, rec = self._data._parse_record_header(version_id, raw_data)
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

716

717

def versions(self):
    """See VersionedFile.versions.

    When the 'evil' debug flag is set, the call site is logged because
    this call scales with the size of history.
    """
    evil_requested = 'evil' in debug.debug_flags
    if evil_requested:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

722

723

def has_version(self, version_id):
    """See VersionedFile.has_version.

    When the 'evil' debug flag is set, the call site is logged because
    this is a look-before-you-leap query.
    """
    evil_requested = 'evil' in debug.debug_flags
    if evil_requested:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

729

730

__contains__ = has_version

731

732

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: The new content object; when annotated is true its
        _lines are overwritten in place with matching parent lines.
    :param parents: Parent version ids; parents[0] is the delta basis.
    :param parent_texts: Optional mapping handed to _get_content as a
        cache of parent contents. The default dict is only passed on from
        here, never modified by this method.
    :param delta: If true, a line delta against parents[0] is returned.
    :param annotated: If true, annotations are copied from the parents.
    :param left_matching_blocks: Optional precomputed matching blocks
        between parents[0] and content, used instead of running a diff.
    :return: The list of delta hunks when delta is true, otherwise None.
    """
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            # The prematched blocks only describe the first parent.
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            # No prematched blocks were supplied: diff against parents[0].
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

765

766

def _make_line_delta(self, delta_seq, new_content):

767

"""Generate a line delta from delta_seq and new_content."""

768

diff_hunks = []

769

for op in delta_seq.get_opcodes():

770

if op[0] == 'equal':

771

continue

772

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

773

return diff_hunks

774

775

def _get_components_positions(self, version_ids):

776

"""Produce a map of position data for the components of versions.

777

778

This data is intended to be used for retrieving the knit records.

779

780

A dict of version_id to (method, data_pos, data_size, next) is

781

returned.

782

method is the way referenced data should be applied.

783

data_pos is the position of the data in the knit.

784

data_size is the size of the data in the knit.

785

next is the build-parent of the version, or None for fulltexts.

786

"""

787

component_data = {}

788

for version_id in version_ids:

789

cursor = version_id

790

791

while cursor is not None and cursor not in component_data:

792

method = self._index.get_method(cursor)

793

if method == 'fulltext':

794

next = None

795

else:

796

next = self.get_parents(cursor)[0]

797

index_memo = self._index.get_position(cursor)

798

component_data[cursor] = (method, index_memo, next)

799

cursor = next

800

return component_data

801

802

def _get_content(self, version_id, parent_texts={}):

803

"""Returns a content object that makes up the specified

804

version."""

805

cached_version = parent_texts.get(version_id, None)

806

if cached_version is not None:

807

if not self.has_version(version_id):

808

raise RevisionNotPresent(version_id, self.filename)

809

return cached_version

810

811

text_map, contents_map = self._get_content_maps([version_id])

812

return contents_map[version_id]

813

814

def _check_versions_present(self, version_ids):

815

"""Check that all specified versions are present."""

816

self._index.check_versions_present(version_ids)

817

818

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

819

nostore_sha, random_id, check_content):

820

"""See VersionedFile.add_lines_with_ghosts()."""

821

self._check_add(version_id, lines, random_id, check_content)

822

return self._add(version_id, lines, parents, self.delta,

823

parent_texts, None, nostore_sha)

824

825

def _add_lines(self, version_id, parents, lines, parent_texts,

826

left_matching_blocks, nostore_sha, random_id, check_content):

827

"""See VersionedFile.add_lines."""

828

self._check_add(version_id, lines, random_id, check_content)

829

self._check_versions_present(parents)

830

return self._add(version_id, lines[:], parents, self.delta,

831

parent_texts, left_matching_blocks, nostore_sha)

832

833

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param random_id: If true, skip the check for an existing version.
    :param check_content: If true, also validate the lines themselves.
    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if random_id is false and the version
        already exists in this knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    already_present = (not random_id) and self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

847

848

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: The text as a list of lines. The list may be modified
        (annotation merging works on the content built from it), so
        callers pass a list they own.
    :param parent_texts: Optional mapping of parent id to content, used
        as a cache; None is treated as an empty mapping.
    :param left_matching_blocks: Optional precomputed matching blocks
        against the first parent.
    :param nostore_sha: If equal to the sha of lines, nothing is stored.
    :return: (digest, text_length, content).
    :raises errors.ExistingContent: if the digest equals nostore_sha.
    """
    # Historical profiling data for this method:
    # 461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400   0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461   0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461   0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461   0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461   0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383  0      8.0370      8.0370   +<len>
    # +61    0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61    0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61    0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61    0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    for parent in parents:
        if self.has_version(parent):
            present_parents.append(parent)

    # can only compress against the left most present parent.
    if (delta and
        (len(present_parents) == 0 or
         present_parents[0] != parents[0])):
        delta = False

    # The digest and length describe the text as given, before any
    # newline is appended below.
    digest = sha_strings(lines)
    if nostore_sha == digest:
        raise errors.ExistingContent
    text_length = sum(map(len, lines))
    options = []
    if lines:
        # endswith() rather than lines[-1][-1]: the latter raises
        # IndexError when the final line is the empty string.
        if not lines[-1].endswith('\n'):
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(content)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return digest, text_length, content

918

919

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """
    return None

921

922

def _clone_text(self, new_version_id, old_version_id, parents):

923

"""See VersionedFile.clone_text()."""

924

# FIXME RBC 20060228 make fast by only inserting an index with null

925

# delta.

926

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

927

928

def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

931

932

def _get_record_map(self, version_ids):

933

"""Produce a dictionary of knit records.

934

935

The keys are version_ids, the values are tuples of (method, content,

936

digest, next).

937

method is the way the content should be applied.

938

content is a KnitContent object.

939

digest is the SHA1 digest of this version id after all steps are done

940

next is the build-parent of the version, i.e. the leftmost ancestor.

941

If the method is fulltext, next will be None.

942

"""

943

position_map = self._get_components_positions(version_ids)

944

# c = component_id, m = method, i_m = index_memo, n = next

945

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

946

record_map = {}

947

for component_id, content, digest in \

948

self._data.read_records_iter(records):

949

method, index_memo, next = position_map[component_id]

950

record_map[component_id] = method, content, digest, next

951

952

return record_map

953

954

def get_text(self, version_id):
    """See VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

957

958

def get_texts(self, version_ids):
    """Return each requested version as one string of its joined lines."""
    joined = []
    for version_lines in self.get_line_list(version_ids):
        joined.append(''.join(version_lines))
    return joined

960

961

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    :return: One list of lines per requested version, in request order.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, _content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

968

969

_get_lf_split_line_list = get_line_list

970

971

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a rebuilt text does not match its recorded
        sha-1.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # content_map holds every intermediate component built so far (still
    # with its trailing newline); final_content holds what is returned.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain from this version back to a fulltext,
        # or to the first component that has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # Replay the chain oldest-first, applying each delta in turn.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                # NOTE(review): the parse calls below receive version_id
                # (the requested version), not component_id, yet the
                # result is cached under component_id - confirm intended.
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_id)
                elif method == 'line-delta':
                    delta = self.factory.parse_line_delta(data, version_id)
                    # Copy first so the parent's cached content is not
                    # replaced by the result of the delta.
                    content = content.copy()
                    content._lines = self._apply_delta(content._lines,
                                                       delta)
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Strip on a copy so the cached component keeps its newline.
            content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1024

1025

@staticmethod

1026

def _apply_delta(lines, delta):

1027

"""Apply delta to lines."""

1028

lines = list(lines)

1029

offset = 0

1030

for start, end, count, delta_lines in delta:

1031

lines[offset+start:offset+end] = delta_lines

1032

offset = offset + (start - end) + count

1033

return lines

1034

1035

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Yield every line of the stored record (fulltext or line-delta) of each
    requested version, visiting the records in self.versions() order.

    :param version_ids: The versions to walk; None means every version.
    :param pb: Progress bar to update; a DummyProgress is used when None.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    wanted = set(version_ids)
    # Refuse to read anything if one of the requested versions is missing.
    for candidate in wanted:
        if not self.has_version(candidate):
            raise RevisionNotPresent(candidate, self.filename)
    # We don't care about inclusions, the caller cares. All we need is a
    # queue of (version_id, index_memo) pairs in the knit's own order.
    records = [(known, self._index.get_position(known))
               for known in self.versions() if known in wanted]

    total = len(records)
    record_stream = self._data.read_records_iter(records)
    for done, record in enumerate(record_stream):
        version_id, data, sha_value = record
        pb.update('Walking content.', done, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            content_lines = self.factory.get_fulltext_content(data)
        else:
            content_lines = self.factory.get_linedelta_content(data)
        for line in content_lines:
            yield line

    pb.update('Walking content.', total, total)

1074

1075

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    Each id is passed through osutils.safe_revision_id and the work is
    then delegated to the index.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for raw_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(raw_id))
    return self._index.iter_parents(safe_ids)

1087

1088

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: Whatever count the knit's index reports.
    """
    index = self._index
    return index.num_versions()

# len(knit) is the same question as knit.num_versions().
__len__ = num_versions

1093

1094

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    The revision id is passed through osutils.safe_revision_id and the
    annotation itself is produced by the knit's content factory.
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self.factory.annotate_iter(self, safe_id)

1098

1099

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index raises KeyError for the
        version.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    safe_id = osutils.safe_revision_id(version_id)
    try:
        # Ask the index directly rather than checking has_version first.
        parents = self._index.get_parents(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

1109

1110

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index raises KeyError for the
        version.
    """
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

1117

1118

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id string or an iterable of them.
    :param topo_sorted: Passed through to the index.
    :return: [] when no versions are given, otherwise the index's answer.
    """
    # A bare string is shorthand for a one-element list.
    if isinstance(versions, basestring):
        versions = [versions]
    if not versions:
        return []
    safe_ids = []
    for raw_id in versions:
        safe_ids.append(osutils.safe_revision_id(raw_id))
    return self._index.get_ancestry(safe_ids, topo_sorted)

1126

1127

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or an iterable of them.
    :return: [] when no versions are given, otherwise the index's answer.
    """
    # A bare string is shorthand for a one-element list.
    if isinstance(versions, basestring):
        versions = [versions]
    if not versions:
        return []
    safe_ids = []
    for raw_id in versions:
        safe_ids.append(osutils.safe_revision_id(raw_id))
    return self._index.get_ancestry_with_ghosts(safe_ids)

1135

1136

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the unsorted ancestry and the annotated text of both versions
    and hands them to merge._plan_annotate_merge.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    # Ancestry is only used for membership, so no topological sort.
    b_ancestry = set(self.get_ancestry(ver_b, topo_sorted=False))
    a_ancestry = set(self.get_ancestry(ver_a, topo_sorted=False))
    a_annotated = self.annotate(ver_a)
    b_annotated = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(a_annotated, b_annotated,
                                      a_ancestry, b_ancestry)
    return plan

1147

1148

1149

class _KnitComponentFile(object):
    """One of the files used to implement a knit database.

    Holds the transport, the file name and the creation settings shared by
    the knit component files. check_header compares against self.HEADER,
    which this class does not define itself.
    """

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it should be created.

        :param transport: The transport the file is accessed through.
        :param filename: Name of the file relative to the transport.
        :param mode: Access mode, stored as given.
        :param file_mode: Permission bits to use when creating the file.
        :param create_parent_dir: Stored as given for later creation calls.
        :param dir_mode: Permission bits for directories that get created.
        """
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # Starts out False; nothing in this class sets it to True.
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Read the first line of fp and verify it is the expected header.

        :raises errors.NoSuchFile: if fp is empty.
        :raises KnitHeaderError: if the first line is not self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file
            # doesn't exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line == self.HEADER:
            return
        location = self._transport.abspath(self._filename)
        raise KnitHeaderError(badline=first_line, filename=location)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1178

1179

1180

class _KnitIndex(_KnitComponentFile):
    """Manage the index file of a knit.

    The whole index is read when the object is constructed and is kept in
    memory afterwards, to enable fast lookups of revision information. New
    entries are only ever appended to the file.

    _cache maps a version id to the tuple
    (version_id, options, pos, size, parents, index).

    _history maps an index number to its version id.

    The index data format is dictionary compressed when it comes to parent
    references: an index entry may only refer by number to parents that
    have a lower index number. As a result, the index is topologically
    sorted.

    Duplicate entries may be written to the index for a single version id.
    If this is done then the later one completely replaces the earlier one,
    which allows updates to correct version and parent information. Note
    that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than
    once. The first occurrence gets assigned a sequence number starting
    from 0.

    A single line has the form

      REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :

    followed by a newline, where

    REVISION_ID is a utf8-encoded revision id.
    FLAGS is a comma separated list of flags about the record. Values
        include no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data
        file that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record in the
        data file.
    PARENT_ID is a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID is the ascii representation of the sequence number
        of a revision id already in the knit that is a parent of
        REVISION_ID.
    The ' :' marker is the end of record marker.

    Partial writes:
    When a write to the index file is interrupted, it leaves a line that
    does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by a
    newline to ensure that records always start on new lines even if the
    last write was interrupted. As a result it is normal for the last line
    in the index to be missing a trailing newline. One can be added with no
    harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index, or arrange for it to be created.

        :param create: With mode 'w', allow a missing index to be created.
        :param delay_create: When creating, defer writing the file until
            the first add_versions call instead of writing the header now.
        :raises NoSuchFile: if the index is missing (or empty) and it may
            not be created.

        The remaining parameters are passed to _KnitComponentFile.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            index_file = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, index_file)
            finally:
                index_file.close()
        except NoSuchFile:
            if not (mode == 'w' and create):
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        cache = self._cache
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id in cache:
            index = cache[version_id][5]
        else:
            history = self._history
            index = len(history)
            history.append(version_id)
        cache[version_id] = (version_id, options, pos, size, parents, index)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        graph = []
        for version_id, entry in self._cache.iteritems():
            graph.append((version_id, entry[4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Parents that are not in the index (ghosts) are left out.

        :raises RevisionNotPresent: if a requested version is not cached.
        """
        entries = self._cache
        # get a graph of all the mentioned versions:
        graph = {}
        todo = set(versions)
        while todo:
            current = todo.pop()
            try:
                recorded = entries[current][4]
            except KeyError:
                raise RevisionNotPresent(current, self._filename)
            # trim ghosts
            present = [p for p in recorded if p in entries]
            # queue what has not been completed yet
            todo.update([p for p in present if p not in graph])
            graph[current] = present
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost parents appear in the result as nodes without parents.

        :raises RevisionNotPresent: if a requested version is not cached.
        """
        self.check_versions_present(versions)
        entries = self._cache
        # get a graph of all the mentioned versions:
        graph = {}
        todo = set(versions)
        while todo:
            current = todo.pop()
            try:
                parents = entries[current][4]
            except KeyError:
                # ghost, fake it
                graph[current] = []
                continue
            # queue what has not been completed yet
            todo.update([p for p in parents if p not in graph])
            graph[current] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped. The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        for version_id in version_ids:
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent ids as the parent field of an index line.

        Ids already in the index are written as their sequence number,
        anything else as '.' followed by the id itself.
        """
        known = self._cache
        tokens = []
        for parent in versions:
            if parent not in known:
                tokens.append('.' + parent)
            else:
                tokens.append(str(known[parent][5]))
        return ' '.join(tokens)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
            (version_id, options, (index, pos, size), parents).
            The index element of the memo is not used here.

        If anything goes wrong the in-memory history and cache are put back
        to what they were before the call and the error is re-raised.
        """
        new_lines = []
        saved_history = self._history[:]
        saved_cache = self._cache.copy()

        try:
            for version_id, options, memo, parents in versions:
                unused_index, pos, size = memo
                parent_field = self._version_list_to_index(parents)
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               parent_field)
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # First write to a delayed-creation index: header plus the
                # new records go out as one file.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(
                    self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename,
                                             ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        pos = entry[2]
        size = entry[3]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises errors.KnitIndexUnknownMethod: if the options name neither
            'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options cached for the version.

        The value is returned exactly as it was cached.
        """
        entry = self._cache[version_id]
        return entry[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        known = self._cache
        return [parent for parent in known[version_id][4]
                if parent in known]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        entry = self._cache[version_id]
        return entry[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first version that is missing.
        """
        known = self._cache
        for version_id in version_ids:
            if version_id in known:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1459

1460

1461

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Version ids are stored as 1-tuple keys. Nodes coming out of the graph
    index are handled as (index, key, value, refs) tuples. The value
    starts with 'N' for no-eol records (a space otherwise) followed by
    "pos size"; refs[0] holds the parent keys and, for delta-capable
    indices, refs[1] holds the compression parent.
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and
            call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas are requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas:
            if not parents:
                raise KnitCorrupt(self, "Cannot do delta compression without "
                    "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: When True, raise RevisionNotPresent after the
            last entry if any requested key was not found.
        :return: A generator of (index, key, value, refs) nodes; for a
            parentless index refs is always ().
        """
        wanted = set(keys)
        seen = set()
        with_parents = self._parents
        for node in self._graph_index.iter_entries(wanted):
            if with_parents:
                entry = node
            else:
                # adapt parentless index to the rest of the code.
                entry = (node[0], node[1], node[2], ())
            yield entry
            seen.add(node[1])
        if check_present:
            absent = wanted.difference(seen)
            if absent:
                raise RevisionNotPresent(absent.pop(), self)

    def _present_keys(self, version_ids):
        """Return the subset of the given keys that the index contains."""
        present = set()
        for node in self._get_entries(version_ids):
            present.add(node[1])
        return present

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        absent = set(wanted_keys).difference(present_keys)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        requested = self._version_ids_to_keys(versions)
        frontier = set(requested)
        while frontier:
            # get all pending nodes
            asked = frontier
            nodes = self._get_entries(asked)
            found = set()
            frontier = set()
            for (index, key, value, node_refs) in nodes:
                # dont ask for ghosties - otherwise
                # we we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                              if parent not in ghosts]
                # queue parents, but dont examine known nodes again
                for parent in graph[key]:
                    if parent not in graph:
                        frontier.add(parent)
                found.add(key)
            ghosts.update(asked.difference(found))
        absent = requested.difference(graph)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        if topo_sorted:
            ordered_keys = topo_sort(graph.items())
        else:
            ordered_keys = graph.iterkeys()
        return [key[0] for key in ordered_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        requested = self._version_ids_to_keys(versions)
        frontier = set(requested)
        while frontier:
            # get all pending nodes
            asked = frontier
            nodes = self._get_entries(asked)
            frontier = set()
            for (index, key, value, node_refs) in nodes:
                graph[key] = node_refs[0]
                # queue parents, but dont examine known nodes again
                for parent in graph[key]:
                    if parent not in graph:
                        frontier.add(parent)
            not_found = asked.difference(graph)
            needed = requested.intersection(not_found)
            if needed:
                raise RevisionNotPresent(needed.pop(), self)
            for ghost in not_found:
                # add a key, no parents
                graph[ghost] = []
                frontier.discard(ghost) # don't look for it
        ordered_keys = topo_sort(graph.items())
        return [key[0] for key in ordered_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        return [(key[0], tuple([ref[0] for ref in refs[0]]))
                for index, key, value, refs
                in self._graph_index.iter_all_entries()]

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped. The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        keys = self._version_ids_to_keys(version_ids)
        if not self._parents:
            for node in self._get_entries(keys):
                yield node[1][0], ()
            return
        nodes = set(self._get_entries(keys))
        referenced = set()
        present = set()
        for node in nodes:
            referenced.update(node[3][0])
            # any node we are querying must be present
            present.add(node[1])
        # Only parents we have not already seen need an index lookup.
        unknown = referenced.difference(present)
        present.update(self._present_keys(unknown))
        for node in nodes:
            parents = [parent[0] for parent in node[3][0]
                       if parent in present]
            yield node[1][0], tuple(parents)

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        count = 0
        for node in self._graph_index.iter_all_entries():
            count += 1
        return count

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        versions = []
        for node in self._graph_index.iter_all_entries():
            versions.append(node[1][0])
        return versions

    def has_version(self, version_id):
        """True if the version is in the index."""
        keys = self._version_ids_to_keys([version_id])
        return len(self._present_keys(keys)) == 1

    def _keys_to_version_ids(self, keys):
        """Strip the 1-tuple wrapping from each key."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        # Skip the leading no-eol flag character; the rest is "pos size".
        fields = node[2][1:].split(' ')
        pos = int(fields[0])
        size = int(fields[1])
        return node[0], pos, size

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if self._deltas:
            node = self._get_node(version_id)
            return self._parent_compression(node[3][1])
        return 'fulltext'

    def _parent_compression(self, reference_list):
        """Name the compression method implied by a compression-parent list."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list) == 0:
            return 'fulltext'
        return 'line-delta'

    def _get_node(self, version_id):
        """Return the first index node found for version_id."""
        keys = self._version_ids_to_keys([version_id])
        return list(self._get_entries(keys))[0]

    def get_options(self, version_id):
        """Return the list of option strings for the version.

        e.g. ['line-delta', 'no-eol']
        """
        node = self._get_node(version_id)
        if self._deltas:
            options = [self._parent_compression(node[3][1])]
        else:
            options = ['fulltext']
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        found = list(self.iter_parents([version_id]))
        if found:
            return found[0][1]
        # missing key
        raise errors.RevisionNotPresent(version_id, self)

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        keys = self._version_ids_to_keys([version_id])
        nodes = list(self._get_entries(keys, check_present=True))
        if self._parents:
            return self._keys_to_version_ids(nodes[0][3][0])
        return ()

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: naming one of the missing versions.
        """
        keys = self._version_ids_to_keys(version_ids)
        absent = keys.difference(self._present_keys(keys))
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, access_memo, parents)
        return self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion
        by the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, (index, pos, size), parents).
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: if a record does not fit this index or
            disagrees with an entry that is already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        pending = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parent_keys = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                flag = 'N'
            else:
                flag = ' '
            value = flag + "%d %d" % (pos, size)
            is_delta = 'line-delta' in options
            if is_delta and not self._deltas:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if not self._parents:
                if parent_keys:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            elif not self._deltas:
                node_refs = (parent_keys, )
            elif is_delta:
                # The first parent is recorded as the compression parent.
                node_refs = (parent_keys, (parent_keys[0],))
            else:
                node_refs = (parent_keys, ())
            pending[key] = (value, node_refs)
        # Anything already present must match exactly and is then dropped
        # from the set of nodes to add.
        for (index, key, value, node_refs) in self._get_entries(pending):
            if (value, node_refs) != pending[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), pending[key]))
            del pending[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in pending.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in pending.iteritems():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Wrap each version id in a 1-tuple and return them as a set."""
        return set((version_id, ) for version_id in version_ids)

1773

1774

1775

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Mode passed to the transport when the file is
            written.
        :param _dir_mode: Directory mode passed to the transport when the
            file is first created.
        :param _need_to_create: True if the file has to be created by the
            first add_raw_records call.
        :param _create_parent_dir: Passed to the transport when the file is
            first created.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data
            segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is
            a tuple - (index, pos, length), where the index field is always
            None for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # First write: the data becomes the whole file, so the first
            # record starts at offset 0.
            self._transport.put_bytes_non_atomic(
                self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            position = 0
        else:
            # The value returned by append_bytes is used as the offset of
            # the first new record.
            position = self._transport.append_bytes(self._filename,
                                                    raw_data)
        memos = []
        for size in sizes:
            memos.append((None, position, size))
            position += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = []
        for (index, pos, size) in memos_for_retrieval:
            ranges.append((pos, size))
        for pos, data in self._transport.readv(self._filename, ranges):
            yield data

1853

1854

1855

class _PackAccess(object):

1856

"""Access to knit records via a collection of packs."""

1857

1858

def __init__(self, index_to_packs, writer=None):
    """Create a _PackAccess object.

    :param index_to_packs: A dict mapping index objects to the transport
        and file names for obtaining data.
    :param writer: A tuple (pack.ContainerWriter, write_index) which
        contains the pack to write, and the index that reads from it will
        be associated with. When omitted, both container_writer and
        write_index are set to None.
    """
    if not writer:
        self.container_writer = None
        self.write_index = None
    else:
        self.container_writer = writer[0]
        self.write_index = writer[1]
    self.indices = index_to_packs

1874

1875

def add_raw_records(self, sizes, raw_data):

1876

"""Add raw knit bytes to a storage area.

1877

1878

The data is spooled to the container writer in one bytes-record per

1879

raw data item.

1880

1881

:param sizes: An iterable containing the size of each raw data segment.

1882

:param raw_data: A bytestring containing the data.

1883

:return: A list of memos to retrieve the record later. Each memo is a

1884

tuple - (index, pos, length), where the index field is the

1885

write_index object supplied to the PackAccess object.

1886

"""

1887

assert type(raw_data) == str, \

1888

'data must be plain bytes was %s' % type(raw_data)

1889

result = []

1890

offset = 0

1891

for size in sizes:

1892

p_offset, p_length = self.container_writer.add_bytes_record(

1893

raw_data[offset:offset+size], [])

1894

offset += size

1895

result.append((self.write_index, p_offset, p_length))

1896

return result

1897

1898

def create(self):

1899

"""Pack based knits do not get individually created."""

1900

1901

def get_raw_records(self, memos_for_retrieval):

1902

"""Get the raw bytes for a records.

1903

1904

:param memos_for_retrieval: An iterable containing the (index, pos,

1905

length) memo for retrieving the bytes. The Pack access method

1906

looks up the pack to use for a given record in its index_to_pack

1907

map.

1908

:return: An iterator over the bytes of the records.

1909

"""

1910

# first pass, group into same-index requests

1911

request_lists = []

1912

current_index = None

1913

for (index, offset, length) in memos_for_retrieval:

1914

if current_index == index:

1915

current_list.append((offset, length))

1916

else:

1917

if current_index is not None:

1918

request_lists.append((current_index, current_list))

1919

current_index = index

1920

current_list = [(offset, length)]

1921

# handle the last entry

1922

if current_index is not None:

1923

request_lists.append((current_index, current_list))

1924

for index, offsets in request_lists:

1925

transport, path = self.indices[index]

1926

reader = pack.make_readv_reader(transport, path, offsets)

1927

for names, read_func in reader.iter_records():

1928

yield read_func(None)

1929

1930

def open_file(self):

1931

"""Pack based knits have no single file."""

1932

return None

1933

1934

def set_writer(self, writer, index, (transport, packname)):

1935

"""Set a writer to use for adding data."""

1936

self.indices[index] = (transport, packname)

1937

self.container_writer = writer

1938

self.write_index = index

1939

1940

1941

class _KnitData(object):

1942

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1943

1944

The KnitData class provides the logic for parsing and using knit records,

1945

making use of an access method for the low level read and write operations.

1946

"""

1947

1948

def __init__(self, access):

1949

"""Create a KnitData object.

1950

1951

:param access: The access method to use. Access methods such as

1952

_KnitAccess manage the insertion of raw records and the subsequent

1953

retrieval of the same.

1954

"""

1955

self._access = access

1956

self._checked = False

1957

# TODO: jam 20060713 conceptually, this could spill to disk

1958

# if the cached size gets larger than a certain amount

1959

# but it complicates the model a bit, so for now just use

1960

# a simple dictionary

1961

self._cache = {}

1962

self._do_cache = False

1963

1964

def enable_cache(self):

1965

"""Enable caching of reads."""

1966

self._do_cache = True

1967

1968

def clear_cache(self):

1969

"""Clear the record cache."""

1970

self._do_cache = False

1971

self._cache = {}

1972

1973

def _open_file(self):

1974

return self._access.open_file()

1975

1976

def _record_to_data(self, version_id, digest, lines):

1977

"""Convert version_id, digest, lines into a raw data block.

1978

1979

:return: (len, a StringIO instance with the raw data ready to read.)

1980

"""

1981

sio = StringIO()

1982

data_file = GzipFile(None, mode='wb', fileobj=sio,

1983

compresslevel=Z_DEFAULT_COMPRESSION)

1984

1985

assert isinstance(version_id, str)

1986

data_file.writelines(chain(

1987

["version %s %d %s\n" % (version_id,

1988

len(lines),

1989

digest)],

1990

lines,

1991

["end %s\n" % version_id]))

1992

data_file.close()

1993

length= sio.tell()

1994

1995

sio.seek(0)

1996

return length, sio

1997

1998

def add_raw_records(self, sizes, raw_data):

1999

"""Append a prepared record to the data file.

2000

2001

:param sizes: An iterable containing the size of each raw data segment.

2002

:param raw_data: A bytestring containing the data.

2003

:return: a list of index data for the way the data was stored.

2004

See the access method add_raw_records documentation for more

2005

details.

2006

"""

2007

return self._access.add_raw_records(sizes, raw_data)

2008

2009

def add_record(self, version_id, digest, lines):

2010

"""Write new text record to disk.

2011

2012

Returns index data for retrieving it later, as per add_raw_records.

2013

"""

2014

size, sio = self._record_to_data(version_id, digest, lines)

2015

result = self.add_raw_records([size], sio.getvalue())

2016

if self._do_cache:

2017

self._cache[version_id] = sio.getvalue()

2018

return result[0]

2019

2020

def _parse_record_header(self, version_id, raw_data):

2021

"""Parse a record header for consistency.

2022

2023

:return: the header and the decompressor stream.

2024

as (stream, header_record)

2025

"""

2026

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2027

try:

2028

rec = self._check_header(version_id, df.readline())

2029

except Exception, e:

2030

raise KnitCorrupt(self._access,

2031

"While reading {%s} got %s(%s)"

2032

% (version_id, e.__class__.__name__, str(e)))

2033

return df, rec

2034

2035

def _check_header(self, version_id, line):

2036

rec = line.split()

2037

if len(rec) != 4:

2038

raise KnitCorrupt(self._access,

2039

'unexpected number of elements in record header')

2040

if rec[1] != version_id:

2041

raise KnitCorrupt(self._access,

2042

'unexpected version, wanted %r, got %r'

2043

% (version_id, rec[1]))

2044

return rec

2045

2046

def _parse_record(self, version_id, data):

2047

# profiling notes:

2048

# 4168 calls in 2880 217 internal

2049

# 4168 calls to _parse_record_header in 2121

2050

# 4168 calls to readlines in 330

2051

df = GzipFile(mode='rb', fileobj=StringIO(data))

2052

2053

try:

2054

record_contents = df.readlines()

2055

except Exception, e:

2056

raise KnitCorrupt(self._access,

2057

"While reading {%s} got %s(%s)"

2058

% (version_id, e.__class__.__name__, str(e)))

2059

header = record_contents.pop(0)

2060

rec = self._check_header(version_id, header)

2061

2062

last_line = record_contents.pop()

2063

if len(record_contents) != int(rec[2]):

2064

raise KnitCorrupt(self._access,

2065

'incorrect number of lines %s != %s'

2066

' for version {%s}'

2067

% (len(record_contents), int(rec[2]),

2068

version_id))

2069

if last_line != 'end %s\n' % rec[1]:

2070

raise KnitCorrupt(self._access,

2071

'unexpected version end line %r, wanted %r'

2072

% (last_line, version_id))

2073

df.close()

2074

return record_contents, rec[3]

2075

2076

def read_records_iter_raw(self, records):

2077

"""Read text records from data file and yield raw data.

2078

2079

This unpacks enough of the text record to validate the id is

2080

as expected but thats all.

2081

"""

2082

# setup an iterator of the external records:

2083

# uses readv so nice and fast we hope.

2084

if len(records):

2085

# grab the disk data needed.

2086

if self._cache:

2087

# Don't check _cache if it is empty

2088

needed_offsets = [index_memo for version_id, index_memo

2089

in records

2090

if version_id not in self._cache]

2091

else:

2092

needed_offsets = [index_memo for version_id, index_memo

2093

in records]

2094

2095

raw_records = self._access.get_raw_records(needed_offsets)

2096

2097

for version_id, index_memo in records:

2098

if version_id in self._cache:

2099

# This data has already been validated

2100

data = self._cache[version_id]

2101

else:

2102

data = raw_records.next()

2103

if self._do_cache:

2104

self._cache[version_id] = data

2105

2106

# validate the header

2107

df, rec = self._parse_record_header(version_id, data)

2108

df.close()

2109

yield version_id, data

2110

2111

def read_records_iter(self, records):

2112

"""Read text records from data file and yield result.

2113

2114

The result will be returned in whatever is the fastest to read.

2115

Not by the order requested. Also, multiple requests for the same

2116

record will only yield 1 response.

2117

:param records: A list of (version_id, pos, len) entries

2118

:return: Yields (version_id, contents, digest) in the order

2119

read, not the order requested

2120

"""

2121

if not records:

2122

return

2123

2124

if self._cache:

2125

# Skip records we have alread seen

2126

yielded_records = set()

2127

needed_records = set()

2128

for record in records:

2129

if record[0] in self._cache:

2130

if record[0] in yielded_records:

2131

continue

2132

yielded_records.add(record[0])

2133

data = self._cache[record[0]]

2134

content, digest = self._parse_record(record[0], data)

2135

yield (record[0], content, digest)

2136

else:

2137

needed_records.add(record)

2138

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2139

else:

2140

needed_records = sorted(set(records), key=operator.itemgetter(1))

2141

2142

if not needed_records:

2143

return

2144

2145

# The transport optimizes the fetching as well

2146

# (ie, reads continuous ranges.)

2147

raw_data = self._access.get_raw_records(

2148

[index_memo for version_id, index_memo in needed_records])

2149

2150

for (version_id, index_memo), data in \

2151

izip(iter(needed_records), raw_data):

2152

content, digest = self._parse_record(version_id, data)

2153

if self._do_cache:

2154

self._cache[version_id] = data

2155

yield version_id, content, digest

2156

2157

def read_records(self, records):

2158

"""Read records into a dictionary."""

2159

components = {}

2160

for record_id, content, digest in \

2161

self.read_records_iter(records):

2162

components[record_id] = (content, digest)

2163

return components

2164

2165

2166

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Report whether both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of every version in the ancestry of
        version_ids that the target lacks, in topological order.

        :return: The number of versions copied (0 when nothing is needed).
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)
        source = self.source
        target = self.target

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # The pb argument is not used; a nested progress bar is always made.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            present = set(target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            missing = self.source_ancestry - present
            if not missing:
                return 0

            to_copy = []
            for candidate in topo_sort(source.get_graph()):
                if target.has_version(candidate):
                    continue
                if candidate in missing:
                    to_copy.append(candidate)

            # Plan the join: collect the read memos and the matching index
            # details in the same (topological) order.
            pending_meta = []
            pending_memos = []
            scheduled = set()
            source_index = source._index
            for version_id in to_copy:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # Check that it will be a consistent copy: if the source has
                # the parent, the target must already have it or it must
                # already be scheduled; otherwise we don't care.
                for parent in parents:
                    assert (target.has_version(parent) or
                            parent in scheduled or
                            not source.has_version(parent))
                position = source_index.get_position(version_id)
                pending_memos.append((version_id, position))
                pending_meta.append((version_id, options, parents))
                scheduled.add(version_id)

            # Fetch the raw data, pairing each record with its planned entry.
            copied = 0
            total = len(to_copy)
            chunks = []
            records = []
            raw_iter = source._data.read_records_iter_raw(pending_memos)
            for (version_id, raw_data), (planned_id, options, parents) in \
                    izip(raw_iter, pending_meta):
                assert version_id == planned_id, 'logic error, inconsistent results'
                copied += 1
                pb.update("Joining knit", copied, total)
                records.append((version_id, options, parents, len(raw_data)))
                chunks.append(raw_data)
            target._add_raw_records(records, ''.join(chunks))
            return copied
        finally:
            pb.finished()

2251

2252

2253

# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

2254

2255

2256

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Report whether source is a weave and target is a knit."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the lines of every version in the ancestry of version_ids
        that the target lacks, in topological order.

        :return: The number of versions converted (0 when nothing is needed).
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)
        source = self.source
        target = self.target

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # The pb argument is not used; a nested progress bar is always made.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            present = set(target._index.get_versions())
            missing = self.source_ancestry - present
            if not missing:
                return 0

            to_convert = []
            for candidate in topo_sort(source.get_graph()):
                if target.has_version(candidate):
                    continue
                if candidate in missing:
                    to_convert.append(candidate)

            # Do the join.
            converted = 0
            total = len(to_convert)
            for version_id in to_convert:
                pb.update("Converting to knit", converted, total)
                parents = source.get_parents(version_id)
                # Check that it will be a consistent copy: every parent must
                # already be in the target.
                for parent in parents:
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
                converted += 1
            return converted
        finally:
            pb.finished()

2312

2313

2314

# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

2315

2316

2317

# Deprecated, use PatienceSequenceMatcher instead

2318

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2319

2320

2321

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to annotate.
    :param revision_id: The version whose annotation is wanted.
    :return: An iterator over the annotation computed for revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = dict(zip(ancestry, knit.get_line_list(ancestry)))
    annotated = {}
    for version in ancestry:
        if version in annotated:
            continue
        parents = knit.get_parents(version)
        # Matching blocks are only derived for versions that have parents
        # and are stored as a line-delta; otherwise None is passed on.
        blocks = None
        if len(parents) != 0 and \
                knit._index.get_method(version) == 'line-delta':
            _parent, _sha1, _noeol, delta = knit.get_delta(version)
            blocks = KnitContent.get_line_delta_blocks(
                delta, texts[parents[0]], texts[version])
        parent_annotations = [annotated[p] for p in parents]
        annotated[version] = list(annotate.reannotate(
            parent_annotations, texts[version], version, blocks))
    return iter(annotated[revision_id])

2346

2347

2348

try:

2349

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2350

except ImportError:

2351

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »