/brz/remove-bazaar : revision 2866.1.1

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Alexander Belchenko
Date: 2007-09-25 22:13:08 UTC
mfrom: (2831.8.1 bzr.dev)
mto: This revision was merged to the branch mainline in revision 2867.
Revision ID: bialix@ukr.net-20070925221308-aqaqq1u2qv6kpl2z

merge with bzr.dev

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_string,

108

sha_strings,

109

)

110

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

111

from bzrlib.tsort import topo_sort

112

import bzrlib.ui

113

import bzrlib.weave

114

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

115

116

117

# TODO: Split out code specific to this format into an associated object.

118

119

# TODO: Can we put in some kind of value to check that the index and data

120

# files belong together?

121

122

# TODO: accommodate binaries, perhaps by storing a byte count

123

124

# TODO: function to check whole file

125

126

# TODO: atomically append data, then measure backwards from the cursor

127

# position after writing to work out where it was located. we may need to

128

# bypass python file buffering.

129

130

# Suffix string for the knit data file (the file of delta records described
# in the module docstring) -- presumably appended to a knit's base name;
# confirm at the use sites.
DATA_SUFFIX = '.knit'

131

# Suffix string for the knit index file -- presumably appended to a knit's
# base name; confirm at the use sites.
INDEX_SUFFIX = '.kndx'

132

133

134

class KnitContent(object):
    """Base class for the content of one knit version.

    Subclasses supply ``annotate_iter()``, ``text()`` and a ``_lines``
    attribute; the methods here build deltas and matching blocks on top of
    those.
    """

    def annotate(self):
        """Return every (origin, text) pair of this content as a list."""
        return [pair for pair in self.annotate_iter()]

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta that turns this content into new_lines.

        Each item is ``(old_start, old_end, new_length, new_data)`` where
        ``new_data`` is the matching slice of ``new_lines._lines``.
        """
        replacement_texts = new_lines.text()
        current_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, current_texts, replacement_texts)
        for opcode in matcher.get_opcodes():
            kind, old_start, old_end, new_start, new_end = opcode
            # Unchanged regions are not part of a delta.
            if kind != 'equal':
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the output of line_delta_iter() as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher-style matching blocks from a knit delta.

        :param knit_delta: iterable of (source_begin, source_end,
            target_length, new_text) hunks.
        :param source: sequence of lines the delta applies to.
        :param target: sequence of lines the delta produces.
        :return: generator of (source_pos, target_pos, length) triples,
            finishing with a zero-length sentinel block.
        """
        def trimmed_length(src_start, tgt_start, length):
            # A knit delta cannot be trusted about the final line of a
            # matching run (eol handling), so compare it explicitly and
            # drop it from the run when the two sides differ.
            if length > 0 and (source[src_start + length - 1]
                               != target[tgt_start + length - 1]):
                return length - 1
            return length

        total = len(target)
        src_cursor = 0
        tgt_cursor = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            # Lines between the previous hunk and this one are unchanged.
            gap = hunk_start - src_cursor
            matched = trimmed_length(src_cursor, tgt_cursor, gap)
            if matched > 0:
                yield src_cursor, tgt_cursor, matched
            tgt_cursor += inserted + gap
            src_cursor = hunk_end
        # Whatever follows the last hunk is the trailing unchanged run.
        remaining = total - tgt_cursor
        matched = trimmed_length(src_cursor, tgt_cursor, remaining)
        if matched > 0:
            yield src_cursor, tgt_cursor, matched
        yield src_cursor + remaining, total, 0

180

181

182

class AnnotatedKnitContent(KnitContent):
    """Knit content held as a list of (origin, text) pairs."""

    def __init__(self, lines):
        """Store lines, a list of (origin, text) tuples, without copying."""
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the stored (origin, text) tuples."""
        line_iterator = iter(self._lines)
        return line_iterator

    def strip_last_line_newline(self):
        """Remove trailing newline characters from the last line, in place."""
        last_entry = self._lines[-1]
        stripped_text = last_entry[1].rstrip('\n')
        self._lines[-1] = (last_entry[0], stripped_text)

    def text(self):
        """Return a new list holding only the text of each line."""
        return [content for _origin, content in self._lines]

    def copy(self):
        """Return a new AnnotatedKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return AnnotatedKnitContent(duplicate)

201

202

203

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is reported
    for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
    objects.
    """

    def __init__(self, lines, version_id):
        self._lines = lines
        self._version_id = version_id

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        for line_text in self._lines:
            # Every line is attributed to the single stored version id.
            yield self._version_id, line_text

    def text(self):
        """Return the stored list of lines itself (not a copy)."""
        return self._lines

    def strip_last_line_newline(self):
        """Strip trailing newlines from the final line, in place."""
        final_line = self._lines[-1]
        self._lines[-1] = final_line.rstrip('\n')

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of _lines."""
        duplicate_lines = self._lines[:]
        return PlainKnitContent(duplicate_lines, self._version_id)

228

229

230

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Build annotated content attributing every line to version_id.

        :param lines: The text lines of the content.
        :param version_id: The origin recorded against each line.
        :return: An AnnotatedKnitContent of (version_id, line) tuples.
        """
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param lines: The serialised line delta.
        :param version_id: Forwarded to parse_line_delta(); optional so that
            callers using the old one-argument form keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: An iterable of serialised delta lines.
        :param version_id: Accepted for interface parity with
            KnitPlainFactory.parse_line_delta; not used here.
        :param plain: If True, the lines are returned as a plain
            list, not as a list of tuples, i.e.
            (start, end, count, [1..count newline])
        :return: A list of hunks in the internal representation.
        """
        from itertools import islice
        result = []
        lines = iter(lines)

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                # islice consumes exactly the hunk body from the shared
                # iterator, leaving it positioned on the next header.
                contents = [line.split(' ', 1)[1]
                            for line in islice(lines, count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(line.split(' ', 1))
                            for line in islice(lines, count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        from itertools import islice
        lines = iter(lines)
        for header in lines:
            count = int(header.split(',')[2])
            for line in islice(lines, count):
                origin, text = line.split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of version_id's content in knit."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

340

341

342

class KnitPlainFactory(object):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent tagged with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, hunk_lines) for each hunk in lines.

        :param lines: An indexable sequence of serialised delta lines; each
            hunk is a 'start,end,count' header followed by count lines.
        :param version_id: Not used by the plain format.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        fetch = line_iter.next
        for header in line_iter:
            # The third header field is the number of body lines that follow.
            remaining = int(header.split(',')[2])
            for _unused in xrange(remaining):
                yield fetch()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta hunks as header lines followed by their bodies."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialised.append(header)
            serialised.extend(hunk_lines)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Annotate version_id of knit via the module's annotate_knit()."""
        return annotate_knit(knit, version_id)

401

402

403

def make_empty_knit(transport, relpath):
    """Construct a empty knit at the specified location.

    :param transport: The transport the knit is stored on.
    :param relpath: The path of the knit relative to transport.
    :return: The newly constructed plain (unannotated) KnitVersionedFile.
    """
    # KnitVersionedFile takes relpath before transport, and its third and
    # fourth positional parameters are file_mode and access_mode, so the
    # arguments are passed by keyword.  The factory must be an instance.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

406

407

408

class KnitVersionedFile(VersionedFile):

409

"""Weave-like structure with faster random access.

410

411

A knit stores a number of texts and a summary of the relationships

412

between them. Texts are identified by a string version-id. Texts

413

are normally stored and retrieved as a series of lines, but can

414

also be passed as single strings.

415

416

Lines are stored with the trailing newline (if any) included, to

417

avoid special cases for files with no final newline. Lines are

418

composed of 8-bit characters, not unicode. The combination of

419

these approaches should mean any 'binary' file can be safely

420

stored and retrieved.

421

"""

422

423

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Path of the knit; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it for the index and data files.
    :param transport: The transport used to reach relpath.
    :param file_mode: Passed to the index and data access objects.
    :param access_mode: 'r' or 'w'; defaults to 'w' when None.
    :param factory: The content factory; a KnitAnnotateFactory is used
        when none is given.
    :param basis_knit: Deprecated; passing it only emits a warning.
    :param delta: Whether new texts may be stored as line deltas.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed to the index and data access objects.
    :param index: An index to use for the knit.
    :param access_method: A data access object to use instead of the
        default _KnitAccess.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
             " parameter is deprecated as of bzr 0.9.",
             DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta walks back.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # len(self) is consulted only after the index is in place.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

468

469

def __repr__(self):

470

return '%s(%s)' % (self.__class__.__name__,

471

self.transport.abspath(self.filename))

472

473

def _check_should_delta(self, first_parents):
    """Iterate back through the parent listing, looking for a fulltext.

    This is used when we want to decide whether to add a delta or a new
    fulltext. It searches for _max_delta_chain parents. When it finds a
    fulltext parent, it sees if the total size of the deltas leading up to
    it is large enough to indicate that we want a new full text anyway.

    :param first_parents: The parents of the text being added; only the
        first entry of each parent list is followed.
    :return: True if we should create a new delta, False if we should use
        a full text.
    """
    accumulated_delta_size = 0
    chain_parents = first_parents
    for _step in xrange(self._max_delta_chain):
        left_parent = chain_parents[0]
        method = self._index.get_method(left_parent)
        _index_part, _pos, size = self._index.get_position(left_parent)
        if method == 'fulltext':
            # A delta is only worthwhile while the chain of deltas is
            # still smaller than the fulltext it hangs off.
            return size > accumulated_delta_size
        accumulated_delta_size += size
        chain_parents = self._index.get_parents(left_parent)
    # We couldn't find a fulltext, so we must create a new one
    return False

501

502

def _add_raw_records(self, records, data):
    """Add all the records 'records' with data pre-joined in 'data'.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The data for the records. When it is written, the records
                 are adjusted to have pos pointing into data by the sum of
                 the preceding records sizes.
    """
    # write all the data
    sizes = [record[3] for record in records]
    access_memos = self._data.add_raw_records(sizes, data)
    index_entries = []
    start = 0
    for record, access_memo in zip(records, access_memos):
        version_id, options, parents, size = record
        index_entries.append((version_id, options, access_memo, parents))
        end = start + size
        if self._data._do_cache:
            # Keep this record's slice of the joined data in the cache.
            self._data._cache[version_id] = data[start:end]
        start = end
    self._index.add_versions(index_entries)

522

523

def enable_cache(self):
    """Start caching data for this knit

    Delegates to the knit's data object.
    """
    data = self._data
    data.enable_cache()

526

527

def clear_cache(self):
    """Clear the data cache only.

    Delegates to the knit's data object; the index is not touched here.
    """
    data = self._data
    data.clear_cache()

530

531

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; the index and data suffixes are
        appended to it.
    :param transport: The transport to copy the knit to.
    """
    temp_index_name = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(temp_index_name, index_file)
    finally:
        # Close the source index file, as is done for the data file below.
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(temp_index_name, name + INDEX_SUFFIX)

545

546

def create_empty(self, name, transport, mode=None):
    """Create a new knit sharing this knit's factory and delta setting.

    :param name: The relpath of the new knit.
    :param transport: The transport to create it on.
    :param mode: Accepted but not used by this implementation.
    :return: A KnitVersionedFile constructed with create=True.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

549

550

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    """
    wanted = set([osutils.safe_revision_id(v) for v in required_versions])
    # we don't care about inclusions, the caller cares.
    # but every requested version has to be present.
    for version_id in wanted:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    ordered_versions = [v for v in self.versions() if v in wanted]

    # create the list of version information for the result, and the
    # matching queue of records to read.
    read_queue = []
    result_version_list = []
    for version_id in ordered_versions:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        read_queue.append((version_id, index_memo))
        _memo_index, _data_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_chunks = []
    raw_records = self._data.read_records_iter_raw(read_queue)
    for (version_id, raw_data), described in izip(raw_records,
                                                  result_version_list):
        assert version_id == described[0], 'logic error, inconsistent results'
        raw_chunks.append(raw_data)
    pseudo_file = StringIO(''.join(raw_chunks))

    def read(length):
        # None means "everything that is left".
        if length is None:
            return pseudo_file.read()
        return pseudo_file.read(length)

    return (self.get_format_signature(), result_version_list, read)

610

611

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks for version_id derived from its stored delta.

    :return: The result of KnitContent.get_line_delta_blocks() when
        version_id is stored as a 'line-delta', otherwise None.
    """
    method = self._index.get_method(version_id)
    if method == 'line-delta':
        _parent, _sha1, _noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

616

617

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: A (parent, sha1, noeol, delta) tuple.  parent is the first
        parent of version_id or None; noeol is True when the index
        options contain 'no-eol'.  For a stored fulltext the delta is
        computed against the parent's text (or against no text at all).
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if self._index.get_method(version_id) != 'fulltext':
        # Already stored as a delta: parsing it is all that is needed.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta
    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                     new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

643

644

def get_format_signature(self):

645

"""See VersionedFile.get_format_signature()."""

646

if self.factory.annotated:

647

annotated_part = "annotated"

648

else:

649

annotated_part = "plain"

650

return "knit-%s" % (annotated_part,)

651

652

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the items of the index graph.
    """
    return dict(self._index.get_graph())

656

657

def get_sha1(self, version_id):

658

return self.get_sha1s([version_id])[0]

659

660

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: A list of digests, one per entry of version_ids and in the
        same order.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for version_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

666

667

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list holding the data suffix then the index suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

671

672

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is actually present is never reported as a ghost.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

686

687

def insert_data_stream(self, stream):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    :param stream: A (format, data_list, reader_callable) tuple, where
        data_list holds (version_id, options, length, parents) entries and
        reader_callable(length) returns the next length bytes of record
        data.
    :raises KnitDataStreamIncompatible: If the stream's format signature
        differs from this knit's.
    :raises KnitCorrupt: If an already present version has different
        parents or a different sha-1 in the stream.

    :seealso: get_data_stream
    """
    # Unpacked here rather than in the signature: tuple parameters were
    # removed from the language by PEP 3113.
    stream_format, data_list, reader_callable = stream
    if stream_format != self.get_format_signature():
        trace.mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            stream_format, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.  The record is read even though it is not stored,
            # which keeps the reader positioned on the next record.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            df, rec = self._data._parse_record_header(version_id, raw_data)
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

729

730

def versions(self):
    """See VersionedFile.versions.

    :return: Whatever the index reports from get_versions().
    """
    evil_debugging = 'evil' in debug.debug_flags
    if evil_debugging:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

735

736

def has_version(self, version_id):
    """See VersionedFile.has_version.

    :return: The index's answer for the normalised version_id.
    """
    evil_debugging = 'evil' in debug.debug_flags
    if evil_debugging:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

__contains__ = has_version

744

745

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: The new content; when annotated is true its _lines
        are updated in place.
    :param parents: The parent version ids to merge annotations from.
    :param parent_texts: Optional map of version id to content, passed
        through to _get_content().  Only read here.
    :param delta: If true, a line delta against parents[0] is returned.
    :param annotated: If true, annotations are copied from the parents.
    :param left_matching_blocks: Optional precomputed matching blocks
        between parents[0] and content.
    :return: The line delta when delta is true, otherwise None.
    """
    delta_seq = None
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    if annotated:
        for parent_id in parents:
            parent_content = self._get_content(parent_id, parent_texts)
            use_prematched = (parent_id == parents[0]
                              and delta_seq is not None)
            if use_prematched:
                matcher = delta_seq
            else:
                matcher = patiencediff.PatienceSequenceMatcher(
                    None, parent_content.text(), content.text())
            for parent_pos, new_pos, length in matcher.get_matching_blocks():
                if length != 0:
                    # this appears to copy (origin, text) pairs across to
                    # the new content for any line that matches the
                    # last-checked parent.
                    content._lines[new_pos:new_pos + length] = \
                        parent_content._lines[parent_pos:parent_pos + length]
    if not delta:
        return None
    if delta_seq is None:
        reference_content = self._get_content(parents[0], parent_texts)
        new_texts = content.text()
        old_texts = reference_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(
            None, old_texts, new_texts)
    return self._make_line_delta(delta_seq, content)

778

779

def _make_line_delta(self, delta_seq, new_content):

780

"""Generate a line delta from delta_seq and new_content."""

781

diff_hunks = []

782

for op in delta_seq.get_opcodes():

783

if op[0] == 'equal':

784

continue

785

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

786

return diff_hunks

787

788

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, index_memo, next) is returned.
    method is the way referenced data should be applied.
    index_memo is what the index reports from get_position().
    next is the build-parent of the version, or None for fulltexts.
    """
    component_data = {}
    for version_id in version_ids:
        cursor = version_id
        # Follow build-parents until a fulltext or an already recorded
        # component is reached.
        while cursor is not None and cursor not in component_data:
            method = self._index.get_method(cursor)
            build_parent = None
            if method != 'fulltext':
                build_parent = self.get_parents(cursor)[0]
            index_memo = self._index.get_position(cursor)
            component_data[cursor] = (method, index_memo, build_parent)
            cursor = build_parent
    return component_data

814

815

def _get_content(self, version_id, parent_texts={}):
    """Returns a content object that makes up the specified
    version.

    :param parent_texts: Optional map of version id to content that is
        consulted first.  Only read here.
    :raises RevisionNotPresent: If a cached content is found for a
        version this knit does not contain.
    """
    cached_version = parent_texts.get(version_id, None)
    if cached_version is None:
        _text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)
    return cached_version

826

827

def _check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    The check itself is performed by the index.
    """
    index = self._index
    index.check_versions_present(version_ids)

830

831

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
                           nostore_sha, random_id, check_content):
    """See VersionedFile.add_lines_with_ghosts().

    Parents are not required to be present, and no left matching blocks
    are supplied to _add().
    """
    self._check_add(version_id, lines, random_id, check_content)
    left_matching_blocks = None
    return self._add(version_id, lines, parents, self.delta,
                     parent_texts, left_matching_blocks, nostore_sha,
                     random_id)

837

838

def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks, nostore_sha, random_id, check_content):
    """See VersionedFile.add_lines.

    All parents must already be present; a copy of lines is handed to
    _add().
    """
    self._check_add(version_id, lines, random_id, check_content)
    self._check_versions_present(parents)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts, left_matching_blocks, nostore_sha,
                     random_id)

845

846

def _check_add(self, version_id, lines, random_id, check_content):
    """check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If random_id is false and version_id
        is already in the knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id:
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

860

861

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :return: A (digest, text_length, content) tuple for the added text.
    :raises errors.ExistingContent: If the text's sha equals nostore_sha.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    if parent_texts is None:
        parent_texts = {}
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    # can only compress against the left most present parent.
    if delta:
        if not present_parents or present_parents[0] != parents[0]:
            delta = False

    text_length = len(line_bytes)
    options = []
    if lines and lines[-1][-1] != '\n':
        # copy the contents of lines, so the caller's list is untouched
        # when the missing final newline is added.
        lines = lines[:]
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        # get mixed annotation + content and feed it into the
        # serialiser.
        store_lines = self.factory.lower_fulltext(content)
    size, record_bytes = self._data._record_to_data(version_id, digest,
        store_lines)

    access_memo = self._data.add_raw_records([size], record_bytes)[0]
    self._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

931

932

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """
    return None

934

935

def _clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text().

    The old version's lines are read and re-added under new_version_id.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)

940

941

def get_lines(self, version_id):

942

"""See VersionedFile.get_lines()."""

943

return self.get_line_list([version_id])[0]

944

945

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is the record content yielded by the data object's
        read_records_iter().
    digest is the SHA1 digest of this version id after all steps are done
    next is the build-parent of the version, i.e. the leftmost ancestor.
    If the method is fulltext, next will be None.
    """
    position_map = self._get_components_positions(version_ids)
    # Each position_map value is (method, index_memo, next); only the
    # index_memo is needed to read the record.
    records = [(component_id, details[1])
               for component_id, details in position_map.iteritems()]
    record_map = {}
    for component_id, content, digest in \
            self._data.read_records_iter(records):
        method, _index_memo, build_parent = position_map[component_id]
        record_map[component_id] = (method, content, digest, build_parent)

    return record_map

966

967

def get_text(self, version_id):

968

"""See VersionedFile.get_text"""

969

return self.get_texts([version_id])[0]

970

971

def get_texts(self, version_ids):

972

return [''.join(l) for l in self.get_line_list(version_ids)]

973

974

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    :return: One list of lines per requested version, in request order.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, _content_map = self._get_content_maps(safe_ids)
    line_lists = []
    for safe_id in safe_ids:
        line_lists.append(text_map[safe_id])
    return line_lists

_get_lf_split_line_list = get_line_list

983

984

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    :raises KnitCorrupt: If a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    built_contents = {}
    final_content = {}
    for version_id in version_ids:
        # Walk from the version back along its build-parents, stopping at
        # a fulltext or at a component that has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in built_contents:
                break
            cursor = build_parent

        # Replay the chain oldest-first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in built_contents:
                content = built_contents[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            built_contents[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the content kept for later delta
            # application retains its final newline.
            content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1037

1038

@staticmethod
def _apply_delta(lines, delta):
    """Return a copy of lines with delta applied.

    :param lines: The basis lines; they are copied, not modified.
    :param delta: An iterable of (start, end, count, delta_lines) hunks.
        start and end are positions in the basis lines; the slice they
        delimit is replaced by delta_lines.
    :return: A new list of lines.
    """
    patched = list(lines)
    # How far positions in patched have drifted from basis positions
    # because of the hunks applied so far.
    shift = 0
    for start, end, count, delta_lines in delta:
        low = shift + start
        high = shift + end
        patched[low:high] = delta_lines
        shift = shift + (start - end) + count
    return patched

1047

1048

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: The versions to walk; None means every version.
    :param pb: Optional progress bar; a DummyProgress is used when None.
    :raises RevisionNotPresent: If a requested version is not present.
    """
    if version_ids is not None:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    else:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    wanted = set(version_ids)
    # every requested version has to be available
    for version_id in wanted:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # queue (version_id, index_memo) pairs in the order self.versions()
    # reports the versions:
    records = [(version_id, self._index.get_position(version_id))
               for version_id in self.versions()
               if version_id in wanted]

    total = len(records)
    record_iter = self._data.read_records_iter(records)
    for done, (version_id, data, sha_value) in enumerate(record_iter):
        pb.update('Walking content.', done, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'line-delta':
            content_lines = self.factory.get_linedelta_content(data)
        else:
            content_lines = self.factory.get_fulltext_content(data)
        for line in content_lines:
            yield line

    pb.update('Walking content.', total, total)

1087

1088

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    The ids are normalised with osutils.safe_revision_id and the work is
    then delegated to the index.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

1100

1101

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: Whatever the index reports as its number of versions.
    """
    version_count = self._index.num_versions()
    return version_count

# len(knit) is the number of versions.
__len__ = num_versions

1106

1107

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    The id is normalised with osutils.safe_revision_id and the annotation
    itself is produced by the knit's factory.
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self.factory.annotate_iter(self, safe_id)

1111

1112

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: If the index raises KeyError for
        version_id.
    """
    # perf notes:
    #  optimism counts!
    #  52554 calls in 1264 872 internal down from 3674
    # i.e. ask the index directly and translate the KeyError, rather than
    # checking for presence first.
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    else:
        return parents

1122

1123

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: If the index raises KeyError for
        version_id.
    """
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    else:
        return parents

1130

1131

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id (a string) or a sequence of
        version ids.
    :param topo_sorted: Passed through to the index.
    :return: [] when no versions are given, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        # A lone id is treated as a one-element list.
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(safe_ids, topo_sorted)
    return []

1139

1140

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id (a string) or a sequence of
        version ids.
    :return: [] when no versions are given, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        # A lone id is treated as a one-element list.
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(safe_ids)
    return []

1148

1149

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the (unsorted) ancestry and the annotation of both versions
    and hands them to merge._plan_annotate_merge.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    # Only membership matters for the ancestries, so skip the topological
    # sort and keep them as sets.
    b_ancestry = self.get_ancestry(ver_b, topo_sorted=False)
    ancestors_b = set(b_ancestry)
    a_ancestry = self.get_ancestry(ver_a, topo_sorted=False)
    ancestors_a = set(a_ancestry)
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)
    return plan

1160

1161

1162

class _KnitComponentFile(object):
    """One of the files used to implement a knit database.

    This base class only stores the transport, the file name and the
    creation settings; check_header() additionally expects the class in
    use to provide a HEADER attribute.
    """

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it should be created.

        :param transport: The transport the file is accessed through.
        :param filename: The name of the file on that transport.
        :param mode: The access mode requested for the file.
        :param file_mode: Permission bits for a newly created file.
        :param create_parent_dir: Whether a missing parent directory may be
            created.
        :param dir_mode: Permission bits for a newly created directory.
        """
        self._transport = transport
        self._filename = filename
        self._mode = mode
        # creation settings
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # Nothing is pending creation until a subclass says so.
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Check that the first line read from fp is this file's HEADER.

        :param fp: A file-like object positioned at the start of the file.
        :raises errors.NoSuchFile: If nothing could be read from fp.
        :raises KnitHeaderError: If the first line is not HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line == self.HEADER:
            return
        raise KnitHeaderError(
            badline=first_line,
            filename=self._transport.abspath(self._filename))

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1191

1192

1193

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to a Index
    object.  Each entry is the tuple
    (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it is normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id, so a repeated id keeps the sequence number it
        # already has.
        try:
            sequence = self._cache[version_id][5]
        except KeyError:
            sequence = len(self._history)
            self._history.append(version_id)
        entry = (version_id, options, pos, size, parents, sequence)
        self._cache[version_id] = entry

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from transport, optionally creating it.

        :param create: Together with mode == 'w', allow a missing index to
            be created instead of raising NoSuchFile.
        :param delay_create: When creating, only mark the file as needing
            creation; it is then written by the first add_versions() call.
        The remaining parameters are passed to _KnitComponentFile.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if not (mode == 'w' and create):
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        graph = []
        for version_id, entry in self._cache.items():
            graph.append((version_id, entry[4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            try:
                recorded_parents = cache[version][4]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in recorded_parents if p in cache]
            # queue whatever has not been completed yet
            todo.update([p for p in parents if p not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            if version not in cache:
                # ghost, fake it
                graph[version] = []
                continue
            parents = cache[version][4]
            # queue whatever has not been completed yet
            todo.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                yield version_id, tuple(self.get_parents(version_id))
            except KeyError:
                # not present: skip it
                continue

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        history = self._history
        return len(history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references the way they are stored in an index line.

        A version present in the cache is written as its sequence number,
        any other version as '.' followed by the version id.
        """
        cache = self._cache
        encoded = []
        for version in versions:
            try:
                # -- inlined lookup() --
                reference = str(cache[version][5])
                # -- end lookup () --
            except KeyError:
                reference = '.' + version
            encoded.append(reference)
        return ' '.join(encoded)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        If anything goes wrong the in-memory cache and history are put back
        to what they were before the call and the error is re-raised.

        :param versions: a list of tuples:
                         (version_id, options, index_memo, parents),
                         where index_memo is (index, pos, size).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.  (This
            implementation does not look at it.)
        """
        new_lines = []
        saved_history = self._history[:]
        saved_cache = self._cache.copy()

        try:
            for version_id, options, index_memo, parents in versions:
                (index, pos, size) = index_memo
                parent_refs = self._version_list_to_index(parents)
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               parent_refs)
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # First write to a delayed-create index: header plus the
                # new records go out as one file.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename,
                                             ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        cache = self._cache
        return version_id in cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        record = self._cache[version_id]
        pos = record[2]
        size = record[3]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises errors.KnitIndexUnknownMethod: If the options name neither
            'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options recorded for version_id.

        The value is handed back exactly as it was cached.

        e.g. foo,bar
        """
        record = self._cache[version_id]
        return record[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        recorded_parents = cache[version_id][4]
        return [parent for parent in recorded_parents if parent in cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        record = self._cache[version_id]
        return record[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: For the first version found missing.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1474

1475

1476

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Version ids are stored in the graph index as 1-tuple keys.  The value
    of a node is one flag character ('N' for no-eol, ' ' otherwise)
    followed by "pos size".  When parents are tracked the first reference
    list of a node holds its parents; when deltas are allowed a second
    reference list holds the compression parent (empty for fulltexts).
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record 
            parents.
        :raises KnitCorrupt: If deltas are requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator; nodes are yielded as
        (index, key, value, node_refs), with node_refs being () for a
        parentless index.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            found nodes have been yielded if any key was not found.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the subset of the given keys that the index contains."""
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            # anything asked for but not returned is a ghost
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents 
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            # a missing key that was explicitly requested is an error; any
            # other missing key is a ghost parent.
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested 
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # one extra lookup decides which of the remaining parents are
            # ghosts
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]
    
    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Turn an iterable of 1-tuple keys into a tuple of version ids."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.
        
        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        :raises RevisionNotPresent: If version_id is not in the index.
        """
        node = self._get_node(version_id)
        # skip the leading flag character of the value
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises RevisionNotPresent: If deltas are enabled and version_id is
            not in the index.
        """
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Return the method implied by a node's compression-parent list."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the index node for version_id.

        :raises RevisionNotPresent: If version_id is not in the index, in
            line with the other lookups of this class (indexing the empty
            result list used to surface as a bare IndexError).
        """
        keys = self._version_ids_to_keys([version_id])
        nodes = list(self._get_entries(keys))
        if not nodes:
            raise RevisionNotPresent(version_id, self)
        return nodes[0]

    def get_options(self, version_id):
        """Return a list of the options for version_id.

        The list holds the compression method ('fulltext' or 'line-delta'),
        followed by 'no-eol' when the node value carries the 'N' flag.

        :raises RevisionNotPresent: If version_id is not in the index.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.
        
        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, access_memo, parents),
                         where access_memo is (index, pos, size).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :raises errors.ReadOnlyError: If no add_callback was supplied.
        :raises KnitCorrupt: If a record does not fit this index's
            configuration, or clashes with what is already recorded for the
            same key.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # the first parent is recorded as compression parent
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        if not random_id:
            # records already present with identical details are dropped
            # from the set to add; differing details are an error.
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.items():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.items():
                result.append((key, value))
        self._add_callback(result)
        
    def _version_ids_to_keys(self, version_ids):
        """Turn an iterable of version ids into a set of 1-tuple keys."""
        return set((version_id, ) for version_id in version_ids)

1791

1792

1793

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Permission bits used when the file is written.
        :param _dir_mode: Permission bits used when a directory is created.
        :param _need_to_create: If True, the first add_raw_records() call
            writes the file afresh instead of appending to it.
        :param _create_parent_dir: Passed on when the file is written afresh.
        """
        self._transport = transport
        self._filename = filename
        # settings for creating the file
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # The file is written from scratch, so the data starts at 0.
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            position = 0
        else:
            # append_bytes reports where the appended data starts.
            position = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, position, size))
            position += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(
            self._filename, '', mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = []
        for (index, pos, size) in memos_for_retrieval:
            ranges.append((pos, size))
        for pos, data in self._transport.readv(self._filename, ranges):
            yield data

1871

1872

1873

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            # Each segment of raw_data becomes its own record in the pack;
            # the memo records where the container writer placed it.
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # First pass: group consecutive memos that share an index into a
        # single readv request. The grouping keys off the previous group
        # rather than a None sentinel, so no memo is ever dropped.
        request_lists = []
        for (index, offset, length) in memos_for_retrieval:
            if request_lists and request_lists[-1][0] == index:
                request_lists[-1][1].append((offset, length))
            else:
                request_lists.append((index, [(offset, length)]))
        # Second pass: one reader per group, yielding records in request order.
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that add_raw_records will use.
        :param index: The index object to report in memos for new records.
        :param transport_packname: A (transport, packname) pair recorded in
            the index-to-pack map under ``index``.
        """
        # Unpack explicitly instead of in the signature; positional callers
        # are unaffected and the pair is still validated before any state
        # is changed.
        transport, packname = transport_packname
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

1957

1958

1959

class _KnitData(object):

1960

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1961

1962

The KnitData class provides the logic for parsing and using knit records,

1963

making use of an access method for the low level read and write operations.

1964

"""

1965

1966

def __init__(self, access):

1967

"""Create a KnitData object.

1968

1969

:param access: The access method to use. Access methods such as

1970

_KnitAccess manage the insertion of raw records and the subsequent

1971

retrieval of the same.

1972

"""

1973

self._access = access

1974

self._checked = False

1975

# TODO: jam 20060713 conceptually, this could spill to disk

1976

# if the cached size gets larger than a certain amount

1977

# but it complicates the model a bit, so for now just use

1978

# a simple dictionary

1979

self._cache = {}

1980

self._do_cache = False

1981

1982

def enable_cache(self):

1983

"""Enable caching of reads."""

1984

self._do_cache = True

1985

1986

def clear_cache(self):

1987

"""Clear the record cache."""

1988

self._do_cache = False

1989

self._cache = {}

1990

1991

def _open_file(self):

1992

return self._access.open_file()

1993

1994

def _record_to_data(self, version_id, digest, lines):

1995

"""Convert version_id, digest, lines into a raw data block.

1996

1997

:return: (len, a StringIO instance with the raw data ready to read.)

1998

"""

1999

bytes = (''.join(chain(

2000

["version %s %d %s\n" % (version_id,

2001

len(lines),

2002

digest)],

2003

lines,

2004

["end %s\n" % version_id])))

2005

assert bytes.__class__ == str

2006

compressed_bytes = bytes_to_gzip(bytes)

2007

return len(compressed_bytes), compressed_bytes

2008

2009

def add_raw_records(self, sizes, raw_data):

2010

"""Append a prepared record to the data file.

2011

2012

:param sizes: An iterable containing the size of each raw data segment.

2013

:param raw_data: A bytestring containing the data.

2014

:return: a list of index data for the way the data was stored.

2015

See the access method add_raw_records documentation for more

2016

details.

2017

"""

2018

return self._access.add_raw_records(sizes, raw_data)

2019

2020

def _parse_record_header(self, version_id, raw_data):

2021

"""Parse a record header for consistency.

2022

2023

:return: the header and the decompressor stream.

2024

as (stream, header_record)

2025

"""

2026

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2027

try:

2028

rec = self._check_header(version_id, df.readline())

2029

except Exception, e:

2030

raise KnitCorrupt(self._access,

2031

"While reading {%s} got %s(%s)"

2032

% (version_id, e.__class__.__name__, str(e)))

2033

return df, rec

2034

2035

def _check_header(self, version_id, line):

2036

rec = line.split()

2037

if len(rec) != 4:

2038

raise KnitCorrupt(self._access,

2039

'unexpected number of elements in record header')

2040

if rec[1] != version_id:

2041

raise KnitCorrupt(self._access,

2042

'unexpected version, wanted %r, got %r'

2043

% (version_id, rec[1]))

2044

return rec

2045

2046

def _parse_record(self, version_id, data):

2047

# profiling notes:

2048

# 4168 calls in 2880 217 internal

2049

# 4168 calls to _parse_record_header in 2121

2050

# 4168 calls to readlines in 330

2051

df = GzipFile(mode='rb', fileobj=StringIO(data))

2052

2053

try:

2054

record_contents = df.readlines()

2055

except Exception, e:

2056

raise KnitCorrupt(self._access,

2057

"While reading {%s} got %s(%s)"

2058

% (version_id, e.__class__.__name__, str(e)))

2059

header = record_contents.pop(0)

2060

rec = self._check_header(version_id, header)

2061

2062

last_line = record_contents.pop()

2063

if len(record_contents) != int(rec[2]):

2064

raise KnitCorrupt(self._access,

2065

'incorrect number of lines %s != %s'

2066

' for version {%s}'

2067

% (len(record_contents), int(rec[2]),

2068

version_id))

2069

if last_line != 'end %s\n' % rec[1]:

2070

raise KnitCorrupt(self._access,

2071

'unexpected version end line %r, wanted %r'

2072

% (last_line, version_id))

2073

df.close()

2074

return record_contents, rec[3]

2075

2076

def read_records_iter_raw(self, records):

2077

"""Read text records from data file and yield raw data.

2078

2079

This unpacks enough of the text record to validate the id is

2080

as expected but thats all.

2081

"""

2082

# setup an iterator of the external records:

2083

# uses readv so nice and fast we hope.

2084

if len(records):

2085

# grab the disk data needed.

2086

if self._cache:

2087

# Don't check _cache if it is empty

2088

needed_offsets = [index_memo for version_id, index_memo

2089

in records

2090

if version_id not in self._cache]

2091

else:

2092

needed_offsets = [index_memo for version_id, index_memo

2093

in records]

2094

2095

raw_records = self._access.get_raw_records(needed_offsets)

2096

2097

for version_id, index_memo in records:

2098

if version_id in self._cache:

2099

# This data has already been validated

2100

data = self._cache[version_id]

2101

else:

2102

data = raw_records.next()

2103

if self._do_cache:

2104

self._cache[version_id] = data

2105

2106

# validate the header

2107

df, rec = self._parse_record_header(version_id, data)

2108

df.close()

2109

yield version_id, data

2110

2111

def read_records_iter(self, records):

2112

"""Read text records from data file and yield result.

2113

2114

The result will be returned in whatever is the fastest to read.

2115

Not by the order requested. Also, multiple requests for the same

2116

record will only yield 1 response.

2117

:param records: A list of (version_id, pos, len) entries

2118

:return: Yields (version_id, contents, digest) in the order

2119

read, not the order requested

2120

"""

2121

if not records:

2122

return

2123

2124

if self._cache:

2125

# Skip records we have alread seen

2126

yielded_records = set()

2127

needed_records = set()

2128

for record in records:

2129

if record[0] in self._cache:

2130

if record[0] in yielded_records:

2131

continue

2132

yielded_records.add(record[0])

2133

data = self._cache[record[0]]

2134

content, digest = self._parse_record(record[0], data)

2135

yield (record[0], content, digest)

2136

else:

2137

needed_records.add(record)

2138

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2139

else:

2140

needed_records = sorted(set(records), key=operator.itemgetter(1))

2141

2142

if not needed_records:

2143

return

2144

2145

# The transport optimizes the fetching as well

2146

# (ie, reads continuous ranges.)

2147

raw_data = self._access.get_raw_records(

2148

[index_memo for version_id, index_memo in needed_records])

2149

2150

for (version_id, index_memo), data in \

2151

izip(iter(needed_records), raw_data):

2152

content, digest = self._parse_record(version_id, data)

2153

if self._do_cache:

2154

self._cache[version_id] = data

2155

yield version_id, content, digest

2156

2157

def read_records(self, records):

2158

"""Read records into a dictionary."""

2159

components = {}

2160

for record_id, content, digest in \

2161

self.read_records_iter(records):

2162

components[record_id] = (content, digest)

2163

return components

2164

2165

2166

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only when both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Raw records are copied from source to target, converted from
        annotated to plain when needed. A plain-to-annotated join is
        delegated to the generic implementation. The pb and msg arguments
        are only forwarded on that delegated path; otherwise a nested
        progress bar obtained from ui.ui_factory is used.

        :return: The number of versions copied (0 when nothing is needed),
            or the generic join's result when delegating.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly
        source_is_annotated = self.source.factory.annotated
        if source_is_annotated == self.target.factory.annotated:
            converter = None
        elif source_is_annotated:
            converter = self._anno_to_plain_converter
        else:
            # We're converting from a plain to an annotated knit. This requires
            # building the annotations from scratch. The generic join code
            # handles this implicitly so we delegate to it.
            return super(InterKnit, self).join(pb, msg, version_ids,
                                               ignore_missing)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0

            # Walk the source graph in topological order, keeping the
            # versions the target lacks and that are actually needed.
            version_list = []
            for candidate in topo_sort(self.source.get_graph()):
                if self.target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            scheduled = set()
            source_index = self.source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in scheduled or
                            not self.source.has_version(parent))
                index_memo = source_index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            data_chunks = []
            record_details = []
            raw_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for (version_id, raw_data), (queued_id, options, parents) in \
                    izip(raw_stream, copy_queue):
                assert version_id == queued_id, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                                               parents)
                else:
                    size = len(raw_data)
                record_details.append((version_id, options, parents, size))
                data_chunks.append(raw_data)
            self.target._add_raw_records(record_details, ''.join(data_chunks))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: Whatever the target's _record_to_data returns for the
            re-encoded record.
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        if 'fulltext' not in options:
            delta = self.source.factory.parse_line_delta(data, version_id,
                                                         plain=True)
            lines = self.target.factory.lower_line_delta(delta)
        else:
            content = self.source.factory.parse_fulltext(data, version_id)
            lines = self.target.factory.lower_fulltext(content)
        return self.target._data._record_to_data(version_id, digest, lines)

2281

2282

2283

# Hand InterKnit to InterVersionedFile.register_optimiser (presumably so it
# is considered for knit-to-knit operations; the registry itself is defined
# outside this section).
InterVersionedFile.register_optimiser(InterKnit)

2284

2285

2286

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only for a Weave source and a knit target."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Each missing version is re-added to the target through add_lines,
        in topological order of the source graph. The pb argument is
        replaced by a nested progress bar from ui.ui_factory and msg is not
        used here.

        :return: The number of versions added to the target.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0

            # Keep source graph order, restricted to what the target lacks.
            version_list = []
            for candidate in topo_sort(self.source.get_graph()):
                if self.target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                text_lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, text_lines)
                count += 1
            return count
        finally:
            pb.finished()

2342

2343

2344

# Hand WeaveToKnit to InterVersionedFile.register_optimiser (presumably so it
# is considered for weave-to-knit operations; the registry itself is defined
# outside this section).
InterVersionedFile.register_optimiser(WeaveToKnit)

2345

2346

2347

# Deprecated, use PatienceSequenceMatcher instead

2348

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2349

2350

2351

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to read texts, parents and deltas from.
    :param revision_id: The version whose annotation is wanted.
    :return: An iterator over the annotation computed for revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = dict(zip(ancestry, knit.get_line_list(ancestry)))
    annotated = {}
    for version in ancestry:
        if version in annotated:
            continue
        parents = knit.get_parents(version)
        # Matching blocks are only available when the version has parents
        # and is stored as a line-delta; otherwise reannotate gets None.
        blocks = None
        if len(parents) != 0 and \
                knit._index.get_method(version) == 'line-delta':
            _parent, _sha1, _noeol, delta = knit.get_delta(version)
            blocks = KnitContent.get_line_delta_blocks(
                delta, texts[parents[0]], texts[version])
        parent_annotations = [annotated[p] for p in parents]
        annotated[version] = list(annotate.reannotate(
            parent_annotations, texts[version], version, blocks))
    return iter(annotated[revision_id])

2376

2377

2378

# Bind _load_data to the implementation in the _knit_load_data_c module when
# it can be imported; otherwise fall back to the _knit_load_data_py one.
try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »