/brz/remove-bazaar : revision 2770.1.5

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Aaron Bentley
Date: 2007-08-31 19:38:52 UTC
mto: This revision was merged to the branch mainline in revision 2777.
Revision ID: abentley@panoramicfeedback.com-20070831193852-6ip22sbw058yib5u

Clean up docs, test matching blocks for reannotate

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

100

RevisionAlreadyPresent,

101

)

102

from bzrlib.tuned_gzip import GzipFile

103

from bzrlib.osutils import (

104

contains_whitespace,

105

contains_linebreaks,

106

sha_strings,

107

)

108

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

109

from bzrlib.tsort import topo_sort

110

import bzrlib.ui

111

import bzrlib.weave

112

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

113

114

115

# TODO: Split out code specific to this format into an associated object.

116

117

# TODO: Can we put in some kind of value to check that the index and data

118

# files belong together?

119

120

# TODO: accommodate binaries, perhaps by storing a byte count

121

122

# TODO: function to check whole file

123

124

# TODO: atomically append data, then measure backwards from the cursor

125

# position after writing to work out where it was located. we may need to

126

# bypass python file buffering.

127

128

DATA_SUFFIX = '.knit'

129

INDEX_SUFFIX = '.kndx'

130

131

132

class KnitContent(object):
    """Annotated lines of a single knit version.

    The content is held as a sequence of (origin, text) pairs and can be
    compared against another content object to produce a line delta.
    """

    def __init__(self, lines):
        # Sequence of (origin, text) pairs; stored as given, not copied.
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the (origin, text) pair of every content line."""
        return iter(self._lines)

    def annotate(self):
        """Collect the (origin, text) pairs into a new list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        new_lines is another content object.  Every opcode of the sequence
        matcher other than 'equal' becomes one tuple
        (old_start, old_end, new_count, replacement), where replacement is
        the matching slice of new_lines' annotated lines.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = KnitSequenceMatcher(None, old_texts, new_texts)
        for tag, old_start, old_end, new_start, new_end in \
                matcher.get_opcodes():
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks from line_delta_iter as a list."""
        return list(self.line_delta_iter(new_lines))

    def text(self):
        """Return the text of every line with its origin dropped."""
        return [content for _origin, content in self._lines]

    def copy(self):
        """Return a new KnitContent holding a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher.get_matching_blocks() output from a delta.

        knit_delta is an iterable of (source_start, source_end,
        target_length, new_text) hunks; source and target are the line
        sequences before and after the delta.  Yields
        (source_index, target_index, length) triples and finishes with a
        zero-length triple.
        """
        total = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            # Lines between the previous hunk and this one are unchanged.
            gap = hunk_start - src_at
            matched = gap
            if matched > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if (source[src_at + matched - 1]
                        != target[tgt_at + matched - 1]):
                    matched -= 1
                if matched > 0:
                    yield src_at, tgt_at, matched
            tgt_at += inserted + gap
            src_at = hunk_end
        # Whatever is left of target after the last hunk.
        tail = total - tgt_at
        matched = tail
        if matched > 0:
            if source[src_at + matched - 1] != target[tgt_at + matched - 1]:
                matched -= 1
            if matched > 0:
                yield src_at, tgt_at, matched
        yield src_at + tail, total, 0

191

192

193

class _KnitFactory(object):
    """Common base of the factories that build content objects."""

    def make(self, lines, version_id):
        """Wrap lines in a KnitContent, attributing each to version_id."""
        origins = [version_id] * len(lines)
        return KnitContent(zip(origins, lines))

199

200

201

class KnitAnnotateFactory(_KnitFactory):

202

"""Factory for creating annotated Content objects."""

203

204

annotated = True

205

206

def parse_fulltext(self, content, version_id):
    """Build a KnitContent from annotated fulltext lines.

    Every line of content has the form
        revid(utf8) plaintext\n
    and is split at its first space into a (revid, plaintext) tuple.
    version_id is not consulted here.
    """
    # TODO: jam 20070209 The tests expect this to be returned as tuples,
    #       but the code itself doesn't really depend on that.
    #       Figure out a way to not require the overhead of turning the
    #       list back into tuples.
    annotated = []
    for line in content:
        annotated.append(tuple(line.split(' ', 1)))
    return KnitContent(annotated)

220

221

def parse_line_delta_iter(self, lines):

222

return iter(self.parse_line_delta(lines))

223

224

def parse_line_delta(self, lines, version_id):

225

"""Convert a line based delta into internal representation.

226

227

line delta is in the form of:

228

intstart intend intcount

229

1..count lines:

230

revid(utf8) newline\n

231

internal representation is

232

(start, end, count, [1..count tuples (revid, newline)])

233

"""

234

result = []

235

lines = iter(lines)

236

next = lines.next

237

238

cache = {}

239

def cache_and_return(line):

240

origin, text = line.split(' ', 1)

241

return cache.setdefault(origin, origin), text

242

243

# walk through the lines parsing.

244

for header in lines:

245

start, end, count = [int(n) for n in header.split(',')]

246

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

247

result.append((start, end, count, contents))

248

return result

249

250

def get_fulltext_content(self, lines):

251

"""Extract just the content lines from a fulltext."""

252

return (line.split(' ', 1)[1] for line in lines)

253

254

def get_linedelta_content(self, lines):

255

"""Extract just the content from a line delta.

256

257

This doesn't return all of the extra information stored in a delta.

258

Only the actual content lines.

259

"""

260

lines = iter(lines)

261

next = lines.next

262

for header in lines:

263

header = header.split(',')

264

count = int(header[2])

265

for i in xrange(count):

266

origin, text = next().split(' ', 1)

267

yield text

268

269

def lower_fulltext(self, content):

270

"""convert a fulltext content record into a serializable form.

271

272

see parse_fulltext which this inverts.

273

"""

274

# TODO: jam 20070209 We only do the caching thing to make sure that

275

# the origin is a valid utf-8 line, eventually we could remove it

276

return ['%s %s' % (o, t) for o, t in content._lines]

277

278

def lower_line_delta(self, delta):

279

"""convert a delta into a serializable form.

280

281

See parse_line_delta which this inverts.

282

"""

283

# TODO: jam 20070209 We only do the caching thing to make sure that

284

# the origin is a valid utf-8 line, eventually we could remove it

285

out = []

286

for start, end, c, lines in delta:

287

out.append('%d,%d,%d\n' % (start, end, c))

288

out.extend(origin + ' ' + text

289

for origin, text in lines)

290

return out

291

292

def annotate_iter(self, knit, version_id):

293

content = knit._get_content(version_id)

294

for origin, text in content.annotate_iter():

295

yield origin, text

296

297

def annotate_iter(self, knit, version_id):

298

return annotate_knit(knit, version_id)

299

300

301

class KnitPlainFactory(_KnitFactory):
    """Factory producing content whose stored lines carry no annotation."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Build a KnitContent from an unannotated fulltext.

        Not a no-op: the internal representation is (version_id, line)
        pairs, here with the same version_id on every line.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, [(version_id, line), ...]) hunks.

        lines must support len() and slicing.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            position += 1
            start, end, count = [int(field) for field in fields]
            hunk_body = lines[position:position + count]
            yield start, end, count, zip([version_id] * count, hunk_body)
            position += count

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Iterate over the content lines of a fulltext (stored verbatim)."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Iterate over the content lines of a line delta.

        Hunk headers are consumed and skipped; only the text lines that
        follow each header are yielded.
        """
        stream = iter(lines)
        fetch = stream.next
        for header in stream:
            line_count = int(header.split(',')[2])
            for _unused in xrange(line_count):
                yield fetch()

    def lower_fulltext(self, content):
        """Serialise a fulltext content object as its plain text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta: one 'start,end,count' header per hunk,
        followed by that hunk's text lines without their origins.
        """
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            texts = [text for _origin, text in hunk_lines]
            serialised.extend(texts)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Return the annotation of version_id computed by annotate_knit."""
        return annotate_knit(knit, version_id)

357

358

359

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: transport the knit files are created on.
    :param relpath: path of the knit relative to transport.
    :return: the newly created KnitVersionedFile.
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...).  The previous positional call passed the
    # arguments in the wrong order (transport as relpath, 'w' as file_mode,
    # the factory class as access_mode), which tripped the access_mode
    # assertion, and it never asked for the knit to be created.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

362

363

364

class KnitVersionedFile(VersionedFile):

365

"""Weave-like structure with faster random access.

366

367

A knit stores a number of texts and a summary of the relationships

368

between them. Texts are identified by a string version-id. Texts

369

are normally stored and retrieved as a series of lines, but can

370

also be passed as single strings.

371

372

Lines are stored with the trailing newline (if any) included, to

373

avoid special cases for files with no final newline. Lines are

374

composed of 8-bit characters, not unicode. The combination of

375

these approaches should mean any 'binary' file can be safely

376

stored and retrieved.

377

"""

378

379

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: base name of the knit; INDEX_SUFFIX and DATA_SUFFIX
        are appended to locate the index and data files.
    :param transport: transport the knit lives on.
    :param file_mode: passed to the default index and data access objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: content factory; defaults to a KnitAnnotateFactory.
    :param basis_knit: deprecated; passing it only emits a warning.
    :param delta: stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: passed to the default index and data access objects.
    :param index: An index to use for the knit.
    :param access_method: data access object to use instead of the
        default _KnitAccess.
    """
    if deprecated_passed(basis_knit):
        # The message used to read "KnitVersionedFile.__()".
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta walks back.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # len(self) below relies on self._index being set already.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

424

425

def __repr__(self):

426

return '%s(%s)' % (self.__class__.__name__,

427

self.transport.abspath(self.filename))

428

429

def _check_should_delta(self, first_parents):

430

"""Iterate back through the parent listing, looking for a fulltext.

431

432

This is used when we want to decide whether to add a delta or a new

433

fulltext. It searches for _max_delta_chain parents. When it finds a

434

fulltext parent, it sees if the total size of the deltas leading up to

435

it is large enough to indicate that we want a new full text anyway.

436

437

Return True if we should create a new delta, False if we should use a

438

full text.

439

"""

440

delta_size = 0

441

fulltext_size = None

442

delta_parents = first_parents

443

for count in xrange(self._max_delta_chain):

444

parent = delta_parents[0]

445

method = self._index.get_method(parent)

446

index, pos, size = self._index.get_position(parent)

447

if method == 'fulltext':

448

fulltext_size = size

449

break

450

delta_size += size

451

delta_parents = self._index.get_parents(parent)

452

else:

453

# We couldn't find a fulltext, so we must create a new one

454

return False

455

456

return fulltext_size > delta_size

457

458

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored as a 'line-delta' record against delta_parent
    when _check_should_delta allows it.  Otherwise (no delta_parent, or a
    delta is not worthwhile) the insertion is delegated to the base
    class implementation.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)
    # A loop that used to sort parents into present ones and ghosts was
    # dropped: its results were never read, and every parent has just
    # been checked to be present.

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # determine the current delta chain length.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spent applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

512

513

def _add_raw_records(self, records, data):

514

"""Add all the records 'records' with data pre-joined in 'data'.

515

516

:param records: A list of tuples(version_id, options, parents, size).

517

:param data: The data for the records. When it is written, the records

518

are adjusted to have pos pointing into data by the sum of

519

the preceding records sizes.

520

"""

521

# write all the data

522

raw_record_sizes = [record[3] for record in records]

523

positions = self._data.add_raw_records(raw_record_sizes, data)

524

offset = 0

525

index_entries = []

526

for (version_id, options, parents, size), access_memo in zip(

527

records, positions):

528

index_entries.append((version_id, options, access_memo, parents))

529

if self._data._do_cache:

530

self._data._cache[version_id] = data[offset:offset+size]

531

offset += size

532

self._index.add_versions(index_entries)

533

534

def enable_cache(self):

535

"""Start caching data for this knit"""

536

self._data.enable_cache()

537

538

def clear_cache(self):

539

"""Clear the data cache only."""

540

self._data.clear_cache()

541

542

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    Copies the index and the data file of this knit to 'name' on the
    given transport.
    """
    final_index = name + INDEX_SUFFIX
    staged_index = final_index + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(staged_index,
                                  self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staged_index, final_index)

556

557

def create_empty(self, name, transport, mode=None):
    """Create a new, empty knit using this knit's factory and delta setting.

    :param name: relpath of the new knit.
    :param transport: transport to create the knit on.
    :param mode: file mode for the new knit's files; previously accepted
        but silently ignored, now forwarded as file_mode.
    :return: the new KnitVersionedFile.
    """
    return KnitVersionedFile(name, transport, file_mode=mode,
                             factory=self.factory,
                             delta=self.delta, create=True)

560

561

def _fix_parents(self, version_id, new_parents):

562

"""Fix the parents list for version.

563

564

This is done by appending a new version to the index

565

with identical data except for the parents list.

566

the parents list must be a superset of the current

567

list.

568

"""

569

current_values = self._index._cache[version_id]

570

assert set(current_values[4]).difference(set(new_parents)) == set()

571

self._index.add_version(version_id,

572

current_values[1],

573

(None, current_values[2], current_values[3]),

574

new_parents)

575

576

def _extract_blocks(self, version_id, source, target):

577

if self._index.get_method(version_id) != 'line-delta':

578

return None

579

parent, sha1, noeol, delta = self.get_delta(version_id)

580

return KnitContent.get_line_delta_blocks(delta, source, target)

581

582

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta).  parent is the first parent of
        the version or None if it has none.  For a version stored as a
        fulltext the delta is computed against the parent's text (or
        against an empty text when there is no parent).
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if self._index.get_method(version_id) != 'fulltext':
        # stored as a delta already: just parse it
        stored_delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, stored_delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    matcher = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(matcher, new_content)

610

611

def get_graph_with_ghosts(self):

612

"""See VersionedFile.get_graph_with_ghosts()."""

613

graph_items = self._index.get_graph()

614

return dict(graph_items)

615

616

def get_sha1(self, version_id):

617

return self.get_sha1s([version_id])[0]

618

619

def get_sha1s(self, version_ids):
    """Return the SHA1 digests of several versions, in request order."""
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for version_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

625

626

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    The data file suffix comes first, then the index file suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

630

631

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version named as a parent in the graph without being
    present in the index itself.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    known_versions = self._index._cache
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent == version_id and parent not in known_versions:
                return True
    return False

645

646

def versions(self):
    """See VersionedFile.versions.

    With the 'evil' debug flag set, the call site is logged first.
    """
    evil_tracing = 'evil' in debug.debug_flags
    if evil_tracing:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

651

652

def has_version(self, version_id):
    """See VersionedFile.has_version.

    With the 'evil' debug flag set, the call site is logged first.
    """
    evil_tracing = 'evil' in debug.debug_flags
    if evil_tracing:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

658

659

__contains__ = has_version

660

661

def _merge_annotations(self, content, parents, parent_texts={},

662

delta=None, annotated=None,

663

left_matching_blocks=None):

664

"""Merge annotations for content. This is done by comparing

665

the annotations based on changed to the text.

666

"""

667

if left_matching_blocks is not None:

668

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

669

else:

670

delta_seq = None

671

if annotated:

672

for parent_id in parents:

673

merge_content = self._get_content(parent_id, parent_texts)

674

if (parent_id == parents[0] and delta_seq is not None):

675

seq = delta_seq

676

else:

677

seq = patiencediff.PatienceSequenceMatcher(

678

None, merge_content.text(), content.text())

679

for i, j, n in seq.get_matching_blocks():

680

if n == 0:

681

continue

682

# this appears to copy (origin, text) pairs across to the

683

# new content for any line that matches the last-checked

684

# parent.

685

content._lines[j:j+n] = merge_content._lines[i:i+n]

686

if delta:

687

if delta_seq is None:

688

reference_content = self._get_content(parents[0], parent_texts)

689

new_texts = content.text()

690

old_texts = reference_content.text()

691

delta_seq = patiencediff.PatienceSequenceMatcher(

692

None, old_texts, new_texts)

693

return self._make_line_delta(delta_seq, content)

694

695

def _make_line_delta(self, delta_seq, new_content):

696

"""Generate a line delta from delta_seq and new_content."""

697

diff_hunks = []

698

for op in delta_seq.get_opcodes():

699

if op[0] == 'equal':

700

continue

701

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

702

return diff_hunks

703

704

def _get_components_positions(self, version_ids):

705

"""Produce a map of position data for the components of versions.

706

707

This data is intended to be used for retrieving the knit records.

708

709

A dict of version_id to (method, data_pos, data_size, next) is

710

returned.

711

method is the way referenced data should be applied.

712

data_pos is the position of the data in the knit.

713

data_size is the size of the data in the knit.

714

next is the build-parent of the version, or None for fulltexts.

715

"""

716

component_data = {}

717

for version_id in version_ids:

718

cursor = version_id

719

720

while cursor is not None and cursor not in component_data:

721

method = self._index.get_method(cursor)

722

if method == 'fulltext':

723

next = None

724

else:

725

next = self.get_parents(cursor)[0]

726

index_memo = self._index.get_position(cursor)

727

component_data[cursor] = (method, index_memo, next)

728

cursor = next

729

return component_data

730

731

def _get_content(self, version_id, parent_texts={}):

732

"""Returns a content object that makes up the specified

733

version."""

734

if not self.has_version(version_id):

735

raise RevisionNotPresent(version_id, self.filename)

736

737

cached_version = parent_texts.get(version_id, None)

738

if cached_version is not None:

739

return cached_version

740

741

text_map, contents_map = self._get_content_maps([version_id])

742

return contents_map[version_id]

743

744

def _check_versions_present(self, version_ids):

745

"""Check that all specified versions are present."""

746

self._index.check_versions_present(version_ids)

747

748

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

749

"""See VersionedFile.add_lines_with_ghosts()."""

750

self._check_add(version_id, lines)

751

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

752

753

def _add_lines(self, version_id, parents, lines, parent_texts,

754

left_matching_blocks=None):

755

"""See VersionedFile.add_lines."""

756

self._check_add(version_id, lines)

757

self._check_versions_present(parents)

758

return self._add(version_id, lines[:], parents, self.delta,

759

parent_texts, left_matching_blocks)

760

761

def _check_add(self, version_id, lines):
    """Verify that version_id and lines may be added to this knit.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already stored.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    has_whitespace = contains_whitespace(version_id)
    if has_whitespace:
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_stored = self.has_version(version_id)
    if already_stored:
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

772

773

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks=None):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: list of text lines; its last entry is modified in
        place when it lacks a trailing newline, so callers pass a copy.
    :param parent_texts: mapping handed to _merge_annotations; None is
        treated as an empty dict.
    :param left_matching_blocks: optional precomputed matching blocks
        against the first parent, handed to _merge_annotations.
    :return: the content object built for the new version.
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    if parent_texts is None:
        parent_texts = {}
    # Parents missing from this knit are ghosts; they are still recorded
    # in the index entry but cannot be used for deltas or annotations.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not len(present_parents):
        delta = False

    # The digest is taken over the text exactly as supplied, before any
    # newline is appended below.
    digest = sha_strings(lines)
    options = []
    if lines:
        # endswith() also copes with an empty last line, where indexing
        # the last character would raise IndexError.
        if not lines[-1].endswith('\n'):
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return lines

838

839

def check(self, progress_bar=None):

840

"""See VersionedFile.check()."""

841

842

def _clone_text(self, new_version_id, old_version_id, parents):

843

"""See VersionedFile.clone_text()."""

844

# FIXME RBC 20060228 make fast by only inserting an index with null

845

# delta.

846

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

847

848

def get_lines(self, version_id):

849

"""See VersionedFile.get_lines()."""

850

return self.get_line_list([version_id])[0]

851

852

def _get_record_map(self, version_ids):

853

"""Produce a dictionary of knit records.

854

855

The keys are version_ids, the values are tuples of (method, content,

856

digest, next).

857

method is the way the content should be applied.

858

content is a KnitContent object.

859

digest is the SHA1 digest of this version id after all steps are done

860

next is the build-parent of the version, i.e. the leftmost ancestor.

861

If the method is fulltext, next will be None.

862

"""

863

position_map = self._get_components_positions(version_ids)

864

# c = component_id, m = method, i_m = index_memo, n = next

865

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

866

record_map = {}

867

for component_id, content, digest in \

868

self._data.read_records_iter(records):

869

method, index_memo, next = position_map[component_id]

870

record_map[component_id] = method, content, digest, next

871

872

return record_map

873

874

def get_text(self, version_id):

875

"""See VersionedFile.get_text"""

876

return self.get_texts([version_id])[0]

877

878

def get_texts(self, version_ids):

879

return [''.join(l) for l in self.get_line_list(version_ids)]

880

881

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of lists of lines.

    The result is in the same order as version_ids.
    """
    safe_ids = []
    for requested in version_ids:
        safe_ids.append(osutils.safe_revision_id(requested))
    for version_id in safe_ids:
        self.check_not_reserved_id(version_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[version_id] for version_id in safe_ids]

888

889

_get_lf_split_line_list = get_line_list

890

891

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    Each requested version is rebuilt by walking its build-parent chain
    and applying the stored fulltext and line-deltas oldest first.

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a rebuilt text does not match the digest of
        its record.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # content_map caches every component built so far (before any no-eol
    # fixup) so that later versions sharing a chain can reuse it.
    content_map = {}
    # final_content holds the requested versions after the no-eol fixup;
    # this is the second value returned.
    final_content = {}
    for version_id in version_ids:
        # Collect the chain from the requested version back to either a
        # fulltext (next is None) or a component that is already built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # Replay the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_id)
                elif method == 'line-delta':
                    delta = self.factory.parse_line_delta(data, version_id)
                    # copy first so the cached parent content is not
                    # modified by applying the delta
                    content = content.copy()
                    content._lines = self._apply_delta(content._lines,
                                                       delta)
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy (the cached content keeps its newline) and
            # strip the trailing newline from the last line.
            content = content.copy()
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

945

946

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Yields the content lines stored in the records of the requested
    versions (all versions when version_ids is None), visiting records
    in the order of self.versions().

    :raises RevisionNotPresent: if a requested version is not present.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue of (version_id, index_memo):
    version_id_records = [
        (version_id, self._index.get_position(version_id))
        for version_id in self.versions()
        if version_id in requested_versions]

    total = len(version_id_records)
    record_stream = self._data.read_records_iter(version_id_records)
    for version_idx, (version_id, data, sha_value) in \
            enumerate(record_stream):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract = self.factory.get_fulltext_content
        else:
            extract = self.factory.get_linedelta_content
        for line in extract(data):
            yield line

    pb.update('Walking content.', total, total)

985

986

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

998

999

def num_versions(self):

1000

"""See VersionedFile.num_versions()."""

1001

return self._index.num_versions()

1002

1003

__len__ = num_versions

1004

1005

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    The work is delegated to the knit's content factory.
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self.factory.annotate_iter(self, safe_id)

1009

1010

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1020

1021

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1028

1029

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    versions may be a single version id string or a sequence of ids; an
    empty value gives an empty list.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(safe_ids, topo_sorted)
    return []

1037

1038

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    versions may be a single version id string or a sequence of ids; an
    empty value gives an empty list.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(safe_ids)
    return []

1046

1047

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    The plan is computed by merge._plan_annotate_merge from the
    annotations and the (unsorted) ancestries of both versions.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)
    return plan

1058

1059

1060

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it should be created.

        :param transport: The transport the file is accessed through.
        :param filename: The name of the file on that transport.
        :param mode: Stored as self._mode; not interpreted here.
        :param file_mode: Stored as self._file_mode.
        :param create_parent_dir: Stored as self._create_parent_dir.
        :param dir_mode: Stored as self._dir_mode.
        """
        # location
        self._transport = transport
        self._filename = filename
        # access and creation settings
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # nothing is pending creation until a subclass decides otherwise
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Read the first line of fp and check it against self.HEADER.

        :param fp: A file-like object positioned at the start of the file.
        :raises errors.NoSuchFile: If the first line is empty.
        :raises KnitHeaderError: If the first line is not self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line != self.HEADER:
            location = self._transport.abspath(self._filename)
            raise KnitHeaderError(badline=first_line, filename=location)

    def __repr__(self):
        """Return 'ClassName(filename)'."""
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1089

1090

1091

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to the record for
    that version, stored as the tuple
    (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the size of the record in the data
        file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it is normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        try:
            # a repeated version id keeps the sequence number of its first
            # entry, so _history only ever references that first entry
            index = self._cache[version_id][5]
        except KeyError:
            index = len(self._history)
            self._history.append(version_id)
        self._cache[version_id] = (
            version_id, options, pos, size, parents, index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from transport, optionally creating it.

        :param transport: The transport the index file lives on.
        :param filename: The name of the index file.
        :param mode: Access mode; creation is only considered for 'w'.
        :param create: When the file is missing and mode is 'w', create it
            instead of re-raising NoSuchFile.
        :param file_mode: Mode passed to the transport when writing the file.
        :param create_parent_dir: Passed to the transport when the file is
            eventually written by add_versions.
        :param delay_create: Do not write the header now; only flag that
            the file must be created on the first add_versions call.
        :param dir_mode: Passed to the transport when the file is eventually
            written by add_versions.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if not (mode == 'w' and create):
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        graph = []
        for version_id, record in self._cache.iteritems():
            graph.append((version_id, record[4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Parents that are not in the index (ghosts) are left out.

        :raises RevisionNotPresent: If a requested version is not in the
            index.
        """
        # get a graph of all the mentioned versions:
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            try:
                record = cache[version]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in record[4] if p in cache]
            # if not completed and not a ghost
            todo.update([p for p in parents if p not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Parents that are not in the index are included in the result as
        nodes without parents.
        """
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            if version in cache:
                parents = cache[version][4]
                # if not completed
                todo.update([p for p in parents if p not in graph])
                graph[version] = parents
            else:
                # ghost, fake it
                graph[version] = []
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                # not in the index: skip it
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Return the space separated parent references for versions.

        A version present in the index is written as its sequence number;
        any other version is written as '.' followed by the version id.
        """
        cache = self._cache
        references = []
        for version in versions:
            try:
                # -- inlined lookup() --
                reference = str(cache[version][5])
                # -- end lookup () --
            except KeyError:
                reference = '.' + version
            references.append(reference)
        return ' '.join(references)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        If anything goes wrong the in-memory history and cache are put back
        to what they were before the call and the error is re-raised.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
        """
        saved_history = self._history[:]
        saved_cache = self._cache.copy()
        lines = []

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (
                    version_id,
                    ','.join(options),
                    pos,
                    size,
                    self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # creation was delayed: write the header and the new lines
                # in a single operation
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(
                    self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename, ''.join(lines))
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        record = self._cache[version_id]
        pos, size = record[2], record[3]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises errors.KnitIndexUnknownMethod: If the options name neither
            'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options stored for the version, as they were cached.

        e.g. foo,bar
        """
        record = self._cache[version_id]
        return record[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        present = []
        for parent in cache[version_id][4]:
            if parent in cache:
                present.append(parent)
        return present

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        record = self._cache[version_id]
        return record[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: For the first version not in the index.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1370

1371

1372

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Version ids are mapped to GraphIndex keys by wrapping them in 1-tuples.
    Node values are a one character flag ('N' for no-eol, ' ' otherwise)
    followed by "<pos> <size>".
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: If deltas is true while parents is false.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once all
            found entries have been yielded if any key was not found.
        :return: An iterator of (index, key, value, node_refs) tuples; for a
            parentless index node_refs is always ().
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that are present in the index."""
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Ghost parents are left out of the result.

        :raises RevisionNotPresent: If a requested version is not present.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost parents are included in the result as nodes without parents.

        :raises RevisionNotPresent: If a requested version is not present.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # only parents not already known to be present need a lookup
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Return a tuple of the version ids held in the given 1-tuple keys."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        :raises RevisionNotPresent: If the version is not in the index.
        """
        node = self._get_node(version_id)
        # skip the leading flag character; the rest is "<pos> <size>"
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises RevisionNotPresent: If deltas are enabled and the version is
            not in the index.
        """
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Return 'line-delta' for a non-empty reference list, else 'fulltext'."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the index node for version_id.

        :raises RevisionNotPresent: If the version is not in the index.
        """
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id])))
        if not nodes:
            # report an absent version the same way the rest of the index
            # does, rather than leaking an IndexError from the empty list
            raise RevisionNotPresent(version_id, self)
        return nodes[0]

    def get_options(self, version_id):
        """Return the list of options for the version.

        e.g. ['line-delta', 'no-eol']

        :raises RevisionNotPresent: If the version is not in the index.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
        :raises errors.ReadOnlyError: If no add_callback was supplied.
        :raises KnitCorrupt: If a record does not fit this index (line-delta
            in a non-delta knit, parents in a parentless index) or disagrees
            with an entry already present for the same key.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # the second reference list names the delta basis
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            # already present with identical details: nothing to add
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Return the set of 1-tuple index keys for the given version ids."""
        return set((version_id, ) for version_id in version_ids)

1684

1685

1686

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Mode passed to the transport when writing the file.
        :param _dir_mode: Directory mode passed to the transport when the
            file is created by add_raw_records.
        :param _need_to_create: True if the file must be created by the
            first add_raw_records call rather than appended to.
        :param _create_parent_dir: Passed to the transport when the file is
            created by add_raw_records.
        """
        self._transport = transport
        self._filename = filename
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is written to the .knit file: appended to it, or used as
        its initial content when the file still has to be created.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            self._transport.put_bytes_non_atomic(
                self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            # a brand new file: the first record starts at the beginning
            position = 0
        else:
            position = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, position, size))
            position += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(
            self._filename, '', mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            handle = self._transport.get(self._filename)
        except NoSuchFile:
            return None
        return handle

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = []
        for (index, pos, size) in memos_for_retrieval:
            read_vector.append((pos, size))
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data

1764

1765

1766

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        :raises KeyError: If a memo names an index that is not in the
            index_to_pack map.
        """
        from itertools import groupby
        # first pass, group consecutive memos that share an index into one
        # request.  groupby uses its own sentinel, so an index of None is
        # grouped like any other value instead of being mistaken for "no
        # group started yet".
        request_lists = []
        for index, memos in groupby(memos_for_retrieval,
                                    lambda memo: memo[0]):
            request_lists.append(
                (index, [(offset, length) for (_, offset, length) in memos]))
        # second pass, one readv request per group
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that add_raw_records will use.
        :param index: The index object new records will be associated with.
        :param transport_packname: A (transport, packname) pair recorded in
            the index_to_pack map for index.
        """
        # unpacked here rather than in the signature: tuple parameters were
        # removed from the language by PEP 3113
        transport, packname = transport_packname
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

1850

1851

1852

class _KnitData(object):

1853

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1854

1855

The KnitData class provides the logic for parsing and using knit records,

1856

making use of an access method for the low level read and write operations.

1857

"""

1858

1859

def __init__(self, access):
    """Create a KnitData object.

    :param access: The access method to use. Access methods such as
        _KnitAccess manage the insertion of raw records and the subsequent
        retrieval of the same.
    """
    self._access = access
    self._checked = False
    # caching of reads is off until enable_cache is called
    self._do_cache = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}

1874

1875

def enable_cache(self):
    """Enable caching of reads.

    Only sets the flag; the cache contents are left untouched.
    """
    self._do_cache = True

1878

1879

def clear_cache(self):
    """Clear the record cache.

    Caching is switched off as well, and the cache is replaced by a new
    empty dictionary rather than emptied in place.
    """
    self._cache = {}
    self._do_cache = False

1883

1884

def _open_file(self):
    """Return whatever the access method's open_file returns."""
    access = self._access
    return access.open_file()

1886

1887

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is gzip compressed and consists of a
    "version <id> <line count> <digest>" line, the lines themselves and a
    closing "end <id>" line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    buf = StringIO()
    gz = GzipFile(None, mode='wb', fileobj=buf)

    assert isinstance(version_id, str)
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    gz.writelines(chain([header], lines, [footer]))
    gz.close()
    # closing the gzip stream flushes it, so the position is the total size
    size = buf.tell()

    buf.seek(0)
    return size, buf

1907

1908

def add_raw_records(self, sizes, raw_data):

1909

"""Append a prepared record to the data file.

1910

1911

:param sizes: An iterable containing the size of each raw data segment.

1912

:param raw_data: A bytestring containing the data.

1913

:return: a list of index data for the way the data was stored.

1914

See the access method add_raw_records documentation for more

1915

details.

1916

"""

1917

return self._access.add_raw_records(sizes, raw_data)

1918

1919

def add_record(self, version_id, digest, lines):

1920

"""Write new text record to disk.

1921

1922

Returns index data for retrieving it later, as per add_raw_records.

1923

"""

1924

size, sio = self._record_to_data(version_id, digest, lines)

1925

result = self.add_raw_records([size], sio.getvalue())

1926

if self._do_cache:

1927

self._cache[version_id] = sio.getvalue()

1928

return result[0]

1929

1930

def _parse_record_header(self, version_id, raw_data):

1931

"""Parse a record header for consistency.

1932

1933

:return: the header and the decompressor stream.

1934

as (stream, header_record)

1935

"""

1936

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1937

try:

1938

rec = self._check_header(version_id, df.readline())

1939

except Exception, e:

1940

raise KnitCorrupt(self._access,

1941

"While reading {%s} got %s(%s)"

1942

% (version_id, e.__class__.__name__, str(e)))

1943

return df, rec

1944

1945

def _check_header(self, version_id, line):

1946

rec = line.split()

1947

if len(rec) != 4:

1948

raise KnitCorrupt(self._access,

1949

'unexpected number of elements in record header')

1950

if rec[1] != version_id:

1951

raise KnitCorrupt(self._access,

1952

'unexpected version, wanted %r, got %r'

1953

% (version_id, rec[1]))

1954

return rec

1955

1956

def _parse_record(self, version_id, data):

1957

# profiling notes:

1958

# 4168 calls in 2880 217 internal

1959

# 4168 calls to _parse_record_header in 2121

1960

# 4168 calls to readlines in 330

1961

df = GzipFile(mode='rb', fileobj=StringIO(data))

1962

1963

try:

1964

record_contents = df.readlines()

1965

except Exception, e:

1966

raise KnitCorrupt(self._access,

1967

"While reading {%s} got %s(%s)"

1968

% (version_id, e.__class__.__name__, str(e)))

1969

header = record_contents.pop(0)

1970

rec = self._check_header(version_id, header)

1971

1972

last_line = record_contents.pop()

1973

if len(record_contents) != int(rec[2]):

1974

raise KnitCorrupt(self._access,

1975

'incorrect number of lines %s != %s'

1976

' for version {%s}'

1977

% (len(record_contents), int(rec[2]),

1978

version_id))

1979

if last_line != 'end %s\n' % rec[1]:

1980

raise KnitCorrupt(self._access,

1981

'unexpected version end line %r, wanted %r'

1982

% (last_line, version_id))

1983

df.close()

1984

return record_contents, rec[3]

1985

1986

def read_records_iter_raw(self, records):

1987

"""Read text records from data file and yield raw data.

1988

1989

This unpacks enough of the text record to validate the id is

1990

as expected but thats all.

1991

"""

1992

# setup an iterator of the external records:

1993

# uses readv so nice and fast we hope.

1994

if len(records):

1995

# grab the disk data needed.

1996

if self._cache:

1997

# Don't check _cache if it is empty

1998

needed_offsets = [index_memo for version_id, index_memo

1999

in records

2000

if version_id not in self._cache]

2001

else:

2002

needed_offsets = [index_memo for version_id, index_memo

2003

in records]

2004

2005

raw_records = self._access.get_raw_records(needed_offsets)

2006

2007

for version_id, index_memo in records:

2008

if version_id in self._cache:

2009

# This data has already been validated

2010

data = self._cache[version_id]

2011

else:

2012

data = raw_records.next()

2013

if self._do_cache:

2014

self._cache[version_id] = data

2015

2016

# validate the header

2017

df, rec = self._parse_record_header(version_id, data)

2018

df.close()

2019

yield version_id, data

2020

2021

def read_records_iter(self, records):

2022

"""Read text records from data file and yield result.

2023

2024

The result will be returned in whatever is the fastest to read.

2025

Not by the order requested. Also, multiple requests for the same

2026

record will only yield 1 response.

2027

:param records: A list of (version_id, pos, len) entries

2028

:return: Yields (version_id, contents, digest) in the order

2029

read, not the order requested

2030

"""

2031

if not records:

2032

return

2033

2034

if self._cache:

2035

# Skip records we have alread seen

2036

yielded_records = set()

2037

needed_records = set()

2038

for record in records:

2039

if record[0] in self._cache:

2040

if record[0] in yielded_records:

2041

continue

2042

yielded_records.add(record[0])

2043

data = self._cache[record[0]]

2044

content, digest = self._parse_record(record[0], data)

2045

yield (record[0], content, digest)

2046

else:

2047

needed_records.add(record)

2048

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2049

else:

2050

needed_records = sorted(set(records), key=operator.itemgetter(1))

2051

2052

if not needed_records:

2053

return

2054

2055

# The transport optimizes the fetching as well

2056

# (ie, reads continuous ranges.)

2057

raw_data = self._access.get_raw_records(

2058

[index_memo for version_id, index_memo in needed_records])

2059

2060

for (version_id, index_memo), data in \

2061

izip(iter(needed_records), raw_data):

2062

content, digest = self._parse_record(version_id, data)

2063

if self._do_cache:

2064

self._cache[version_id] = data

2065

yield version_id, content, digest

2066

2067

def read_records(self, records):

2068

"""Read records into a dictionary."""

2069

components = {}

2070

for record_id, content, digest in \

2071

self.read_records_iter(records):

2072

components[record_id] = (content, digest)

2073

return components

2074

2075

2076

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.  """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        The raw records of every needed version are copied from source to
        target, then versions present on both sides whose parents differ get
        their parent list in target extended with the parents only source
        knows about.

        :param pb: Ignored; a nested progress bar is always obtained from
            ui.ui_factory.
        :param msg: Unused here.
        :return: the number of versions copied into target.
        :raises errors.GraphCycleError: if adopting a source parent would
            make a version its own ancestor.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(
                this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(
                        new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list
                            if (not self.target.has_version(i)
                                and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(
                    version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            for (version_id, raw_data), \
                    (version_id2, options, parents) in \
                    izip(self.source._data.read_records_iter_raw(
                             copy_queue_records),
                         copy_queue):
                assert version_id == version_id2, \
                    'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append(
                    (version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = self.target.get_parents_with_ghosts(version)
                source_parents = self.source.get_parents_with_ghosts(version)
                # write a combined record to our history preserving the
                # current parents as first in the list.  The parents only the
                # source knows about follow in the order the source lists
                # them, rather than in arbitrary set iteration order.
                known = set(target_parents)
                extra_parents = []
                for parent in source_parents:
                    if parent not in known:
                        known.add(parent)
                        extra_parents.append(parent)
                new_parents = target_parents + extra_parents
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

2191

2192

2193

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits."""
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Every needed version is re-added to target from the source's lines,
        then versions present on both sides for which source lists parents
        that target lacks get their parent list in target extended.

        :param pb: Ignored; a nested progress bar is always obtained from
            ui.ui_factory.
        :param msg: Unused here.
        :return: the number of versions added to target.
        :raises errors.GraphCycleError: if adopting a source parent would
            make a version its own ancestor.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(
                this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(
                        new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list
                            if (not self.target.has_version(i)
                                and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = self.target.get_parents_with_ghosts(version)
                source_parents = self.source.get_parents(version)
                # write a combined record to our history preserving the
                # current parents as first in the list.  The parents only the
                # source knows about follow in the order the source lists
                # them, rather than in arbitrary set iteration order.
                known = set(target_parents)
                extra_parents = []
                for parent in source_parents:
                    if parent not in known:
                        known.add(parent)
                        extra_parents.append(parent)
                new_parents = target_parents + extra_parents
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2284

2285

2286

class KnitSequenceMatcher(difflib.SequenceMatcher):

2287

"""Knit tuned sequence matcher.

2288

2289

This is based on profiling of difflib which indicated some improvements

2290

for our usage pattern.

2291

"""

2292

2293

def find_longest_match(self, alo, ahi, blo, bhi):

2294

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

2295

2296

If isjunk is not defined:

2297

2298

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

2299

alo <= i <= i+k <= ahi

2300

blo <= j <= j+k <= bhi

2301

and for all (i',j',k') meeting those conditions,

2302

k >= k'

2303

i <= i'

2304

and if i == i', j <= j'

2305

2306

In other words, of all maximal matching blocks, return one that

2307

starts earliest in a, and of all those maximal matching blocks that

2308

start earliest in a, return the one that starts earliest in b.

2309

2310

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

2311

>>> s.find_longest_match(0, 5, 0, 9)

2312

(0, 4, 5)

2313

2314

If isjunk is defined, first the longest matching block is

2315

determined as above, but with the additional restriction that no

2316

junk element appears in the block. Then that block is extended as

2317

far as possible by matching (only) junk elements on both sides. So

2318

the resulting block never matches on junk except as identical junk

2319

happens to be adjacent to an "interesting" match.

2320

2321

Here's the same example as before, but considering blanks to be

2322

junk. That prevents " abcd" from matching the " abcd" at the tail

2323

end of the second sequence directly. Instead only the "abcd" can

2324

match, and matches the leftmost "abcd" in the second sequence:

2325

2326

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

2327

>>> s.find_longest_match(0, 5, 0, 9)

2328

(1, 0, 4)

2329

2330

If no blocks match, return (alo, blo, 0).

2331

2332

>>> s = SequenceMatcher(None, "ab", "c")

2333

>>> s.find_longest_match(0, 2, 0, 1)

2334

(0, 0, 0)

2335

"""

2336

2337

# CAUTION: stripping common prefix or suffix would be incorrect.

2338

# E.g.,

2339

# ab

2340

# acab

2341

# Longest matching block is "ab", but if common prefix is

2342

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

2343

# strip, so ends up claiming that ab is changed to acab by

2344

# inserting "ca" in the middle. That's minimal but unintuitive:

2345

# "it's obvious" that someone inserted "ac" at the front.

2346

# Windiff ends up at the same place as diff, but by pairing up

2347

# the unique 'b's and then matching the first two 'a's.

2348

2349

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

2350

besti, bestj, bestsize = alo, blo, 0

2351

# find longest junk-free match

2352

# during an iteration of the loop, j2len[j] = length of longest

2353

# junk-free match ending with a[i-1] and b[j]

2354

j2len = {}

2355

# nothing = []

2356

b2jget = b2j.get

2357

for i in xrange(alo, ahi):

2358

# look at all instances of a[i] in b; note that because

2359

# b2j has no junk keys, the loop is skipped if a[i] is junk

2360

j2lenget = j2len.get

2361

newj2len = {}

2362

2363

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

2364

# following improvement

2365

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

2366

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

2367

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

2368

# to

2369

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

2370

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

2371

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

2372

2373

try:

2374

js = b2j[a[i]]

2375

except KeyError:

2376

pass

2377

else:

2378

for j in js:

2379

# a[i] matches b[j]

2380

if j >= blo:

2381

if j >= bhi:

2382

break

2383

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

2384

if k > bestsize:

2385

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

2386

j2len = newj2len

2387

2388

# Extend the best by non-junk elements on each end. In particular,

2389

# "popular" non-junk elements aren't in b2j, which greatly speeds

2390

# the inner loop above, but also means "the best" match so far

2391

# doesn't contain any junk *or* popular non-junk elements.

2392

while besti > alo and bestj > blo and \

2393

not isbjunk(b[bestj-1]) and \

2394

a[besti-1] == b[bestj-1]:

2395

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

2396

while besti+bestsize < ahi and bestj+bestsize < bhi and \

2397

not isbjunk(b[bestj+bestsize]) and \

2398

a[besti+bestsize] == b[bestj+bestsize]:

2399

bestsize += 1

2400

2401

# Now that we have a wholly interesting match (albeit possibly

2402

# empty!), we may as well suck up the matching junk on each

2403

# side of it too. Can't think of a good reason not to, and it

2404

# saves post-processing the (possibly considerable) expense of

2405

# figuring out what to do with it. In the case of an empty

2406

# interesting match, this is clearly the right thing to do,

2407

# because no other kind of match is possible in the regions.

2408

while besti > alo and bestj > blo and \

2409

isbjunk(b[bestj-1]) and \

2410

a[besti-1] == b[bestj-1]:

2411

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

2412

while besti+bestsize < ahi and bestj+bestsize < bhi and \

2413

isbjunk(b[bestj+bestsize]) and \

2414

a[besti+bestsize] == b[bestj+bestsize]:

2415

bestsize = bestsize + 1

2416

2417

return besti, bestj, bestsize

2418

2419

2420

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit holding revision_id and its ancestry.
    :param revision_id: The version to annotate.
    :return: an iterator over the annotation of revision_id, as produced by
        annotate.reannotate.
    """
    ancestry = knit.get_ancestry(revision_id, topo_sorted=False)
    fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))
    annotations = {}
    # Explicit stack: a version is only annotated once all of its parents
    # have been, so it is pushed back underneath any unannotated parents.
    todo = [revision_id]
    while todo:
        current = todo.pop()
        if current in annotations:
            continue
        parents = knit.get_parents(current)
        unannotated = [p for p in parents if p not in annotations]
        if len(unannotated) > 0:
            todo.append(current)
            todo.extend(unannotated)
            continue
        # Matching blocks are only taken from the stored delta when the
        # version has parents and is stored as a line-delta; otherwise
        # reannotate is given None.
        if (len(parents) != 0
            and knit._index.get_method(current) == 'line-delta'):
            parent, sha1, noeol, delta = knit.get_delta(current)
            blocks = KnitContent.get_line_delta_blocks(
                delta, fulltext[parents[0]], fulltext[current])
        else:
            blocks = None
        parent_annotations = [annotations[p] for p in parents]
        annotations[current] = list(annotate.reannotate(
            parent_annotations, fulltext[current], current, blocks))
    return iter(annotations[revision_id])

2452

2453

2454

try:

2455

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2456

except ImportError:

2457

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »