# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only
writes.

lifeless: the data file is made up of "delta records".  each delta record has a delta header
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
the -expanded data- (ie, the delta applied to the parent).  the delta also ends with an
end-marker; simply "end VERSION"

delta can be line or full contents.
... the 8's there are the index number of the annotation.
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
8         e.set('executable', 'yes')
8         if elt.get('executable') == 'yes':
8             ie.executable = True
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
09:33 < lifeless> right
09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used
in the deltas to provide line annotation
"""
# 10:16 < lifeless> make partial index writes safe
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave'
# move sha1 out of the content so that join is faster at verifying parents
# record content length ?
from cStringIO import StringIO
from itertools import izip, chain
import operator
import warnings

from bzrlib import (
    errors,
    merge,
    osutils,
    patiencediff,
    progress,
    ui,
    )
from bzrlib.errors import (
    InvalidRevisionId,
    KnitCorrupt,
    KnitHeaderError,
    NoSuchFile,
    RevisionNotPresent,
    RevisionAlreadyPresent,
    )
from bzrlib.tuned_gzip import GzipFile
from bzrlib.osutils import (
    contains_whitespace,
    sha_strings,
    )
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data
# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor
# position after writing to work out where it was located.  we may need to
# bypass python file buffering.

DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'


class KnitContent(object):
    """Content of a knit version to which deltas can be applied."""

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        return iter(self._lines)

    def annotate(self):
        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines."""
        new_texts = new_lines.text()
        old_texts = self.text()
        s = KnitSequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in s.get_opcodes():
            if tag == 'equal':
                continue
            # ofrom, oto, length, data
            yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        return list(self.line_delta_iter(new_lines))
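
    # Illustrative sketch (hypothetical annotated contents): diffing two
    # KnitContents yields (start, end, count, lines) hunks against the old
    # text, with unchanged regions omitted:
    #
    #   old = KnitContent([('rev-a', 'foo\n'), ('rev-a', 'bar\n')])
    #   new = KnitContent([('rev-a', 'foo\n'), ('rev-b', 'baz\n')])
    #   old.line_delta(new) => [(1, 2, 1, [('rev-b', 'baz\n')])]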

    def text(self):
        return [text for origin, text in self._lines]

    def copy(self):
        return KnitContent(self._lines[:])

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
        target_len = len(target)
        s_pos = 0
        t_pos = 0
        for s_begin, s_end, t_len, new_text in knit_delta:
            true_n = s_begin - s_pos
            n = true_n
            if n > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[s_pos + n - 1] != target[t_pos + n - 1]:
                    n -= 1
                if n > 0:
                    yield s_pos, t_pos, n
            t_pos += t_len + true_n
            s_pos = s_end
        n = target_len - t_pos
        if n > 0:
            if source[s_pos + n - 1] != target[t_pos + n - 1]:
                n -= 1
            if n > 0:
                yield s_pos, t_pos, n
        yield s_pos + (target_len - t_pos), target_len, 0


class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version_id):
        num_lines = len(lines)
        return KnitContent(zip([version_id] * num_lines, lines))


class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines):
        return iter(self.parse_line_delta(lines))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])
        """
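        # For example (hypothetical revision ids), the serialized delta
        #   ['1,2,2\n', 'rev-a line one\n', 'rev-b line two\n']
        # parses to
        #   [(1, 2, 2, [('rev-a', 'line one\n'), ('rev-b', 'line two\n')])]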
        result = []
        lines = iter(lines)
        next = lines.next

        cache = {}
        def cache_and_return(line):
            origin, text = line.split(' ', 1)
            return cache.setdefault(origin, origin), text

        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out


class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - it's just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        cur = 0
        num_lines = len(lines)
        while cur < num_lines:
            header = lines[cur]
            cur += 1
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version_id] * c, lines[cur:cur+c])
            cur += c

    def parse_line_delta(self, lines, version_id):
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                yield next()

    def lower_fulltext(self, content):
        return content.text()

    def lower_line_delta(self, delta):
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out
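
    # Illustrative sketch (hypothetical delta): lower_line_delta serializes
    # [(1, 2, 1, [('rev-b', 'baz\n')])] to ['1,2,1\n', 'baz\n'], dropping
    # the per-line origins that the annotated factory would keep.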


def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location."""
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    k._data._open_file()


class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
                 create=False, create_parent_dir=False, delay_create=False,
                 dir_mode=None, index=None):
        """Construct a knit at location specified by relpath.

        :param create: If not True, only open an existing knit.
        :param create_parent_dir: If True, create the parent directory if
            creating the file fails. (This is used for stores with
            hash-prefixes that may not exist yet)
        :param delay_create: The calling code is aware that the knit won't
            actually be created until the first data is stored.
        :param index: An index to use for the knit.
        """
        if deprecated_passed(basis_knit):
            warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                          " parameter is deprecated as of bzr 0.9.",
                          DeprecationWarning, stacklevel=2)
        if access_mode is None:
            access_mode = 'w'
        super(KnitVersionedFile, self).__init__(access_mode)
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        self.transport = transport
        self.filename = relpath
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta

        self._max_delta_chain = 200

        if index is None:
            self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
                access_mode, create=create, file_mode=file_mode,
                create_parent_dir=create_parent_dir, delay_create=delay_create,
                dir_mode=dir_mode)
        else:
            self._index = index
        self._data = _KnitData(transport, relpath + DATA_SUFFIX,
            access_mode, create=create and not len(self), file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _check_should_delta(self, first_parents):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext. It searches for _max_delta_chain parents. When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        full text.
        """
        delta_size = 0
        fulltext_size = None
        delta_parents = first_parents
        for count in xrange(self._max_delta_chain):
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            pos, size = self._index.get_position(parent)
            if method == 'fulltext':
                fulltext_size = size
                break
            delta_size += size
            delta_parents = self._index.get_parents(parent)
        else:
            # We couldn't find a fulltext, so we must create a new one
            return False

        return fulltext_size > delta_size
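
    # A worked example (hypothetical sizes): with a delta chain rev-3 ->
    # rev-2 -> rev-1 where rev-1 is a 1000-byte fulltext and the two deltas
    # total 120 bytes, this returns True (1000 > 120), so a new delta is
    # preferred; once the accumulated deltas outgrow the fulltext it returns
    # False and a fresh fulltext is stored instead.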

    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, []) # should we check the lines ?
        self._check_versions_present(parents)
        present_parents = []
        ghosts = []
        parent_texts = {}
        for parent in parents:
            if not self.has_version(parent):
                ghosts.append(parent)
            else:
                present_parents.append(parent)

        if delta_parent is None:
            # reconstitute as full text.
            assert len(delta) == 1 or len(delta) == 0
            if len(delta):
                assert delta[0][0] == 0
                assert delta[0][1] == 0, delta[0][1]
            return super(KnitVersionedFile, self)._add_delta(version_id,
                                                             parents,
                                                             delta_parent,
                                                             sha1,
                                                             noeol,
                                                             delta)

        digest = sha1

        options = []
        if noeol:
            options.append('no-eol')

        if delta_parent is not None:
            # determine the current delta chain length.
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spent applying deltas.
            # The window was changed to a maximum of 200 deltas, but also added
            # was a check that the total compressed size of the deltas is
            # smaller than the compressed size of the fulltext.
            if not self._check_should_delta([delta_parent]):
                # We don't want a delta here, just do a normal insertion.
                return super(KnitVersionedFile, self)._add_delta(version_id,
                                                                 parents,
                                                                 delta_parent,
                                                                 sha1,
                                                                 noeol,
                                                                 delta)

        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)

    def _add_raw_records(self, records, data):
        """Add all the records 'records' with data pre-joined in 'data'.

        :param records: A list of tuples(version_id, options, parents, size).
        :param data: The data for the records. When it is written, the records
                     are adjusted to have pos pointing into data by the sum of
                     the preceding records sizes.
        """
        # write all the data
        pos = self._data.add_raw_record(data)
        offset = 0
        index_entries = []
        for (version_id, options, parents, size) in records:
            index_entries.append((version_id, options, pos+offset,
                                  size, parents))
            if self._data._do_cache:
                self._data._cache[version_id] = data[offset:offset+size]
            offset += size
        self._index.add_versions(index_entries)

    def enable_cache(self):
        """Start caching data for this knit"""
        self._data.enable_cache()

    def clear_cache(self):
        """Clear the data cache only."""
        self._data.clear_cache()

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # copy the current index to a temp index to avoid racing with local
        # writes
        transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                self.transport.get(self._index._filename))
        # copy the data file
        f = self._data._open_file()
        try:
            transport.put_file(name + DATA_SUFFIX, f)
        finally:
            f.close()
        # move the copied index into place
        transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        return KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)

    def _fix_parents(self, version_id, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        the parents list must be a superset of the current
        parents.
        """
        current_values = self._index._cache[version_id]
        assert set(current_values[4]).difference(set(new_parents)) == set()
        self._index.add_version(version_id,
                                current_values[1],
                                current_values[2],
                                current_values[3],
                                new_parents)

    def _extract_blocks(self, version_id, source, target):
        if self._index.get_method(version_id) != 'line-delta':
            return None
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version."""
        version_id = osutils.safe_revision_id(version_id)
        self.check_not_reserved_id(version_id)
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        parents = self.get_parents(version_id)
        if len(parents):
            parent = parents[0]
        else:
            parent = None
        data_pos, data_size = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            new_content = self.factory.parse_fulltext(data, version_id)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            else:
                old_texts = []
            new_texts = new_content.text()
            delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
        else:
            delta = self.factory.parse_line_delta(data, version_id)
            return parent, sha1, noeol, delta

    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        graph_items = self._index.get_graph()
        return dict(graph_items)

    def get_sha1(self, version_id):
        return self.get_sha1s([version_id])[0]

    def get_sha1s(self, version_ids):
        """See VersionedFile.get_sha1()."""
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        record_map = self._get_record_map(version_ids)
        # record entry 2 is the 'digest'.
        return [record_map[v][2] for v in version_ids]

    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]

    def has_ghost(self, version_id):
        """True if there is a ghost reference in the file to version_id."""
        version_id = osutils.safe_revision_id(version_id)
        # maybe we have it
        if self.has_version(version_id):
            return False
        # optimisable if needed by memoising the _ghosts set.
        items = self._index.get_graph()
        for node, parents in items:
            for parent in parents:
                if parent not in self._index._cache:
                    if parent == version_id:
                        return True
        return False

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        version_id = osutils.safe_revision_id(version_id)
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text.
        """
        if annotated:
            delta_seq = None
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
                if delta_seq is None:
                    # setup a delta seq to reuse.
                    delta_seq = seq
                for i, j, n in seq.get_matching_blocks():
                    if n == 0:
                        continue
                    # this appears to copy (origin, text) pairs across to the new
                    # content for any line that matches the last-checked parent.
                    # FIXME: save the sequence control data for delta compression
                    # against the most relevant parent rather than rediffing.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if not annotated:
                reference_content = self._get_content(parents[0], parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = patiencediff.PatienceSequenceMatcher(
                    None, old_texts, new_texts)
            return self._make_line_delta(delta_seq, content)

    def _make_line_delta(self, delta_seq, new_content):
        """Generate a line delta from delta_seq and new_content."""
        diff_hunks = []
        for op in delta_seq.get_opcodes():
            if op[0] == 'equal':
                continue
            diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
        return diff_hunks

    def _get_components_positions(self, version_ids):
        """Produce a map of position data for the components of versions.

        This data is intended to be used for retrieving the knit records.

        A dict of version_id to (method, data_pos, data_size, next) is
        returned.
        method is the way referenced data should be applied.
        data_pos is the position of the data in the knit.
        data_size is the size of the data in the knit.
        next is the build-parent of the version, or None for fulltexts.
        """
        component_data = {}
        for version_id in version_ids:
            cursor = version_id

            while cursor is not None and cursor not in component_data:
                method = self._index.get_method(cursor)
                if method == 'fulltext':
                    next = None
                else:
                    next = self.get_parents(cursor)[0]
                data_pos, data_size = self._index.get_position(cursor)
                component_data[cursor] = (method, data_pos, data_size, next)
                cursor = next
        return component_data
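
    # Illustrative result (hypothetical ids): asking for ['rev-3'] where
    # rev-3 and rev-2 are line-deltas built on the fulltext rev-1 might
    # return:
    #   {'rev-3': ('line-delta', 200, 60, 'rev-2'),
    #    'rev-2': ('line-delta', 120, 80, 'rev-1'),
    #    'rev-1': ('fulltext', 0, 120, None)}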

    def _get_content(self, version_id, parent_texts={}):
        """Returns a content object that makes up the specified
        version."""
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

        text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        self._index.check_versions_present(version_ids)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
        """See VersionedFile.add_lines_with_ghosts()."""
        self._check_add(version_id, lines)
        return self._add(version_id, lines[:], parents, self.delta, parent_texts)

    def _add_lines(self, version_id, parents, lines, parent_texts):
        """See VersionedFile.add_lines."""
        self._check_add(version_id, lines)
        self._check_versions_present(parents)
        return self._add(version_id, lines[:], parents, self.delta, parent_texts)

    def _check_add(self, version_id, lines):
        """check that version_id and lines are safe to add."""
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id, self.filename)
        self.check_not_reserved_id(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
        self._check_lines_not_unicode(lines)
        self._check_lines_are_lines(lines)

    def _add(self, version_id, lines, parents, delta, parent_texts):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        the first parent.

        Any versions not present will be converted into ghosts.
        """
        # 461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
        # +400   0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
        # +461   0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
        # +461   0    193.3940     41.5720   +bzrlib.knit:898(add_version)
        # +461   0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
        # +461   0     36.3420     15.4540   +bzrlib.knit:146(make)
        # +1383  0      8.0370      8.0370   +<len>
        # +61    0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
        # +61    0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
        # +61    0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
        # +61    0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

        present_parents = []
        ghosts = []
        if parent_texts is None:
            parent_texts = {}
        for parent in parents:
            if not self.has_version(parent):
                ghosts.append(parent)
            else:
                present_parents.append(parent)

        if delta and not len(present_parents):
            delta = False

        digest = sha_strings(lines)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'

        if len(present_parents) and delta:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spent applying deltas.
            delta = self._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        lines = self.factory.make(lines, version_id)
        if delta or (self.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = self._merge_annotations(lines, present_parents,
                parent_texts, delta, self.factory.annotated)

        if delta:
            options.append('line-delta')
            store_lines = self.factory.lower_line_delta(delta_hunks)
        else:
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)
        return digest

    def check(self, progress_bar=None):
        """See VersionedFile.check()."""

    def _clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null
        # delta.
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self.get_line_list([version_id])[0]

    def _get_record_map(self, version_ids):
        """Produce a dictionary of knit records.

        The keys are version_ids, the values are tuples of (method, content,
        digest, next).
        method is the way the content should be applied.
        content is a KnitContent object.
        digest is the SHA1 digest of this version id after all steps are done
        next is the build-parent of the version, i.e. the leftmost ancestor.
        If the method is fulltext, next will be None.
        """
        position_map = self._get_components_positions(version_ids)
        # c = component_id, m = method, p = position, s = size, n = next
        records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]
        record_map = {}
        for component_id, content, digest in \
                self._data.read_records_iter(records):
            method, position, size, next = position_map[component_id]
            record_map[component_id] = method, content, digest, next

        return record_map

    def get_text(self, version_id):
        """See VersionedFile.get_text"""
        return self.get_texts([version_id])[0]

    def get_texts(self, version_ids):
        return [''.join(l) for l in self.get_line_list(version_ids)]

    def get_line_list(self, version_ids):
        """Return the texts of listed versions as a list of strings."""
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        for version_id in version_ids:
            self.check_not_reserved_id(version_id)
        text_map, content_map = self._get_content_maps(version_ids)
        return [text_map[v] for v in version_ids]

    _get_lf_split_line_list = get_line_list

    def _get_content_maps(self, version_ids):
        """Produce maps of text and KnitContents

        :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
        """
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        record_map = self._get_record_map(version_ids)

        text_map = {}
        content_map = {}
        final_content = {}
        for version_id in version_ids:
            components = []
            cursor = version_id
            while cursor is not None:
                method, data, digest, next = record_map[cursor]
                components.append((cursor, method, data, digest))
                if cursor in content_map:
                    break
                cursor = next

            content = None
            for component_id, method, data, digest in reversed(components):
                if component_id in content_map:
                    content = content_map[component_id]
                else:
                    if method == 'fulltext':
                        assert content is None
                        content = self.factory.parse_fulltext(data, version_id)
                    elif method == 'line-delta':
                        delta = self.factory.parse_line_delta(data, version_id)
                        content = content.copy()
                        content._lines = self._apply_delta(content._lines,
                                                           delta)
                    content_map[component_id] = content

            if 'no-eol' in self._index.get_options(version_id):
                content = content.copy()
                line = content._lines[-1][1].rstrip('\n')
                content._lines[-1] = (content._lines[-1][0], line)
            final_content[version_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            if sha_strings(text) != digest:
                raise KnitCorrupt(self.filename,
                                  'sha-1 does not match %s' % version_id)

            text_map[version_id] = text
        return text_map, final_content

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        else:
            version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        if pb is None:
            pb = progress.DummyProgress()
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        requested_versions = set(version_ids)
        # filter for available versions
        for version_id in requested_versions:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        # get an in-component-order queue:
        for version_id in self.versions():
            if version_id in requested_versions:
                data_pos, length = self._index.get_position(version_id)
                version_id_records.append((version_id, data_pos, length))

        total = len(version_id_records)
        for version_idx, (version_id, data, sha_value) in \
                enumerate(self._data.read_records_iter(version_id_records)):
            pb.update('Walking content.', version_idx, total)
            method = self._index.get_method(version_id)

            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                line_iterator = self.factory.get_fulltext_content(data)
            else:
                line_iterator = self.factory.get_linedelta_content(data)
            for line in line_iterator:
                yield line

        pb.update('Walking content.', total, total)

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        version_ids = [osutils.safe_revision_id(version_id) for
                       version_id in version_ids]
        return self._index.iter_parents(version_ids)

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

    __len__ = num_versions

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        version_id = osutils.safe_revision_id(version_id)
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():
            yield origin, text

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        # perf notes:
        # optimism counts!
        # 52554 calls in 1264 872 internal down from 3674
        version_id = osutils.safe_revision_id(version_id)
        try:
            return self._index.get_parents(version_id)
        except KeyError:
            raise RevisionNotPresent(version_id, self.filename)

    def get_parents_with_ghosts(self, version_id):
        """See VersionedFile.get_parents."""
        version_id = osutils.safe_revision_id(version_id)
        try:
            return self._index.get_parents_with_ghosts(version_id)
        except KeyError:
            raise RevisionNotPresent(version_id, self.filename)

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        versions = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(versions, topo_sorted)

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        versions = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(versions)

    #@deprecated_method(zero_eight)
    def walk(self, version_ids):
        """See VersionedFile.walk."""
        # We take the short path here, and extract all relevant texts
        # and put them in a weave and let that do all the work.  Far
        # from optimal, but is much simpler.
        # FIXME RB 20060228 this really is inefficient!
        from bzrlib.weave import Weave

        w = Weave(self.filename)
        ancestry = set(self.get_ancestry(version_ids, topo_sorted=False))
        sorted_graph = topo_sort(self._index.get_graph())
        version_list = [vid for vid in sorted_graph if vid in ancestry]

        for version_id in version_list:
            lines = self.get_lines(version_id)
            w.add_lines(version_id, self.get_parents(version_id), lines)

        for lineno, insert_id, dset, line in w.walk(version_ids):
            yield lineno, insert_id, dset, line

    def plan_merge(self, ver_a, ver_b):
        """See VersionedFile.plan_merge."""
        ver_a = osutils.safe_revision_id(ver_a)
        ver_b = osutils.safe_revision_id(ver_b)
        ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))

        ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
        annotated_a = self.annotate(ver_a)
        annotated_b = self.annotate(ver_b)
        return merge._plan_annotate_merge(annotated_a, annotated_b,
                                          ancestors_a, ancestors_b)


class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        return self._transport.base + self._filename

    def check_header(self, fp):
        line = fp.readline()
        if line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if line != self.HEADER:
            raise KnitHeaderError(badline=line,
                                  filename=self._transport.abspath(self._filename))

    def commit(self):
        """Commit is a nop."""

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self._filename)


class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to a Index
    object.

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record in the data
        file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    When a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it's normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """
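
    # An illustrative index file (hypothetical revision ids), per the format
    # described above:
    #
    #   # bzr knit index 8
    #   rev-1 fulltext 0 123  :
    #   rev-2 line-delta 123 104 0 :
    #   rev-3 line-delta 227 86 1 .ghost-rev :
    #
    # rev-2's parent is given dictionary-compressed as sequence number 0
    # (rev-1); rev-3 depends on rev-2 (sequence 1) and on a ghost parent
    # recorded by its full id prefixed with '.'.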

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        graph = {}
        cache = self._cache
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                yield version_id, tuple(self.get_parents(version_id))
            except KeyError:
                pass

    def num_versions(self):
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, pos, size, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, pos, size, parents).
        """
        lines = []
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, pos, size, parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        return entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            if 'line-delta' not in options:
                raise errors.KnitIndexUnknownMethod(self._full_path(), options)
            return 'line-delta'

    def get_options(self, version_id):
        """Return a string representation of the options.

        e.g. foo,bar
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)


class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knit parents, if not do not record
            parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # don't ask for ghosts - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        bits = self._get_node(version_id)[2][1:].split(' ')
        return int(bits[0]), int(bits[1])
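
    # Node values encode the no-eol flag plus position and size, e.g.
    # (illustrative) ' 0 82' or 'N82 104'; get_position skips the flag byte
    # and parses the two integers (see add_versions below).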

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]

    def get_options(self, version_id):
        """Return a string representation of the options.

        e.g. foo,bar
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return ','.join(options)

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, pos, size, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, pos, size, parents).
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anywhere. Otherwise this code path will fail badly.
        keys = {}
        for (version_id, options, pos, size, parents) in versions:
            # index keys are tuples:
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)


class _KnitData(_KnitComponentFile):
    """Contents of the knit data file"""

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False,
                 dir_mode=None):
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        #       but it complicates the model a bit, so for now just use
        #       a simple dictionary
        self._cache = {}
        self._do_cache = False
        if create:
            if delay_create:
                self._need_to_create = create
            else:
                self._transport.put_bytes_non_atomic(self._filename, '',
                                                     mode=self._file_mode)

    def enable_cache(self):
        """Enable caching of reads."""
        self._do_cache = True

    def clear_cache(self):
        """Clear the record cache."""
        self._do_cache = False
        self._cache = {}

    def _open_file(self):
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            pass
        return None

    def _record_to_data(self, version_id, digest, lines):
        """Convert version_id, digest, lines into a raw data block.

        :return: (len, a StringIO instance with the raw data ready to read.)
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)

        assert isinstance(version_id, str)
        data_file.writelines(chain(
            ["version %s %d %s\n" % (version_id,
                                     len(lines),
                                     digest)],
            lines,
            ["end %s\n" % version_id]))
        data_file.close()
        length = sio.tell()

        sio.seek(0)
        return length, sio

    def add_raw_record(self, raw_data):
        """Append a prepared record to the data file.

        :return: the offset in the data file raw_data was written.
        """
        assert isinstance(raw_data, str), 'data must be plain bytes'
        if not self._need_to_create:
            return self._transport.append_bytes(self._filename, raw_data)
        else:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            return 0

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.  Returns the position in the
        file where it was written."""
        size, sio = self._record_to_data(version_id, digest, lines)
        # write to disk
        if not self._need_to_create:
            start_pos = self._transport.append_file(self._filename, sio)
        else:
            self._transport.put_file_non_atomic(self._filename, sio,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
            self._need_to_create = False
            start_pos = 0
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return start_pos, size

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream.
                 as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        try:
            rec = self._check_header(version_id, df.readline())
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename,
                              'unexpected number of elements in record header')
        if rec[1] != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec

    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))

        try:
            record_contents = df.readlines()
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        header = record_contents.pop(0)
        rec = self._check_header(version_id, header)

        last_line = record_contents.pop()
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._filename,
                              'incorrect number of lines %s != %s'
                              ' for version {%s}'
                              % (len(record_contents), int(rec[2]),
                                 version_id))
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))
        df.close()
        return record_contents, rec[3]

    def read_records_iter_raw(self, records):
        """Read text records from data file and yield raw data.

        This unpacks enough of the text record to validate the id is
        as expected but that's all.
        """
        # setup an iterator of the external records:
        # uses readv so nice and fast we hope.
        if len(records):
            # grab the disk data needed.
            if self._cache:
                # Don't check _cache if it is empty
                needed_offsets = [(pos, size) for version_id, pos, size
                                              in records
                                              if version_id not in self._cache]
            else:
                needed_offsets = [(pos, size) for version_id, pos, size
                                              in records]

            raw_records = self._transport.readv(self._filename, needed_offsets)

        for version_id, pos, size in records:
            if version_id in self._cache:
                # This data has already been validated
                data = self._cache[version_id]
            else:
                pos, data = raw_records.next()
                if self._do_cache:
                    self._cache[version_id] = data

                # validate the header
                df, rec = self._parse_record_header(version_id, data)
                df.close()
            yield version_id, data
1851
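
    # Note that the data yielded here is still the gzipped on-disk record;
    # InterKnit.join below feeds it straight to the target's
    # _add_raw_records, avoiding a decompress/recompress cycle.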

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        The result will be returned in whatever is the fastest to read.
        Not by the order requested. Also, multiple requests for the same
        record will only yield 1 response.

        :param records: A list of (version_id, pos, len) entries
        :return: Yields (version_id, contents, digest) in the order
                 read, not the order requested
        """
        if not records:
            return

        if self._cache:
            # Skip records we have already seen
            yielded_records = set()
            needed_records = set()
            for record in records:
                if record[0] in self._cache:
                    if record[0] in yielded_records:
                        continue
                    yielded_records.add(record[0])
                    data = self._cache[record[0]]
                    content, digest = self._parse_record(record[0], data)
                    yield (record[0], content, digest)
                else:
                    needed_records.add(record)
            needed_records = sorted(needed_records, key=operator.itemgetter(1))
        else:
            needed_records = sorted(set(records), key=operator.itemgetter(1))

        if not needed_records:
            return

        # The transport optimizes the fetching as well
        # (ie, reads continuous ranges.)
        readv_response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in needed_records])

        for (version_id, pos, size), (pos, data) in \
                izip(iter(needed_records), readv_response):
            content, digest = self._parse_record(version_id, data)
            if self._do_cache:
                self._cache[version_id] = data
            yield version_id, content, digest
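
    # Because results arrive in readv (offset) order, a caller that needs
    # its own ordering must reorder them, e.g. (sketch only):
    #
    #   by_id = dict((v, (c, d)) for v, c, d in
    #                data.read_records_iter(wanted))
    #   ordered = [by_id[v] for v, _, _ in wanted]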

    def read_records(self, records):
        """Read records into a dictionary."""
        components = {}
        for record_id, content, digest in \
                self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components


class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits."""
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that this will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must:
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                data_pos, data_size = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)
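
# Once registered, the optimiser is picked up automatically; a caller would
# typically write something like the following (sketch, hypothetical knits)
# rather than naming InterKnit directly:
#
#   InterVersionedFile.get(source_knit, target_knit).join()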


class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits."""
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that this will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)


class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 2, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            #     704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            #  +76519  0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to:
            #     704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            # +211400  0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            #  +76519  0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize
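
# Illustrative use of the tuned matcher (KnitSequenceMatcher keeps the
# difflib.SequenceMatcher interface, so the inherited methods still apply):
#
#   sm = KnitSequenceMatcher(None, ['a\n', 'b\n'], ['a\n', 'c\n', 'b\n'])
#   blocks = sm.get_matching_blocks()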


try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data
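
# The try/except above prefers the compiled _load_data extension when it
# has been built, and otherwise falls back to the pure-Python version; both
# are expected to expose the same interface.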