/brz/remove-bazaar : revision 2791

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-09-03 21:19:07 UTC
mfrom: (2776.1.3 commit)
Revision ID: pqm@pqm.ubuntu.com-20070903211907-igj2uj83hz1yyqs9

(robertc) Don't double-calculate the text sha1 during commit. (Robert Collins)

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

100

KnitHeaderError,

101

RevisionNotPresent,

102

RevisionAlreadyPresent,

103

)

104

from bzrlib.tuned_gzip import GzipFile

105

from bzrlib.osutils import (

106

contains_whitespace,

107

contains_linebreaks,

108

sha_strings,

109

)

110

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

111

from bzrlib.tsort import topo_sort

112

import bzrlib.ui

113

import bzrlib.weave

114

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

115

116

117

# TODO: Split out code specific to this format into an associated object.

118

119

# TODO: Can we put in some kind of value to check that the index and data

120

# files belong together?

121

122

# TODO: accommodate binaries, perhaps by storing a byte count

123

124

# TODO: function to check whole file

125

126

# TODO: atomically append data, then measure backwards from the cursor

127

# position after writing to work out where it was located. we may need to

128

# bypass python file buffering.

129

130

DATA_SUFFIX = '.knit'

131

INDEX_SUFFIX = '.kndx'

132

133

134

class KnitContent(object):
    """A knit version held as a list of (origin, text) line pairs."""

    def __init__(self, lines):
        # lines: list of (origin, text) tuples; stored as given, not copied.
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the (origin, text) pair of every content line."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a new list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        :param new_lines: The target KnitContent.
        :return: An iterator of (old_start, old_end, new_length, new_pairs)
            tuples, one for every non-'equal' opcode.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks from line_delta_iter() as a list."""
        return list(self.line_delta_iter(new_lines))

    def text(self):
        """Return just the text of every line, dropping the origins."""
        return [pair_text for pair_origin, pair_text in self._lines]

    def copy(self):
        """Return a new KnitContent over a shallow copy of the line list."""
        return KnitContent(self._lines[:])

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher-style matching blocks from a knit delta.

        :param knit_delta: Iterable of (s_begin, s_end, t_len, new_text)
            hunks.
        :param source: The lines the delta applies to.
        :param target: The lines the delta produces.
        :return: A generator of (source_pos, target_pos, length) tuples,
            terminated by a zero-length block.
        """
        target_len = len(target)
        src_cursor = 0
        tgt_cursor = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_cursor
            matched = gap
            if matched > 0:
                # A knit delta cannot be trusted about the last line of the
                # run because of eol handling, so compare it explicitly.
                last = matched - 1
                if source[src_cursor + last] != target[tgt_cursor + last]:
                    matched -= 1
                if matched > 0:
                    yield src_cursor, tgt_cursor, matched
            tgt_cursor += inserted + gap
            src_cursor = hunk_end
        tail = target_len - tgt_cursor
        if tail > 0:
            if source[src_cursor + tail - 1] != target[tgt_cursor + tail - 1]:
                tail -= 1
            if tail > 0:
                yield src_cursor, tgt_cursor, tail
        # Terminating zero-length block.
        yield src_cursor + (target_len - tgt_cursor), target_len, 0

193

194

195

class _KnitFactory(object):
    """Shared base of the knit content factories."""

    def make(self, lines, version_id):
        """Build a KnitContent that attributes every line to version_id.

        :param lines: A sized sequence of text lines.
        :param version_id: The origin paired with each line.
        """
        origins = [version_id] * len(lines)
        return KnitContent(list(zip(origins, lines)))

201

202

203

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    Stored lines carry their origin inline, as 'origin text'.
    """

    annotated = True

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: Iterable of stored 'revid text' lines.
        :param version_id: Unused here; accepted so the signature matches
            KnitPlainFactory.parse_fulltext.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Iterate over the hunks produced by parse_line_delta().

        :param version_id: Optional; forwarded to parse_line_delta(), which
            does not use it for annotated data. It defaults to None so
            existing one-argument callers keep working (the previous code
            omitted the argument and therefore always raised TypeError).
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Unused here; origins come from the stored lines.
        """
        result = []
        lines = iter(lines)
        next_line = lines.next

        # Each header is followed by `count` content lines, which are pulled
        # from the same iterator that the for loop is advancing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next_line().split(' ', 1))
                        for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Iterate over the (origin, text) pairs of version_id in knit."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

297

298

299

class KnitPlainFactory(_KnitFactory):
    """Factory for content whose stored lines carry no origin prefix."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Turn an unannotated fulltext into a KnitContent.

        Not a no-op: the internal form is still (version_id, line) pairs,
        with version_id used as the origin of every line.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, pairs) for each hunk in lines.

        :param lines: An indexable sequence of header and content lines.
        :param version_id: The origin attached to every content line.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            body = lines[body_start:body_end]
            yield start, end, count, [(version_id, text) for text in body]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks from parse_line_delta_iter() as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Iterate over the content lines of a fulltext (stored verbatim)."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        The hunk headers are consumed and their start/end fields dropped.
        """
        stream = iter(lines)
        fetch = stream.next
        for header in stream:
            count = int(header.split(',')[2])
            for _unused in xrange(count):
                yield fetch()

    def lower_fulltext(self, content):
        """Serialise a content object: simply its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta as header lines followed by plain text lines."""
        serialised = []
        for start, end, count, pairs in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            for origin, text in pairs:
                serialised.append(text)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Delegate annotation of version_id to annotate_knit()."""
        return annotate_knit(knit, version_id)

355

356

357

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport that will hold the knit files.
    :param relpath: Path of the knit relative to transport.
    :return: The writable, plain (unannotated) KnitVersionedFile.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a factory
    # instance. The earlier call passed the arguments swapped, passed 'w' as
    # file_mode, passed the factory class, and dropped the result.
    # create=True is needed because without it only an existing knit can be
    # opened.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

360

361

362

class KnitVersionedFile(VersionedFile):

363

"""Weave-like structure with faster random access.

364

365

A knit stores a number of texts and a summary of the relationships

366

between them. Texts are identified by a string version-id. Texts

367

are normally stored and retrieved as a series of lines, but can

368

also be passed as single strings.

369

370

Lines are stored with the trailing newline (if any) included, to

371

avoid special cases for files with no final newline. Lines are

372

composed of 8-bit characters, not unicode. The combination of

373

these approaches should mean any 'binary' file can be safely

374

stored and retrieved.

375

"""

376

377

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base path of the knit; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it to name the index and data files.
    :param transport: Transport used to reach those files.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: Content factory; defaults to a KnitAnnotateFactory.
    :param basis_knit: Deprecated; passing it only triggers a warning.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.
    :param access_method: A data access object to use in place of the
        default _KnitAccess.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit parameter"
             " is deprecated as of bzr 0.9.",
             DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta() walks.
    self._max_delta_chain = 200

    # The index must exist before len(self) is used below.
    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    # Create the data file now only for a new, empty knit that was not asked
    # to delay creation.
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

422

423

def __repr__(self):

424

return '%s(%s)' % (self.__class__.__name__,

425

self.transport.abspath(self.filename))

426

427

def _check_should_delta(self, first_parents):

428

"""Iterate back through the parent listing, looking for a fulltext.

429

430

This is used when we want to decide whether to add a delta or a new

431

fulltext. It searches for _max_delta_chain parents. When it finds a

432

fulltext parent, it sees if the total size of the deltas leading up to

433

it is large enough to indicate that we want a new full text anyway.

434

435

Return True if we should create a new delta, False if we should use a

436

full text.

437

"""

438

delta_size = 0

439

fulltext_size = None

440

delta_parents = first_parents

441

for count in xrange(self._max_delta_chain):

442

parent = delta_parents[0]

443

method = self._index.get_method(parent)

444

index, pos, size = self._index.get_position(parent)

445

if method == 'fulltext':

446

fulltext_size = size

447

break

448

delta_size += size

449

delta_parents = self._index.get_parents(parent)

450

else:

451

# We couldn't find a fulltext, so we must create a new one

452

return False

453

454

return fulltext_size > delta_size

455

456

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    Stores the delta natively as a 'line-delta' record when it has a
    delta_parent and _check_should_delta() approves; otherwise defers to
    the VersionedFile implementation.

    :param sha1: Digest recorded with the stored record.
    :param noeol: If true, the 'no-eol' option is recorded in the index.
    """
    self._check_add(version_id, [])  # should we check the lines ?
    self._check_versions_present(parents)
    # A loop that sorted parents into 'present' and 'ghost' lists used to
    # follow here; nothing ever read those lists, so it has been removed.

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas. This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

510

511

def _add_raw_records(self, records, data):

512

"""Add all the records 'records' with data pre-joined in 'data'.

513

514

:param records: A list of tuples(version_id, options, parents, size).

515

:param data: The data for the records. When it is written, the records

516

are adjusted to have pos pointing into data by the sum of

517

the preceding records sizes.

518

"""

519

# write all the data

520

raw_record_sizes = [record[3] for record in records]

521

positions = self._data.add_raw_records(raw_record_sizes, data)

522

offset = 0

523

index_entries = []

524

for (version_id, options, parents, size), access_memo in zip(

525

records, positions):

526

index_entries.append((version_id, options, access_memo, parents))

527

if self._data._do_cache:

528

self._data._cache[version_id] = data[offset:offset+size]

529

offset += size

530

self._index.add_versions(index_entries)

531

532

def enable_cache(self):

533

"""Start caching data for this knit"""

534

self._data.enable_cache()

535

536

def clear_cache(self):

537

"""Clear the data cache only."""

538

self._data.clear_cache()

539

540

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    Copies the index under a temporary name, then the data file, and only
    then moves the index copy to its final name.
    """
    final_index = name + INDEX_SUFFIX
    temp_index = final_index + '.tmp'
    # The index goes to a temporary name first to avoid racing with local
    # writes.
    transport.put_file_non_atomic(temp_index,
                                  self.transport.get(self._index._filename))
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # The data is in place; publish the copied index.
    transport.move(temp_index, final_index)

554

555

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name that shares this knit's factory and delta.

    :param mode: Accepted for interface compatibility; not used here.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

558

559

def _fix_parents(self, version_id, new_parents):

560

"""Fix the parents list for version.

561

562

This is done by appending a new version to the index

563

with identical data except for the parents list.

564

the parents list must be a superset of the current

565

list.

566

"""

567

current_values = self._index._cache[version_id]

568

assert set(current_values[4]).difference(set(new_parents)) == set()

569

self._index.add_version(version_id,

570

current_values[1],

571

(None, current_values[2], current_values[3]),

572

new_parents)

573

574

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        No transitive closure is computed; the set is used as given.
    :raises RevisionNotPresent: If a requested version is not in this knit.
    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    """
    wanted = set([osutils.safe_revision_id(v) for v in required_versions])
    # The caller decides what to include; here we only insist that it exists.
    for version_id in wanted:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Keep the order in which self.versions() reports them (the original
    # comment describes that as oldest-to-newest).
    ordered = [v for v in self.versions() if v in wanted]

    # Collect the index details of every version to be sent.
    to_read = []
    result_version_list = []
    for version_id in ordered:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        to_read.append((version_id, index_memo))
        _none, _data_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append(
            (version_id, options, data_size, parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    chunks = []
    raw_records = self._data.read_records_iter_raw(to_read)
    for (version_id, raw_data), expected in izip(raw_records,
                                                 result_version_list):
        version_id2, _options, _length, _parents = expected
        assert version_id == version_id2, 'logic error, inconsistent results'
        chunks.append(raw_data)
    pseudo_file = StringIO(''.join(chunks))

    def read(length):
        # None means "everything that is left".
        if length is None:
            return pseudo_file.read()
        return pseudo_file.read(length)

    return (self.get_format_signature(), result_version_list, read)

634

635

def _extract_blocks(self, version_id, source, target):

636

if self._index.get_method(version_id) != 'line-delta':

637

return None

638

parent, sha1, noeol, delta = self.get_delta(version_id)

639

return KnitContent.get_line_delta_blocks(delta, source, target)

640

641

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :raises RevisionNotPresent: If version_id is not in this knit.
    :return: (parent, sha1, noeol, delta), where parent is the first parent
        of version_id, or None if it has no parents.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Already stored as a delta: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    # Stored as a fulltext: build the delta by diffing against the parent
    # (or against nothing when there is no parent).
    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

669

670

def get_format_signature(self):

671

"""See VersionedFile.get_format_signature()."""

672

if self.factory.annotated:

673

annotated_part = "annotated"

674

else:

675

annotated_part = "plain"

676

return "knit-%s" % (annotated_part,)

677

678

def get_graph_with_ghosts(self):

679

"""See VersionedFile.get_graph_with_ghosts()."""

680

graph_items = self._index.get_graph()

681

return dict(graph_items)

682

683

def get_sha1(self, version_id):

684

return self.get_sha1s([version_id])[0]

685

686

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: The digests of version_ids, in the order requested.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for version_id in safe_ids:
        # Entry 2 of each record is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

692

693

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list holding the data suffix, then the index suffix.
    """
    return list((DATA_SUFFIX, INDEX_SUFFIX))

697

698

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version that some entry names as a parent but which is
    not itself in the index cache.
    """
    version_id = osutils.safe_revision_id(version_id)
    if self.has_version(version_id):
        # Present versions are not ghosts.
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

712

713

def insert_data_stream(self, (format, data_list, reader_callable)):

714

"""Insert knit records from a data stream into this knit.

715

716

If a version in the stream is already present in this knit, it will not

717

be inserted a second time. It will be checked for consistency with the

718

stored version however, and may cause a KnitCorrupt error to be raised

719

if the data in the stream disagrees with the already stored data.

720

721

:seealso: get_data_stream

722

"""

723

if format != self.get_format_signature():

724

trace.mutter('incompatible format signature inserting to %r', self)

725

raise KnitDataStreamIncompatible(

726

format, self.get_format_signature())

727

728

for version_id, options, length, parents in data_list:

729

if self.has_version(version_id):

730

# First check: the list of parents.

731

my_parents = self.get_parents_with_ghosts(version_id)

732

if my_parents != parents:

733

# XXX: KnitCorrupt is not quite the right exception here.

734

raise KnitCorrupt(

735

self.filename,

736

'parents list %r from data stream does not match '

737

'already recorded parents %r for %s'

738

% (parents, my_parents, version_id))

739

740

# Also check the SHA-1 of the fulltext this content will

741

# produce.

742

raw_data = reader_callable(length)

743

my_fulltext_sha1 = self.get_sha1(version_id)

744

df, rec = self._data._parse_record_header(version_id, raw_data)

745

stream_fulltext_sha1 = rec[3]

746

if my_fulltext_sha1 != stream_fulltext_sha1:

747

# Actually, we don't know if it's this knit that's corrupt,

748

# or the data stream we're trying to insert.

749

raise KnitCorrupt(

750

self.filename, 'sha-1 does not match %s' % version_id)

751

else:

752

self._add_raw_records(

753

[(version_id, options, parents, length)],

754

reader_callable(length))

755

756

def versions(self):
    """See VersionedFile.versions."""
    # mutter_callsite is given a stack depth, so it must be called from
    # this frame and not from a helper.
    flags = debug.debug_flags
    if 'evil' in flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    index = self._index
    return index.get_versions()

761

762

def has_version(self, version_id):
    """See VersionedFile.has_version."""
    # mutter_callsite is given a stack depth, so it must be called from
    # this frame and not from a helper.
    flags = debug.debug_flags
    if 'evil' in flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

768

769

__contains__ = has_version

770

771

def _merge_annotations(self, content, parents, parent_texts={},

772

delta=None, annotated=None,

773

left_matching_blocks=None):

774

"""Merge annotations for content. This is done by comparing

775

the annotations based on changed to the text.

776

"""

777

if left_matching_blocks is not None:

778

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

779

else:

780

delta_seq = None

781

if annotated:

782

for parent_id in parents:

783

merge_content = self._get_content(parent_id, parent_texts)

784

if (parent_id == parents[0] and delta_seq is not None):

785

seq = delta_seq

786

else:

787

seq = patiencediff.PatienceSequenceMatcher(

788

None, merge_content.text(), content.text())

789

for i, j, n in seq.get_matching_blocks():

790

if n == 0:

791

continue

792

# this appears to copy (origin, text) pairs across to the

793

# new content for any line that matches the last-checked

794

# parent.

795

content._lines[j:j+n] = merge_content._lines[i:i+n]

796

if delta:

797

if delta_seq is None:

798

reference_content = self._get_content(parents[0], parent_texts)

799

new_texts = content.text()

800

old_texts = reference_content.text()

801

delta_seq = patiencediff.PatienceSequenceMatcher(

802

None, old_texts, new_texts)

803

return self._make_line_delta(delta_seq, content)

804

805

def _make_line_delta(self, delta_seq, new_content):

806

"""Generate a line delta from delta_seq and new_content."""

807

diff_hunks = []

808

for op in delta_seq.get_opcodes():

809

if op[0] == 'equal':

810

continue

811

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

812

return diff_hunks

813

814

def _get_components_positions(self, version_ids):

815

"""Produce a map of position data for the components of versions.

816

817

This data is intended to be used for retrieving the knit records.

818

819

A dict of version_id to (method, data_pos, data_size, next) is

820

returned.

821

method is the way referenced data should be applied.

822

data_pos is the position of the data in the knit.

823

data_size is the size of the data in the knit.

824

next is the build-parent of the version, or None for fulltexts.

825

"""

826

component_data = {}

827

for version_id in version_ids:

828

cursor = version_id

829

830

while cursor is not None and cursor not in component_data:

831

method = self._index.get_method(cursor)

832

if method == 'fulltext':

833

next = None

834

else:

835

next = self.get_parents(cursor)[0]

836

index_memo = self._index.get_position(cursor)

837

component_data[cursor] = (method, index_memo, next)

838

cursor = next

839

return component_data

840

841

def _get_content(self, version_id, parent_texts=None):
    """Return a content object that makes up the specified version.

    :param version_id: The version to fetch.
    :param parent_texts: Optional dict of version_id to already-built
        content objects. A non-None entry for version_id is returned
        as is. None (the default) means no cached contents.
    :raises RevisionNotPresent: if has_version(version_id) is false.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    # The default is None rather than a shared {} literal so the default
    # can never be mutated between calls.
    if parent_texts is not None:
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

853

854

def _check_versions_present(self, version_ids):
    """Ask the index to verify that every id in version_ids is present.

    Whatever the index's check_versions_present() raises propagates.
    """
    index = self._index
    index.check_versions_present(version_ids)

857

858

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines_with_ghosts().

    Unlike _add_lines, the parents are not checked for presence here.
    """
    self._check_add(version_id, lines)
    # _add receives a slice copy, never the caller's own sequence.
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

862

863

def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks=None):
    """See VersionedFile.add_lines.

    Validates the new version and checks that all parents are present
    before handing a copy of lines to _add.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    # _add receives a slice copy, never the caller's own sequence.
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts, left_matching_blocks)

870

871

def _check_add(self, version_id, lines):
    """Verify that version_id and lines may be added to this knit.

    The checks run in a fixed order and the first failure wins.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already stored.

    Reserved-id and line-format checks are delegated to
    check_not_reserved_id, _check_lines_not_unicode and
    _check_lines_are_lines, which raise their own errors.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    has_whitespace = contains_whitespace(version_id)
    if has_whitespace:
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_present = self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

882

883

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks=None):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: List of lines of the new text. If the last line lacks
        a trailing newline one is appended in place and the record is
        flagged 'no-eol'.
    :param parent_texts: Dict of cached parent contents, or None.
    :return: (digest, text_length, content) where digest and text_length
        are computed from lines as passed in and content is the object
        returned by self.factory.make().
    """
    if parent_texts is None:
        parent_texts = {}
    # Only parents actually stored in this knit can be delta bases or
    # annotation sources.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not present_parents:
        delta = False

    # Both are computed before any newline is appended below.
    digest = sha_strings(lines)
    text_length = sum([len(line) for line in lines])
    options = []
    if lines and lines[-1][-1] != '\n':
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if present_parents and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and present_parents):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(
            lines, present_parents, parent_texts, delta,
            self.factory.annotated, left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return digest, text_length, lines

949

950

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no verification: progress_bar is
    ignored and None is returned.
    """
    return None

952

953

def _clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text().

    The text of old_version_id is read back and re-added under
    new_version_id.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)

958

959

def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    # A single-version lookup through the batch API.
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

962

963

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is the record content exactly as yielded by
    self._data.read_records_iter().
    digest is the digest yielded alongside that content.
    next is the build-parent of the version, or None when the method is
    fulltext.

    Every component named by _get_components_positions(version_ids) is
    read, not just the requested versions.
    """
    position_map = self._get_components_positions(version_ids)
    # Only the component id and its index memo are needed to read a record.
    to_read = [(component_id, details[1])
               for component_id, details in position_map.iteritems()]
    record_map = {}
    for component_id, content, digest in \
            self._data.read_records_iter(to_read):
        method, index_memo, build_parent = position_map[component_id]
        record_map[component_id] = (method, content, digest, build_parent)
    return record_map

984

985

def get_text(self, version_id):
    """See VersionedFile.get_text"""
    # A single-version lookup through the batch API.
    texts = self.get_texts([version_id])
    return texts[0]

988

989

def get_texts(self, version_ids):
    """Return the text of each listed version as one joined string.

    The result follows the order of get_line_list(version_ids).
    """
    joined = []
    for line_list in self.get_line_list(version_ids):
        joined.append(''.join(line_list))
    return joined

991

992

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Each id is first passed through osutils.safe_revision_id and then
    checked with check_not_reserved_id. The result is in the order of
    version_ids.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

_get_lf_split_line_list = get_line_list

1001

1002

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if any requested version is missing.
    :raises KnitCorrupt: if a rebuilt text does not match its stored
        sha-1 digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    final_content = {}
    # Content of every component built so far, shared between versions so
    # that common ancestors are only parsed once.
    built = {}
    for version_id in version_ids:
        # Walk the build chain from version_id towards its fulltext,
        # stopping at the first component that is already built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built:
                break
            cursor = build_parent

        # Replay the chain oldest-first; version_id itself comes last.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in built:
                content = built[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                # Copy first: the base content is cached in built.
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            built[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Strip the trailing newline on a copy, so the cached content
            # keeps its newline.
            content = content.copy()
            last = content._lines[-1]
            content._lines[-1] = (last[0], last[1].rstrip('\n'))
        final_content[version_id] = content

        # digest here is the digest from the last applied component,
        # which is version_id's own record.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1056

1057

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: Versions to walk, or None for self.versions().
    :param pb: Progress bar to update; a DummyProgress is used when None.
    :raises RevisionNotPresent: if a requested version is missing.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Queue the records in the order self.versions() lists them; each
    # entry is (version_id, index_memo).
    version_id_records = [
        (version_id, self._index.get_position(version_id))
        for version_id in self.versions()
        if version_id in requested_versions]

    total = len(version_id_records)
    records = self._data.read_records_iter(version_id_records)
    for version_idx, (version_id, data, sha_value) in enumerate(records):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        for line in line_iterator:
            yield line

    pb.update('Walking content.', total, total)

1096

1097

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

1109

1110

def num_versions(self):
    """See VersionedFile.num_versions().

    The count is whatever the index reports.
    """
    index = self._index
    return index.num_versions()

__len__ = num_versions

1115

1116

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    The work is delegated to self.factory.annotate_iter().
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self.factory.annotate_iter(self, safe_id)

1120

1121

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

1131

1132

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

1139

1140

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id string or a list of them.
    :param topo_sorted: Passed through to the index.
    :return: [] for an empty request, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        # A lone id is treated as a one-element list.
        versions = [versions]
    if not versions:
        return []
    safe_ids = [osutils.safe_revision_id(v) for v in versions]
    return self._index.get_ancestry(safe_ids, topo_sorted)

1148

1149

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or a list of them.
    :return: [] for an empty request, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        # A lone id is treated as a one-element list.
        versions = [versions]
    if not versions:
        return []
    safe_ids = [osutils.safe_revision_id(v) for v in versions]
    return self._index.get_ancestry_with_ghosts(safe_ids)

1157

1158

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    The plan is computed by merge._plan_annotate_merge from the
    annotations and the (unsorted) ancestries of both versions.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    # Sorting is skipped: only set membership is needed.
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)
    return plan

1169

1170

1171

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Record where the file lives and how it should be created.

        Nothing is read or written here; the arguments are only stored
        on the instance.
        """
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # Starts False; _KnitIndex sets it when creation is delayed.
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Read the first line of fp and verify it equals self.HEADER.

        :raises errors.NoSuchFile: if fp is empty.
        :raises KnitHeaderError: if the first line is not the header.
        """
        line = fp.readline()
        if line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if line == self.HEADER:
            return
        raise KnitHeaderError(
            badline=line,
            filename=self._transport.abspath(self._filename))

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1200

1201

1202

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to a Index
    object.

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write is interrupted to the index file, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it's normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).

        The cache entry is the tuple
        (version_id, options, pos, size, parents, index), where index is
        the position of the first entry for version_id in _history.
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from transport, or arrange for its creation.

        :param create: With mode 'w', allow a missing file to be created.
        :param delay_create: When creating, defer writing the file until
            the first add_versions() call instead of writing the header
            immediately.
        :raises NoSuchFile: if the file is missing (or empty) and it may
            not be created.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Ghost parents (parents not in the cache) are left out.

        :raises RevisionNotPresent: if a requested version is not cached.
        """
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghosts reached through parents are included as parentless nodes.
        The requested versions themselves must be present.
        """
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            # Only the lookup is guarded, so a KeyError thrown into the
            # generator by its consumer is not silently swallowed.
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references for one index line.

        Cached versions are written as their sequence number, others as
        '.' followed by the version id. The result is space separated.
        """
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, index_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, index_memo, parents),
                         where index_memo is an (index, pos, size) tuple;
                         its index element is not used here.

        If anything fails, the in-memory history and cache are restored
        to their state before the call and the error is re-raised.
        """
        lines = []
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                # Creation was delayed: write header and records together.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        return None, entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises errors.KnitIndexUnknownMethod: if the options name neither
            'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            if 'line-delta' not in options:
                raise errors.KnitIndexUnknownMethod(self._full_path(), options)
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options cached for version_id.

        This is the options object as it was handed to _cache_version
        (add_versions passes a sequence of flag strings such as
        'fulltext' or 'no-eol'); it is not joined into a string.
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first missing version.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

1481

1482

1483

class KnitGraphIndex(object):

1484

"""A knit index that builds on GraphIndex."""

1485

1486

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of bzrlib.index.GraphIndex.
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :raises KnitCorrupt: if deltas are requested without parent tracking.
    """
    self._graph_index = graph_index
    self._deltas = deltas
    self._add_callback = add_callback
    self._parents = parents
    # The attributes are assigned before this check, as before.
    if not parents and deltas:
        raise KnitCorrupt(self, "Cannot do delta compression without "
            "parent tracking.")

1504

1505

def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    :param keys: An iterable of index keys, - 1-tuples.
    :param check_present: If True, raise RevisionNotPresent once
        iteration finishes if any requested key was not found.
    :return: A generator of nodes. With parent tracking the nodes come
        straight from the graph index; without it each node is padded
        to (index, key, value, ()).
    """
    wanted = set(keys)
    found = set()
    with_parents = self._parents
    for node in self._graph_index.iter_entries(wanted):
        if with_parents:
            yield node
        else:
            # adapt parentless index to the rest of the code.
            yield node[0], node[1], node[2], ()
        found.add(node[1])
    if not check_present:
        return
    missing_keys = wanted.difference(found)
    if missing_keys:
        raise RevisionNotPresent(missing_keys.pop(), self)

1525

1526

def _present_keys(self, version_ids):
    """Return the set of keys among version_ids that the index holds.

    Despite the parameter name, the values are passed to _get_entries
    as index keys.
    """
    present = set()
    for node in self._get_entries(version_ids):
        present.add(node[1])
    return present

1529

1530

def _parentless_ancestry(self, versions):
    """Honour the get_ancestry API for parentless knit indices.

    Without parent tracking the ancestry of a set of versions is just
    those versions.

    :raises RevisionNotPresent: if any requested version is missing.
    """
    wanted_keys = self._version_ids_to_keys(versions)
    present_keys = self._present_keys(wanted_keys)
    missing = set(wanted_keys).difference(present_keys)
    if not missing:
        return list(self._keys_to_version_ids(present_keys))
    raise RevisionNotPresent(missing.pop(), self)

1538

1539

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :raises RevisionNotPresent: if a requested version is not in the
        index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        found = set()
        pending = set()
        for (index, key, value, node_refs) in \
                self._get_entries(this_iteration):
            # dont ask for ghosties - otherwise
            # we we can end up looping with pending
            # being entirely ghosted.
            known_parents = [parent for parent in node_refs[0]
                             if parent not in ghosts]
            graph[key] = known_parents
            # queue parents, but dont examine known nodes again
            for parent in known_parents:
                if parent not in graph:
                    pending.add(parent)
            found.add(key)
        # Anything asked for in this round but not returned is a ghost.
        ghosts.update(this_iteration.difference(found))
    absent = versions.difference(graph)
    if absent:
        raise RevisionNotPresent(versions.difference(graph).pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        result_keys = graph.iterkeys()
    return [key[0] for key in result_keys]

1577

1578

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Ghosts reached through parents appear in the result as parentless
    nodes.

    :raises RevisionNotPresent: if a requested version is itself missing.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        pending = set()
        for (index, key, value, node_refs) in \
                self._get_entries(this_iteration):
            graph[key] = node_refs[0]
            # queue parents, but dont examine known nodes again
            for parent in graph[key]:
                if parent not in graph:
                    pending.add(parent)
        missing_versions = this_iteration.difference(graph)
        missing_needed = versions.intersection(missing_versions)
        if missing_needed:
            raise RevisionNotPresent(missing_needed.pop(), self)
        for missing_version in missing_versions:
            # add a key, no parents
            graph[missing_version] = []
            pending.discard(missing_version) # don't look for it
    return [key[0] for key in topo_sort(graph.items())]

1611

1612

def get_graph(self):
    """Return a list of the node:parents lists from this knit index.

    Each item is (version_id, parent_version_ids). Without parent
    tracking every version is paired with an empty tuple.
    """
    if not self._parents:
        return [(key, ()) for key in self.get_versions()]
    return [
        (key[0], tuple([ref[0] for ref in refs[0]]))
        for index, key, value, refs
        in self._graph_index.iter_all_entries()]

1620

1621

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.

    Parents that are not themselves present in the index are left out.
    """
    keys = self._version_ids_to_keys(version_ids)
    if not self._parents:
        for node in self._get_entries(keys):
            yield node[1][0], ()
        return
    all_nodes = set(self._get_entries(keys))
    all_parents = set()
    present_parents = set()
    for node in all_nodes:
        all_parents.update(node[3][0])
        # any node we are querying must be present
        present_parents.add(node[1])
    # One extra lookup settles which of the remaining parents exist.
    unknown_parents = all_parents.difference(present_parents)
    present_parents.update(self._present_keys(unknown_parents))
    for node in all_nodes:
        parents = [parent[0] for parent in node[3][0]
                   if parent in present_parents]
        yield node[1][0], tuple(parents)

1649

1650

def num_versions(self):
    """Return the number of entries in the underlying graph index.

    Every entry is iterated in order to count them.
    """
    all_entries = list(self._graph_index.iter_all_entries())
    return len(all_entries)

__len__ = num_versions

1654

1655

def get_versions(self):
    """Get all the versions in the file. not topologically sorted."""
    version_ids = []
    for node in self._graph_index.iter_all_entries():
        # node[1] is the key, a 1-tuple holding the version id.
        version_ids.append(node[1][0])
    return version_ids

1658

1659

def has_version(self, version_id):
    """True if the version is in the index."""
    keys = self._version_ids_to_keys([version_id])
    present = self._present_keys(keys)
    return len(present) == 1

1662

1663

def _keys_to_version_ids(self, keys):
    """Convert index keys (1-tuples) into a tuple of plain version ids."""
    version_ids = []
    for key in keys:
        version_ids.append(key[0])
    return tuple(version_ids)

1665

1666

def get_position(self, version_id):
    """Return the details needed to read the record for version_id.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    entry = self._get_node(version_id)
    # The first character of the node value is a flag; the rest is
    # "<position> <size>".
    fields = entry[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return entry[0], position, size

1675

1676

def get_method(self, version_id):
    """Return the compression method used for version_id.

    An index that does not support deltas always answers 'fulltext'
    without looking the version up.
    """
    if self._deltas:
        # the second reference list decides between delta and fulltext
        compression_refs = self._get_node(version_id)[3][1]
        return self._parent_compression(compression_refs)
    return 'fulltext'

1681

1682

def _parent_compression(self, reference_list):
    """Map a node's second reference list to a compression method name.

    :param reference_list: the second reference list of a node.
    :return: 'fulltext' when the list is empty, 'line-delta' otherwise.
    """
    if len(reference_list) == 0:
        return 'fulltext'
    return 'line-delta'

1688

1689

def _get_node(self, version_id):
    """Return the first index entry found for version_id.

    Raises IndexError when the lookup yields no entries.
    """
    wanted = self._version_ids_to_keys([version_id])
    matches = list(self._get_entries(wanted))
    return matches[0]

1691

1692

def get_options(self, version_id):
    """Return the list of option strings recorded for version_id.

    The first element is the compression method ('fulltext' or
    'line-delta'); 'no-eol' is appended when the node value starts
    with 'N'.

    e.g. ['line-delta', 'no-eol']
    """
    entry = self._get_node(version_id)
    if self._deltas:
        method = self._parent_compression(entry[3][1])
    else:
        method = 'fulltext'
    options = [method]
    if entry[2][0] == 'N':
        options.append('no-eol')
    return options

1705

1706

def get_parents(self, version_id):
    """Return the parents of version_id, ignoring ghosts.

    :raises errors.RevisionNotPresent: when version_id is not in the
        index.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        return found[0][1]
    # nothing came back, so the key is missing
    raise errors.RevisionNotPresent(version_id, self)

1713

1714

def get_parents_with_ghosts(self, version_id):
    """Return the parents of version_id, including ghosts.

    The entry is looked up (with check_present=True) before the parents
    setting is consulted; an index that does not track parents then
    answers with an empty tuple.
    """
    wanted = self._version_ids_to_keys([version_id])
    entries = list(self._get_entries(wanted, check_present=True))
    if self._parents:
        # first reference list of the first entry holds the parent keys
        return self._keys_to_version_ids(entries[0][3][0])
    return ()

1721

1722

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: An iterable of version ids to check.
    :raises RevisionNotPresent: naming one of the missing version ids
        when at least one of them is absent from the index.
    """
    keys = self._version_ids_to_keys(version_ids)
    present = self._present_keys(keys)
    missing = keys.difference(present)
    if missing:
        # Keys are 1-tuples; report the version id itself rather than the
        # internal key tuple.
        raise RevisionNotPresent(missing.pop()[0], self)

1729

1730

def add_version(self, version_id, options, access_memo, parents):
    """Add a single version record to the index.

    This is add_versions called with a one-element sequence.
    """
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))

1733

1734

def add_versions(self, versions):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    Versions that are already present with identical details are
    silently dropped from the set handed to the callback.

    :param versions: a list of tuples:
                     (version_id, options, access_memo, parents), where
                     access_memo is an (index, pos, size) tuple.
    :raises errors.ReadOnlyError: if the index has no add callback.
    :raises KnitCorrupt: if a record is inconsistent with the index
        settings (a line-delta in a non-delta index, a line-delta with
        no parent, parents in a parentless index) or with an entry that
        is already present under the same key.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    keys = {}
    for (version_id, options, access_memo, parents) in versions:
        # Only pos and size are recorded in the node value; the memo's
        # index component is not stored.
        _index, pos, size = access_memo
        key = (version_id, )
        parents = tuple((parent, ) for parent in parents)
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        is_delta = 'line-delta' in options
        if is_delta and not self._deltas:
            raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                if is_delta:
                    if not parents:
                        # parents[0] below would otherwise fail with a
                        # bare IndexError.
                        raise KnitCorrupt(self, "attempt to add line-delta "
                            "without a compression parent")
                    node_refs = (parents, (parents[0],))
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    # check for dups. The lookup is materialised first because keys is
    # mutated while walking the results.
    present_nodes = list(self._get_entries(keys))
    for (index, key, value, node_refs) in present_nodes:
        if (value, node_refs) != keys[key]:
            raise KnitCorrupt(self, "inconsistent details in add_versions"
                ": %s %s" % ((value, node_refs), keys[key]))
        del keys[key]
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in keys.items()]
    else:
        # parentless indices take nodes without reference lists
        result = [(key, value)
                  for key, (value, node_refs) in keys.items()]
    self._add_callback(result)

1792

1793

def _version_ids_to_keys(self, version_ids):
    """Wrap each version id in a 1-tuple and return the keys as a set."""
    keys = set()
    for version_id in version_ids:
        keys.add((version_id, ))
    return keys

1795

1796

1797

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: mode passed to the transport when the file is
            written.
        :param _dir_mode: directory mode passed to the transport on the
            first write.
        :param _need_to_create: when true, the first add_raw_records call
            writes the file with put_bytes_non_atomic instead of
            appending to it.
        :param _create_parent_dir: forwarded as create_parent_dir on that
            first write.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data
            segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo
            is a tuple - (index, pos, length), where the index field is
            always None for the .knit access method.
        """
        assert type(raw_data) is str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # first write: create the file, so the records start at 0
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            offset = 0
        else:
            # append_bytes' return value is used as the start offset
            offset = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, offset, size))
            offset += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle. None is returned when the
            transport raises NoSuchFile.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = []
        for _index, pos, size in memos_for_retrieval:
            ranges.append((pos, size))
        for _pos, data in self._transport.readv(self._filename, ranges):
            yield data

1875

1876

1877

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            # the position and length reported by the writer go into the
            # memo, not the offsets into raw_data
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group runs of consecutive memos that share an index
        # into one request each
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        # second pass, one readv-backed reader per request
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: the container writer that add_raw_records will use.
        :param index: the index object that memos for newly added records
            will carry.
        :param transport_packname: a (transport, packname) pair that is
            registered in the index-to-pack map under index.
        """
        # Unpacked here rather than in the signature: tuple parameters
        # were removed from the language by PEP 3113. Callers still pass
        # a 2-tuple positionally.
        transport, packname = transport_packname
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

1961

1962

1963

class _KnitData(object):

1964

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1965

1966

The KnitData class provides the logic for parsing and using knit records,

1967

making use of an access method for the low level read and write operations.

1968

"""

1969

1970

def __init__(self, access):

1971

"""Create a KnitData object.

1972

1973

:param access: The access method to use. Access methods such as

1974

_KnitAccess manage the insertion of raw records and the subsequent

1975

retrieval of the same.

1976

"""

1977

self._access = access

1978

self._checked = False

1979

# TODO: jam 20060713 conceptually, this could spill to disk

1980

# if the cached size gets larger than a certain amount

1981

# but it complicates the model a bit, so for now just use

1982

# a simple dictionary

1983

self._cache = {}

1984

self._do_cache = False

1985

1986

def enable_cache(self):

1987

"""Enable caching of reads."""

1988

self._do_cache = True

1989

1990

def clear_cache(self):

1991

"""Clear the record cache."""

1992

self._do_cache = False

1993

self._cache = {}

1994

1995

def _open_file(self):

1996

return self._access.open_file()

1997

1998

def _record_to_data(self, version_id, digest, lines):

1999

"""Convert version_id, digest, lines into a raw data block.

2000

2001

:return: (len, a StringIO instance with the raw data ready to read.)

2002

"""

2003

sio = StringIO()

2004

data_file = GzipFile(None, mode='wb', fileobj=sio,

2005

compresslevel=Z_DEFAULT_COMPRESSION)

2006

2007

assert isinstance(version_id, str)

2008

data_file.writelines(chain(

2009

["version %s %d %s\n" % (version_id,

2010

len(lines),

2011

digest)],

2012

lines,

2013

["end %s\n" % version_id]))

2014

data_file.close()

2015

length= sio.tell()

2016

2017

sio.seek(0)

2018

return length, sio

2019

2020

def add_raw_records(self, sizes, raw_data):

2021

"""Append a prepared record to the data file.

2022

2023

:param sizes: An iterable containing the size of each raw data segment.

2024

:param raw_data: A bytestring containing the data.

2025

:return: a list of index data for the way the data was stored.

2026

See the access method add_raw_records documentation for more

2027

details.

2028

"""

2029

return self._access.add_raw_records(sizes, raw_data)

2030

2031

def add_record(self, version_id, digest, lines):

2032

"""Write new text record to disk.

2033

2034

Returns index data for retrieving it later, as per add_raw_records.

2035

"""

2036

size, sio = self._record_to_data(version_id, digest, lines)

2037

result = self.add_raw_records([size], sio.getvalue())

2038

if self._do_cache:

2039

self._cache[version_id] = sio.getvalue()

2040

return result[0]

2041

2042

def _parse_record_header(self, version_id, raw_data):

2043

"""Parse a record header for consistency.

2044

2045

:return: the header and the decompressor stream.

2046

as (stream, header_record)

2047

"""

2048

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2049

try:

2050

rec = self._check_header(version_id, df.readline())

2051

except Exception, e:

2052

raise KnitCorrupt(self._access,

2053

"While reading {%s} got %s(%s)"

2054

% (version_id, e.__class__.__name__, str(e)))

2055

return df, rec

2056

2057

def _check_header(self, version_id, line):

2058

rec = line.split()

2059

if len(rec) != 4:

2060

raise KnitCorrupt(self._access,

2061

'unexpected number of elements in record header')

2062

if rec[1] != version_id:

2063

raise KnitCorrupt(self._access,

2064

'unexpected version, wanted %r, got %r'

2065

% (version_id, rec[1]))

2066

return rec

2067

2068

def _parse_record(self, version_id, data):

2069

# profiling notes:

2070

# 4168 calls in 2880 217 internal

2071

# 4168 calls to _parse_record_header in 2121

2072

# 4168 calls to readlines in 330

2073

df = GzipFile(mode='rb', fileobj=StringIO(data))

2074

2075

try:

2076

record_contents = df.readlines()

2077

except Exception, e:

2078

raise KnitCorrupt(self._access,

2079

"While reading {%s} got %s(%s)"

2080

% (version_id, e.__class__.__name__, str(e)))

2081

header = record_contents.pop(0)

2082

rec = self._check_header(version_id, header)

2083

2084

last_line = record_contents.pop()

2085

if len(record_contents) != int(rec[2]):

2086

raise KnitCorrupt(self._access,

2087

'incorrect number of lines %s != %s'

2088

' for version {%s}'

2089

% (len(record_contents), int(rec[2]),

2090

version_id))

2091

if last_line != 'end %s\n' % rec[1]:

2092

raise KnitCorrupt(self._access,

2093

'unexpected version end line %r, wanted %r'

2094

% (last_line, version_id))

2095

df.close()

2096

return record_contents, rec[3]

2097

2098

def read_records_iter_raw(self, records):

2099

"""Read text records from data file and yield raw data.

2100

2101

This unpacks enough of the text record to validate the id is

2102

as expected but thats all.

2103

"""

2104

# setup an iterator of the external records:

2105

# uses readv so nice and fast we hope.

2106

if len(records):

2107

# grab the disk data needed.

2108

if self._cache:

2109

# Don't check _cache if it is empty

2110

needed_offsets = [index_memo for version_id, index_memo

2111

in records

2112

if version_id not in self._cache]

2113

else:

2114

needed_offsets = [index_memo for version_id, index_memo

2115

in records]

2116

2117

raw_records = self._access.get_raw_records(needed_offsets)

2118

2119

for version_id, index_memo in records:

2120

if version_id in self._cache:

2121

# This data has already been validated

2122

data = self._cache[version_id]

2123

else:

2124

data = raw_records.next()

2125

if self._do_cache:

2126

self._cache[version_id] = data

2127

2128

# validate the header

2129

df, rec = self._parse_record_header(version_id, data)

2130

df.close()

2131

yield version_id, data

2132

2133

def read_records_iter(self, records):

2134

"""Read text records from data file and yield result.

2135

2136

The result will be returned in whatever is the fastest to read.

2137

Not by the order requested. Also, multiple requests for the same

2138

record will only yield 1 response.

2139

:param records: A list of (version_id, pos, len) entries

2140

:return: Yields (version_id, contents, digest) in the order

2141

read, not the order requested

2142

"""

2143

if not records:

2144

return

2145

2146

if self._cache:

2147

# Skip records we have alread seen

2148

yielded_records = set()

2149

needed_records = set()

2150

for record in records:

2151

if record[0] in self._cache:

2152

if record[0] in yielded_records:

2153

continue

2154

yielded_records.add(record[0])

2155

data = self._cache[record[0]]

2156

content, digest = self._parse_record(record[0], data)

2157

yield (record[0], content, digest)

2158

else:

2159

needed_records.add(record)

2160

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2161

else:

2162

needed_records = sorted(set(records), key=operator.itemgetter(1))

2163

2164

if not needed_records:

2165

return

2166

2167

# The transport optimizes the fetching as well

2168

# (ie, reads continuous ranges.)

2169

raw_data = self._access.get_raw_records(

2170

[index_memo for version_id, index_memo in needed_records])

2171

2172

for (version_id, index_memo), data in \

2173

izip(iter(needed_records), raw_data):

2174

content, digest = self._parse_record(version_id, data)

2175

if self._do_cache:

2176

self._cache[version_id] = data

2177

yield version_id, content, digest

2178

2179

def read_records(self, records):

2180

"""Read records into a dictionary."""

2181

components = {}

2182

for record_id, content, digest in \

2183

self.read_records_iter(records):

2184

components[record_id] = (content, digest)

2185

return components

2186

2187

2188

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records for the versions the target lacks from the
        source, then merges the parent lists of versions both sides hold
        with differing parents.

        :return: the number of versions copied (0 when there was nothing
            to do).
        :raises errors.GraphCycleError: if a parent known only to the
            source has the version being checked in its ancestry.
        """
        source = self.source
        target = self.target
        assert isinstance(source, KnitVersionedFile)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # A nested progress bar is always used, whatever pb was passed in.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            target_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(
                target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-only parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                extra_parents = source_parents.difference(target_parents)
                needed_versions.update(
                    extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = []
            for candidate in full_list:
                if target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            scheduled = set()
            source_index = source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in scheduled or
                            not source.has_version(parent))
                index_memo = source_index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_chunks = []
            raw_records = []
            raw_iter = source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), (queued_id, options, parents) in \
                    izip(raw_iter, copy_queue):
                assert version_id == queued_id, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append(
                    (version_id, options, parents, len(raw_data)))
                raw_chunks.append(raw_data)
            target._add_raw_records(raw_records, ''.join(raw_chunks))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                combined = (target.get_parents_with_ghosts(version) +
                            list(source_parents.difference(target_parents)))
                target.fix_parents(version, combined)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

2303

2304

2305

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a weave and target is a knit."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the texts of the versions the target lacks, one add_lines
        call per version, then merges the parent lists of versions both
        sides hold where the source knows extra parents.

        :return: the number of versions converted (0 when there was
            nothing to do).
        :raises errors.GraphCycleError: if a parent known only to the
            source has the version being checked in its ancestry.
        """
        source = self.source
        target = self.target
        assert isinstance(source, bzrlib.weave.Weave)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # A nested progress bar is always used, whatever pb was passed in.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            target_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(
                target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents(version))
                # if all of the source's parents are in the target's, then
                # its fine.
                if not source_parents.difference(target_parents):
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-only parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                extra_parents = source_parents.difference(target_parents)
                needed_versions.update(
                    extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = []
            for candidate in full_list:
                if target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                combined = (target.get_parents_with_ghosts(version) +
                            list(source_parents.difference(target_parents)))
                target.fix_parents(version, combined)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2396

2397

2398

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        seq_a = self.a
        seq_b = self.b
        positions = self.b2j
        is_junk = self.isbjunk
        best_a, best_b, best_len = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, run_lengths[j] = length of
        # longest junk-free match ending with a[i-1] and b[j]
        run_lengths = {}
        for a_pos in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            previous_run = run_lengths.get
            run_lengths = {}

            # A try/except KeyError lookup is used here instead of
            # b2j.get(a[i], default): profiling of this method showed it
            # to be measurably cheaper for the knit usage pattern.
            try:
                candidates = positions[seq_a[a_pos]]
            except KeyError:
                continue
            for b_pos in candidates:
                # a[a_pos] matches b[b_pos]
                if b_pos < blo:
                    continue
                if b_pos >= bhi:
                    break
                length = run_lengths[b_pos] = previous_run(b_pos - 1, 0) + 1
                if length > best_len:
                    best_a = a_pos - length + 1
                    best_b = b_pos - length + 1
                    best_len = length

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while (best_a > alo and best_b > blo and
               not is_junk(seq_b[best_b - 1]) and
               seq_a[best_a - 1] == seq_b[best_b - 1]):
            best_a -= 1
            best_b -= 1
            best_len += 1
        while (best_a + best_len < ahi and best_b + best_len < bhi and
               not is_junk(seq_b[best_b + best_len]) and
               seq_a[best_a + best_len] == seq_b[best_b + best_len]):
            best_len += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while (best_a > alo and best_b > blo and
               is_junk(seq_b[best_b - 1]) and
               seq_a[best_a - 1] == seq_b[best_b - 1]):
            best_a -= 1
            best_b -= 1
            best_len += 1
        while (best_a + best_len < ahi and best_b + best_len < bhi and
               is_junk(seq_b[best_b + best_len]) and
               seq_a[best_a + best_len] == seq_b[best_b + best_len]):
            best_len += 1

        return best_a, best_b, best_len

2530

2531

2532

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: the knit to annotate.
    :param revision_id: the version whose annotated lines are wanted.
    :return: an iterator over the annotation computed for revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = knit.get_line_list(ancestry)
    fulltext = dict(zip(ancestry, texts))
    annotations = {}
    for candidate in ancestry:
        if candidate in annotations:
            continue
        parents = knit.get_parents(candidate)
        # Matching blocks are only derived for versions that have parents
        # and are stored as a line-delta; otherwise None is passed on.
        blocks = None
        if (len(parents) != 0 and
                knit._index.get_method(candidate) == 'line-delta'):
            parent, sha1, noeol, delta = knit.get_delta(candidate)
            blocks = KnitContent.get_line_delta_blocks(delta,
                fulltext[parents[0]], fulltext[candidate])
        parent_annotations = [annotations[p] for p in parents]
        annotations[candidate] = list(annotate.reannotate(
            parent_annotations, fulltext[candidate], candidate, blocks))
    return iter(annotations[revision_id])

2557

2558

2559

try:

2560

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2561

except ImportError:

2562

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »