/brz/remove-bazaar : revision 1551.18.6

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Aaron Bentley
Date: 2007-08-17 13:47:21 UTC
mto: (1551.19.24 Aaron's mergeable stuff)
mto: This revision was merged to the branch mainline in revision 2725.
Revision ID: abentley@panoramicfeedback.com-20070817134721-2urlrk8nqt19jvom

Add support for diff -p-style diffs to patch parser

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/bug_trackers.txt

doc/centralized_workflow.txt

doc/configuration.txt

doc/conflicts.txt

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/scratch.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/shared_repository_layouts.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

pack,

)

""")

from bzrlib import (

cache_utf8,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.tuned_gzip import GzipFile

100

from bzrlib.trace import mutter

101

from bzrlib.osutils import (

102

contains_whitespace,

103

contains_linebreaks,

104

sha_strings,

105

)

106

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

107

from bzrlib.tsort import topo_sort

108

import bzrlib.ui

109

import bzrlib.weave

110

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

111

112

113

# TODO: Split out code specific to this format into an associated object.

114

115

# TODO: Can we put in some kind of value to check that the index and data

116

# files belong together?

117

118

# TODO: accommodate binaries, perhaps by storing a byte count

119

120

# TODO: function to check whole file

121

122

# TODO: atomically append data, then measure backwards from the cursor

123

# position after writing to work out where it was located. we may need to

124

# bypass python file buffering.

125

126

DATA_SUFFIX = '.knit'

127

INDEX_SUFFIX = '.kndx'

128

129

130

class KnitContent(object):
    """Annotated lines of one knit version, usable as the base for deltas.

    ``_lines`` holds the sequence of (origin, text) pairs passed to the
    constructor.
    """

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the stored (origin, text) pairs, one per line."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a newly built list."""
        return [pair for pair in self.annotate_iter()]

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content's text into new_lines' text.

        new_lines must provide text() and _lines like this class does.
        Each hunk is (old_start, old_end, new_count, new_annotated_lines);
        ranges the matcher reports as 'equal' produce no hunk.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = KnitSequenceMatcher(None, old_texts, new_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks produced by line_delta_iter() as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return only the text of each line, dropping the origins."""
        plain = []
        for _origin, line_text in self._lines:
            plain.append(line_text)
        return plain

    def copy(self):
        """Return a new KnitContent built from a slice copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher-style matching blocks from a knit delta.

        :param knit_delta: iterable of (s_begin, s_end, t_len, new_text)
            hunks.
        :param source: indexable sequence of the source lines.
        :param target: indexable sequence of the target lines.
        :return: generator of (source_pos, target_pos, length) triples for
            the runs lying between hunks, finishing with a zero-length
            triple.
        """
        def matching_run(src_start, tgt_start, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the final
            # line of every candidate run is compared explicitly and dropped
            # from the run when it differs.
            if length > 0:
                last = length - 1
                if source[src_start + last] != target[tgt_start + last]:
                    return last
            return length

        target_len = len(target)
        s_pos = 0
        t_pos = 0
        for s_begin, s_end, t_len, new_text in knit_delta:
            gap = s_begin - s_pos
            run = matching_run(s_pos, t_pos, gap)
            if run > 0:
                yield s_pos, t_pos, run
            # Advance by the untrimmed gap so positions stay aligned.
            t_pos += t_len + gap
            s_pos = s_end
        tail = target_len - t_pos
        run = matching_run(s_pos, t_pos, tail)
        if run > 0:
            yield s_pos, t_pos, run
        yield s_pos + tail, target_len, 0

189

190

191

class _KnitFactory(object):
    """Shared base of the knit content factories."""

    def make(self, lines, version_id):
        """Build a KnitContent in which every line has origin version_id.

        :param lines: sized sequence of text lines (len() is taken of it).
        :param version_id: origin paired with each of the lines.
        """
        origins = [version_id] * len(lines)
        annotated_lines = zip(origins, lines)
        return KnitContent(annotated_lines)

197

198

199

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation.

        Each fulltext line has the form ``revid(utf8) plaintext\\n`` and is
        split on its first space into a ``(revid, plaintext)`` tuple.

        :param content: iterable of serialized fulltext lines.
        :param version_id: not used by this factory.
        :return: a KnitContent wrapping the list of tuples.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks parsed by parse_line_delta().

        :param lines: iterable of serialized delta lines.
        :param version_id: forwarded to parse_line_delta(). Optional so that
            callers passing only ``lines`` keep working.
        """
        # parse_line_delta() takes version_id as a required argument; calling
        # it with ``lines`` alone raised TypeError on every use.
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        The serialized delta is a sequence of hunks, each made of:
          a header line ``start,end,count`` (comma separated integers)
          followed by ``count`` lines of the form ``revid(utf8) newline\\n``
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: iterable of serialized delta lines.
        :param version_id: not used by this factory.
        :return: list of hunks in the internal representation.
        """
        result = []
        lines = iter(lines)
        # Bound method used to pull a hunk's content lines from the same
        # iterator that the for loop reads the headers from.
        next_line = lines.next

        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next_line().split(' ', 1))
                        for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext.

        :return: a generator of the text after the first space of each line.
        """
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            # The third header field is the number of content lines.
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.

        :param content: object whose ``_lines`` holds (origin, text) pairs.
        :return: list of ``'origin text'`` strings.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.

        :param delta: iterable of (start, end, count, lines) hunks where
            lines holds (origin, text) pairs.
        :return: list of strings: a ``start,end,count\\n`` header per hunk
            followed by one ``'origin text'`` string per pair.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

289

290

291

class KnitPlainFactory(_KnitFactory):
    """Factory producing Content objects for knits stored without annotations."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Build content from an unannotated fulltext.

        This is not a no-op: the internal representation still carries
        (versionid, line) pairs, here with one constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, contents) for each hunk in lines.

        :param lines: An indexable sequence of serialized delta lines.
        :param version_id: The origin paired with every content line.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            last = first + count
            yield start, end, count, zip([version_id] * count,
                                         lines[first:last])
            position = last

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Return an iterator over the lines of a fulltext, unchanged."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        Hunk headers are consumed to learn how many lines follow, but are
        not yielded themselves.
        """
        stream = iter(lines)
        take = stream.next
        for header in stream:
            # The third header field is the number of content lines.
            remaining = int(header.split(',')[2])
            while remaining > 0:
                yield take()
                remaining -= 1

    def lower_fulltext(self, content):
        """Return content.text() as the serializable form of a fulltext."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta as header lines followed by bare text lines."""
        serialized = []
        for start, end, count, annotated_lines in delta:
            serialized.append('%d,%d,%d\n' % (start, end, count))
            for _origin, text in annotated_lines:
                serialized.append(text)
        return serialized

344

345

346

def make_empty_knit(transport, relpath):
    """Construct a empty knit at the specified location.

    :param transport: The transport the knit files live on.
    :param relpath: The knit's path relative to transport.
    :return: The newly constructed KnitVersionedFile, opened for writing
        with a KnitPlainFactory.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a factory
    # *instance*; the mode and factory are passed by keyword so they cannot
    # land in the file_mode/access_mode slots.
    # NOTE(review): create=True is assumed to be what "construct" means here
    # - confirm against callers.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

349

350

351

class KnitVersionedFile(VersionedFile):

352

"""Weave-like structure with faster random access.

353

354

A knit stores a number of texts and a summary of the relationships

355

between them. Texts are identified by a string version-id. Texts

356

are normally stored and retrieved as a series of lines, but can

357

also be passed as single strings.

358

359

Lines are stored with the trailing newline (if any) included, to

360

avoid special cases for files with no final newline. Lines are

361

composed of 8-bit characters, not unicode. The combination of

362

these approaches should mean any 'binary' file can be safely

363

stored and retrieved.

364

"""

365

366

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Path of the knit; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it for the component files.
    :param transport: Transport used to reach the knit files.
    :param file_mode: Passed through to the index and data access objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: Content factory; defaults to a KnitAnnotateFactory().
    :param basis_knit: Deprecated; passing it only triggers a warning.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data access objects.
    :param index: An index to use for the knit.
    :param access_method: A ready-made data access object to use instead
        of constructing a _KnitAccess.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit parameter is"
             " deprecated as of bzr 0.9.",
             DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta walks back.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    if access_method is None:
        # len(self) is only usable once self._index has been assigned.
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

411

412

def __repr__(self):
    """Return 'ClassName(location)', location being the transport abspath."""
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (self.__class__.__name__, location)

415

416

def _check_should_delta(self, first_parents):
    """Decide whether a new text should be stored as a delta.

    Starting from first_parents[0], follow the first parent of each
    version for at most self._max_delta_chain steps, summing the stored
    size of every non-fulltext record passed on the way.

    :param first_parents: Parent list of the text about to be added.
    :return: True when a fulltext was reached and its stored size is
        greater than the summed delta sizes (store a delta); False when a
        new fulltext should be written instead, including when no fulltext
        was found within the allowed number of steps.
    """
    accumulated_delta = 0
    candidates = first_parents
    steps_left = self._max_delta_chain
    while steps_left > 0:
        steps_left -= 1
        ancestor = candidates[0]
        build_method = self._index.get_method(ancestor)
        _index, _pos, record_size = self._index.get_position(ancestor)
        if build_method == 'fulltext':
            return record_size > accumulated_delta
        accumulated_delta += record_size
        candidates = self._index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

444

445

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    :param version_id: Id of the version being added.
    :param parents: Parent ids recorded in the index for version_id.
    :param delta_parent: Version the delta applies to, or None when the
        delta describes a complete text.
    :param sha1: Digest stored with the record.
    :param noeol: When true, 'no-eol' is added to the record options.
    :param delta: Hunks in the form accepted by factory.lower_line_delta.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

499

500

def _add_raw_records(self, records, data):
    """Store pre-joined record data and index every record in it.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The joined data of all records; each record occupies
        `size` characters, in the order given by records.
    """
    sizes = [record[3] for record in records]
    # write all the data
    access_memos = self._data.add_raw_records(sizes, data)
    new_entries = []
    start = 0
    for record, access_memo in zip(records, access_memos):
        version_id, options, parents, size = record
        new_entries.append((version_id, options, access_memo, parents))
        if self._data._do_cache:
            # Cache this record's own slice of the joined data.
            self._data._cache[version_id] = data[start:start + size]
        start += size
    self._index.add_versions(new_entries)

520

521

def enable_cache(self):
    """Turn on caching by delegating to self._data.enable_cache()."""
    data = self._data
    data.enable_cache()

524

525

def clear_cache(self):
    """Clear the data cache by delegating to self._data.clear_cache()."""
    data = self._data
    data.clear_cache()

528

529

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; INDEX_SUFFIX and DATA_SUFFIX are
        appended for the two files written.
    :param transport: Transport the copy is written to.
    """
    temp_index = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(temp_index, index_file)
    finally:
        # Release the source index handle even if the copy fails.
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(temp_index, name + INDEX_SUFFIX)

543

544

def create_empty(self, name, transport, mode=None):
    """Return a new KnitVersionedFile sharing this knit's factory and delta.

    The new knit is constructed with create=True. The mode argument is
    accepted but not used by this implementation.
    """
    new_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)
    return new_knit

547

548

def _fix_parents(self, version_id, new_parents):
    """Re-register version_id in the index with a new parents list.

    A further index entry is added for the version, reusing fields 1, 2
    and 3 of its cached entry and replacing only the parents. The new
    parents must be a superset of the current ones (cached field 4).
    """
    entry = self._index._cache[version_id]
    assert set(entry[4]).issubset(new_parents)
    access_memo = (None, entry[2], entry[3])
    self._index.add_version(version_id, entry[1], access_memo, new_parents)

562

563

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored delta.

    :return: None unless the version is stored as a 'line-delta';
        otherwise the result of KnitContent.get_line_delta_blocks for the
        version's delta against source and target.
    """
    if self._index.get_method(version_id) == 'line-delta':
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

568

569

def get_delta(self, version_id):
    """Return (parent, sha1, noeol, delta) describing version_id.

    parent is the version's first parent, or None when it has none. For
    a record stored as a line delta the parsed delta is returned as-is;
    for a fulltext record a delta is computed against the parent's text
    (or against an empty text when there is no parent).

    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

597

598

def get_graph_with_ghosts(self):
    """Return the index graph as a dict.

    See VersionedFile.get_graph_with_ghosts().
    """
    return dict(self._index.get_graph())

602

603

def get_sha1(self, version_id):
    """Return the digest of one version, via get_sha1s."""
    digests = self.get_sha1s([version_id])
    return digests[0]

605

606

def get_sha1s(self, version_ids):
    """Return the stored digests for version_ids, in the same order."""
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for version_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

612

613

@staticmethod
def get_suffixes():
    """Return the file suffixes a knit uses: data first, then index.

    See VersionedFile.get_suffixes().
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

617

618

def has_ghost(self, version_id):
    """Return True if version_id is referenced as a parent but absent.

    A version that is present in the knit is never reported as a ghost.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

632

633

def versions(self):
    """Return the index's versions. See VersionedFile.versions."""
    index = self._index
    return index.get_versions()

636

637

def has_version(self, version_id):
    """Ask the index whether version_id is present.

    See VersionedFile.has_version. The id is passed through
    osutils.safe_revision_id before the index is consulted.
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

641

642

__contains__ = has_version

643

644

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: The new content; when annotated is true its _lines are
        updated in place.
    :param parents: Parent version ids; parents[0] is the delta basis.
    :param parent_texts: Optional dict of version_id -> content used as a
        cache by _get_content. None (the default) means no cache.
    :param delta: When true, a line delta against parents[0] is returned.
    :param annotated: When true, (origin, text) pairs are copied from each
        parent for every matching run of lines.
    :param left_matching_blocks: Optional precomputed matching blocks
        between parents[0] and content.
    :return: The line delta when delta is true, otherwise None.
    """
    if parent_texts is None:
        # Fresh dict per call rather than a shared mutable default.
        parent_texts = {}
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

677

678

def _make_line_delta(self, delta_seq, new_content):
    """Build line-delta hunks from a matcher's opcodes.

    :param delta_seq: An object whose get_opcodes() yields
        (tag, i1, i2, j1, j2) tuples.
    :param new_content: Content whose _lines supply the replacement lines.
    :return: A list of (i1, i2, j2 - j1, new_content._lines[j1:j2]) for
        every opcode whose tag is not 'equal'.
    """
    hunks = []
    for tag, old_start, old_end, new_start, new_end in delta_seq.get_opcodes():
        if tag != 'equal':
            hunks.append((old_start, old_end, new_end - new_start,
                          new_content._lines[new_start:new_end]))
    return hunks

686

687

def _get_components_positions(self, version_ids):
    """Map every component needed to build version_ids to its position.

    This data is intended to be used for retrieving the knit records.

    :return: A dict of version_id to (method, index_memo, next), where
        method is the way the record is stored (as reported by the
        index), index_memo is what self._index.get_position returns for
        the record, and next is the build-parent of the version (its
        first parent), or None for fulltexts.
    """
    positions = {}
    for version_id in version_ids:
        current = version_id
        # Walk down the build chain until a fulltext, or a component that
        # has already been recorded, is reached.
        while current is not None and current not in positions:
            method = self._index.get_method(current)
            build_parent = None
            if method != 'fulltext':
                build_parent = self.get_parents(current)[0]
            index_memo = self._index.get_position(current)
            positions[current] = (method, index_memo, build_parent)
            current = build_parent
    return positions

713

714

def _get_content(self, version_id, parent_texts=None):
    """Returns a content object that makes up the specified
    version.

    :param version_id: The version to fetch.
    :param parent_texts: Optional dict of version_id -> content; a non-None
        entry for version_id is returned directly. None (the default)
        means no cache is consulted.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    if parent_texts is not None:
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

726

727

def _check_versions_present(self, version_ids):
    """Have the index check that every one of version_ids is present."""
    index = self._index
    index.check_versions_present(version_ids)

730

731

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
    """Validate, then add a copy of lines via _add.

    See VersionedFile.add_lines_with_ghosts(). Unlike _add_lines, the
    parents are not checked for presence first.
    """
    self._check_add(version_id, lines)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

735

736

def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks=None):
    """Validate the request, then add a copy of lines via _add.

    See VersionedFile.add_lines. Every parent must already be present.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts, left_matching_blocks)

743

744

def _check_add(self, version_id, lines):
    """Verify that version_id and lines may be added to this knit.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already stored.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_present = self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    for check_lines in (self._check_lines_not_unicode,
                        self._check_lines_are_lines):
        check_lines(lines)

755

756

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks=None):
    """Store lines as a new version on top of parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: The text lines; modified in place when the last line
        lacks a trailing newline.
    :param parent_texts: Optional dict of version_id -> content, or None.
    :return: The content object built by self.factory.make for the text.
    """
    if parent_texts is None:
        parent_texts = {}
    # Only parents actually held by this knit can take part in deltas and
    # annotation merging.
    present_parents = [p for p in parents if self.has_version(p)]
    have_parents = len(present_parents) > 0

    if delta and not have_parents:
        delta = False

    digest = sha_strings(lines)
    options = []
    if lines and lines[-1][-1] != '\n':
        # Record the missing newline in the options and store the line
        # with one appended.
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if have_parents and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and have_parents):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if not delta:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)
    else:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return lines

821

822

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """
    return None

824

825

def _clone_text(self, new_version_id, old_version_id, parents):
    """Add new_version_id with the same lines as old_version_id.

    See VersionedFile.clone_text().
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)

830

831

def get_lines(self, version_id):
    """Return the lines of one version. See VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

834

835

def _get_record_map(self, version_ids):
    """Read every record needed to build version_ids.

    :return: A dict of component version_id to
        (method, content, digest, next), where
        method is the way the content should be applied,
        content is what self._data.read_records_iter yields for the
        record, digest is the digest yielded alongside it, and
        next is the build-parent of the version, i.e. the leftmost
        ancestor; None when the method is fulltext.
    """
    position_map = self._get_components_positions(version_ids)
    # Each position_map value is (method, index_memo, next); reading a
    # record only needs its id and index_memo.
    wanted = [(component_id, position_map[component_id][1])
              for component_id in position_map]
    record_map = {}
    for component_id, content, digest in self._data.read_records_iter(wanted):
        method, index_memo, build_parent = position_map[component_id]
        record_map[component_id] = (method, content, digest, build_parent)
    return record_map

856

857

def get_text(self, version_id):
    """Return one version's text. See VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

860

861

def get_texts(self, version_ids):
    """Return each requested version's lines joined into a single string."""
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

863

864

def get_line_list(self, version_ids):
    """Return the texts of the listed versions, in the order requested.

    Each id is passed through osutils.safe_revision_id and checked with
    check_not_reserved_id before any text is built.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

871

872

_get_lf_split_line_list = get_line_list

873

874

def _get_content_maps(self, version_ids):
    """Build the texts and content objects for version_ids.

    :param version_ids: Versions to build; iterated more than once.
    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a built text does not match its stored digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Content of every component built so far, with trailing newlines kept.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at a fulltext
        # or at a component that has already been built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # Apply the chain oldest first.
        chain.reverse()
        content = None
        for component_id, method, data, digest in chain:
            if component_id in content_map:
                content = content_map[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Strip the newline from the final line on a copy, so the
            # entry in content_map keeps it.
            content = content.copy()
            last_origin, last_text = content._lines[-1][0], content._lines[-1][1]
            content._lines[-1] = (last_origin, last_text.rstrip('\n'))
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

928

929

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """Yield the content lines stored in the records of version_ids.

    See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: Versions to read; None means all versions.
    :param pb: Progress bar to update; a DummyProgress is used if None.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue of (version_id, index_memo):
    version_id_records = [
        (version_id, self._index.get_position(version_id))
        for version_id in self.versions()
        if version_id in requested_versions]

    total = len(version_id_records)
    version_idx = 0
    for version_id, data, sha_value in \
            self._data.read_records_iter(version_id_records):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        for line in line_iterator:
            yield line
        version_idx += 1

    pb.update('Walking content.', total, total)

968

969

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: Whatever self._index.iter_parents returns for the ids: an
        iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

981

982

def num_versions(self):
    """Return the index's version count. See VersionedFile.num_versions()."""
    index = self._index
    return index.num_versions()

985

986

__len__ = num_versions

987

988

def annotate_iter(self, version_id):
    """Yield (origin, text) for each line of version_id.

    See VersionedFile.annotate_iter.
    """
    safe_id = osutils.safe_revision_id(version_id)
    content = self._get_content(safe_id)
    for annotation in content.annotate_iter():
        origin, text = annotation
        yield origin, text

994

995

def get_parents(self, version_id):
    """Return the index's parents for version_id.

    See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

1005

1006

def get_parents_with_ghosts(self, version_id):
    """Return the index's parents for version_id, ghosts included.

    See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

1013

1014

def get_ancestry(self, versions, topo_sorted=True):
    """Return the index's ancestry for versions.

    See VersionedFile.get_ancestry.

    :param versions: A single version id string or a sequence of ids; an
        empty value yields [].
    :param topo_sorted: Passed through to the index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(safe_ids, topo_sorted)
    return []

1022

1023

def get_ancestry_with_ghosts(self, versions):
    """Return the index's ancestry for versions, ghosts included.

    See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or a sequence of ids; an
        empty value yields [].
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(safe_ids)
    return []

1031

1032

#@deprecated_method(zero_eight)

1033

def walk(self, version_ids):
    """Yield (lineno, insert_id, dset, line) by walking a temporary weave.

    See VersionedFile.walk.
    """
    # We take the short path here, and extract all relevant texts
    # and put them in a weave and let that do all the work.  Far
    # from optimal, but is much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    weave = Weave(self.filename)
    wanted = set(self.get_ancestry(version_ids, topo_sorted=False))
    ordered = topo_sort(self._index.get_graph())
    # Only ancestors of the requested versions go into the weave, in
    # the order topo_sort produced.
    to_add = [vid for vid in ordered if vid in wanted]

    for version_id in to_add:
        text_lines = self.get_lines(version_id)
        weave.add_lines(version_id, self.get_parents(version_id), text_lines)

    for walked in weave.walk(version_ids):
        lineno, insert_id, dset, line = walked
        yield lineno, insert_id, dset, line

1052

1053

def plan_merge(self, ver_a, ver_b):
    """Plan a merge of ver_a and ver_b from their annotations.

    See VersionedFile.plan_merge. The annotated texts and unsorted
    ancestries of both versions are handed to
    merge._plan_annotate_merge, whose result is returned.
    """
    safe_a = osutils.safe_revision_id(ver_a)
    safe_b = osutils.safe_revision_id(ver_b)
    ancestors_b = set(self.get_ancestry(safe_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(safe_a, topo_sorted=False))
    annotated_a = self.annotate(safe_a)
    annotated_b = self.annotate(safe_b)
    return merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)

1064

1065

1066

class _KnitComponentFile(object):
    """Base for the individual files that make up a knit database."""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Record where the file lives and how it is to be accessed.

        Every argument is stored on a private attribute of the same name;
        _need_to_create starts out False.
        """
        self._need_to_create = False
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir

    def _full_path(self):
        """Return the transport's base joined directly to the filename."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Validate the first line read from fp against self.HEADER.

        :raises errors.NoSuchFile: if fp yields no data at all.
        :raises KnitHeaderError: if the first line is not self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line == self.HEADER:
            return
        raise KnitHeaderError(badline=first_line,
                  filename=self._transport.abspath(self._filename))

    def __repr__(self):
        """Return 'ClassName(filename)'."""
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1095

1096

1097

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache maps a version id to the tuple
    (version_id, options, pos, size, parents, index).

    _history maps an index number to its version id.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version
    id; if this is done then the latter one completely replaces the
    former: this allows updates to correct version and parent
    information.  Note that the two entries may share the delta, and
    that successive annotations and references MUST point to the first
    entry.

    The index file on disc contains a header, followed by one line per
    knit record.  The same revision can be present in an index file more
    than once.  The first occurrence gets assigned a sequence number
    starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record.  Values
        include no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the
        data file that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record's
        data in the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number
        of a revision id already in the knit that is a parent of
        REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a
    line that does not end in ' :'.  If the ' :' is not present at the
    end of a line, or at the end of the file, then the record that is
    missing it will be ignored by the parser.

    When writing new records to the index file, the data is preceded by
    '\n' to ensure that records always start on new lines even if the
    last write was interrupted.  As a result it is normal for the last
    line in the index to be missing a trailing newline.  One can be
    added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        # A repeated version id keeps the index number of its first
        # entry; only brand new ids are appended to _history.
        try:
            index = self._cache[version_id][5]
        except KeyError:
            index = len(self._history)
            self._history.append(version_id)
        record = (version_id, options, pos, size, parents, index)
        self._cache[version_id] = record

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from the transport, creating it when allowed.

        If the file is missing (or empty), NoSuchFile is re-raised
        unless ``mode == 'w'`` and ``create`` is true.  In that case the
        header is either written immediately or, when ``delay_create``
        is set, deferred until the first add_versions() call.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            may_create = (mode == 'w' and create)
            if not may_create:
                raise
            if delay_create:
                self._need_to_create = True
                return
            self._transport.put_bytes_non_atomic(
                self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of (version_id, parents) pairs for every record."""
        graph = []
        for vid, record in self._cache.iteritems():
            graph.append((vid, record[4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        known = self._cache
        ancestry = {}
        todo = set(versions)
        while todo:
            current = todo.pop()
            try:
                record = known[current]
            except KeyError:
                raise RevisionNotPresent(current, self._filename)
            # trim ghosts: only parents that have a record of their own.
            present_parents = [p for p in record[4] if p in known]
            for parent in present_parents:
                if parent not in ancestry:
                    todo.add(parent)
            ancestry[current] = present_parents
        if topo_sorted:
            return topo_sort(ancestry.items())
        return ancestry.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        self.check_versions_present(versions)
        known = self._cache
        ancestry = {}
        todo = set(versions)
        while todo:
            current = todo.pop()
            if current in known:
                parents = known[current][4]
                for parent in parents:
                    if parent not in ancestry:
                        todo.add(parent)
                ancestry[current] = parents
            else:
                # ghost, fake it with an empty parent list
                ancestry[current] = []
        return topo_sort(ancestry.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped. The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        for version_id in version_ids:
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        # NOTE: this hands out the internal list itself, not a copy.
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent ids as a space separated parent-reference string.

        Ids present in the cache become their sequence number; any other
        id is written literally with a '.' prefix.
        """
        known = self._cache
        tokens = []
        for version in versions:
            try:
                token = str(known[version][5])
            except KeyError:
                token = '.' + version
            tokens.append(token)
        return ' '.join(tokens)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
            (version_id, options, (index, pos, size), parents).
            The ``index`` element of the memo is not used here.
        """
        new_lines = []
        # Snapshots used to roll back the in-memory state on failure.
        saved_history = self._history[:]
        saved_cache = self._cache.copy()

        try:
            for version_id, options, index_memo, parents in versions:
                unused_index, pos, size = index_memo
                line = "\n%s %s %s %s %s :" % (
                    version_id,
                    ','.join(options),
                    pos,
                    size,
                    self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # Creation was delayed: write header and records at once.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(
                    self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename,
                                             ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for
        the index field.

        :return: a tuple (None, data position, size) to hand to the
            access logic to get the record.
        """
        record = self._cache[version_id]
        pos, size = record[2], record[3]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises errors.KnitIndexUnknownMethod: if the record's options
            name neither 'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options cached for version_id.

        The value is returned exactly as it was stored in the cache.
        """
        record = self._cache[version_id]
        return record[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        present = []
        for parent in self._cache[version_id][4]:
            if parent in self._cache:
                present.append(parent)
        return present

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        record = self._cache[version_id]
        return record[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first id found missing.
        """
        known = self._cache
        for version_id in version_ids:
            if version_id in known:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1376

1377

1378

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not
            record parents.
        :param add_callback: If not None, allow additions to the index
            and call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: if deltas are requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        Generator of (index, key, value, node_refs) tuples; for a
        parentless index node_refs is always ().

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: When true, raise RevisionNotPresent after
            all found entries have been yielded if any key was missing.
        """
        wanted = set(keys)
        seen = set()
        has_parents = self._parents
        for node in self._graph_index.iter_entries(wanted):
            if has_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            seen.add(node[1])
        if check_present:
            absent = wanted.difference(seen)
            if absent:
                raise RevisionNotPresent(absent.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that exist in the index."""
        present = set()
        for node in self._get_entries(version_ids):
            present.add(node[1])
        return present

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        absent = set(wanted_keys).difference(present_keys)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        todo = set(versions)
        while todo:
            # fetch every pending node in one go
            batch = todo
            entries = self._get_entries(batch)
            found = set()
            todo = set()
            for (index, key, value, node_refs) in entries:
                # dont ask for ghosties - otherwise we can end up
                # looping with pending being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                              if parent not in ghosts]
                # queue parents that have not been examined yet
                for parent in graph[key]:
                    if parent not in graph:
                        todo.add(parent)
                found.add(key)
            ghosts.update(batch.difference(found))
        absent = versions.difference(graph)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        if topo_sorted:
            ordered_keys = topo_sort(graph.items())
        else:
            ordered_keys = graph.iterkeys()
        return [key[0] for key in ordered_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        todo = set(versions)
        while todo:
            # fetch every pending node in one go
            batch = todo
            entries = self._get_entries(batch)
            todo = set()
            for (index, key, value, node_refs) in entries:
                graph[key] = node_refs[0]
                # queue parents that have not been examined yet
                for parent in graph[key]:
                    if parent not in graph:
                        todo.add(parent)
            unfound = batch.difference(graph)
            # A requested version that is absent is an error; an absent
            # parent is merely a ghost.
            unfound_requested = versions.intersection(unfound)
            if unfound_requested:
                raise RevisionNotPresent(unfound_requested.pop(), self)
            for ghost in unfound:
                # add a key, no parents
                graph[ghost] = []
                todo.discard(ghost) # don't look for it
        ordered_keys = topo_sort(graph.items())
        return [key[0] for key in ordered_keys]

    def get_graph(self):
        """Return a list of (version_id, parent_ids) pairs for the index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        return [(key[0], self._keys_to_version_ids(refs[0]))
                for index, key, value, refs
                in self._graph_index.iter_all_entries()]

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped. The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        if not self._parents:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()
            return
        nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
        referenced = set()
        present = set()
        for node in nodes:
            referenced.update(node[3][0])
            # any node we are querying must be present
            present.add(node[1])
        # Only parents not already known to be present need a lookup.
        unknown = referenced.difference(present)
        present.update(self._present_keys(unknown))
        for node in nodes:
            parent_ids = [parent[0] for parent in node[3][0]
                          if parent in present]
            yield node[1][0], tuple(parent_ids)

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        count = 0
        for node in self._graph_index.iter_all_entries():
            count += 1
        return count

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        version_ids = []
        for node in self._graph_index.iter_all_entries():
            version_ids.append(node[1][0])
        return version_ids

    def has_version(self, version_id):
        """True if the version is in the index."""
        keys = self._version_ids_to_keys([version_id])
        return len(self._present_keys(keys)) == 1

    def _keys_to_version_ids(self, keys):
        """Strip the 1-tuple wrapping from each key; returns a tuple."""
        return tuple([key[0] for key in keys])

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the
            access logic to get the record.
        """
        node = self._get_node(version_id)
        # The value is a one character flag followed by "<pos> <size>".
        fields = node[2][1:].split(' ')
        pos = int(fields[0])
        size = int(fields[1])
        return node[0], pos, size

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if self._deltas:
            node = self._get_node(version_id)
            return self._parent_compression(node[3][1])
        return 'fulltext'

    def _parent_compression(self, reference_list):
        """Map a compression-parent reference list to a method name."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list) == 0:
            return 'fulltext'
        return 'line-delta'

    def _get_node(self, version_id):
        """Return the first entry found for version_id."""
        keys = self._version_ids_to_keys([version_id])
        return list(self._get_entries(keys))[0]

    def get_options(self, version_id):
        """Return the list of option names for version_id.

        e.g. ['line-delta', 'no-eol']
        """
        node = self._get_node(version_id)
        if self._deltas:
            options = [self._parent_compression(node[3][1])]
        else:
            options = ['fulltext']
        # A leading 'N' in the stored value marks a no-eol record.
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        found = list(self.iter_parents([version_id]))
        if found:
            return found[0][1]
        # missing key
        raise errors.RevisionNotPresent(version_id, self)

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        keys = self._version_ids_to_keys([version_id])
        nodes = list(self._get_entries(keys, check_present=True))
        if self._parents:
            return self._keys_to_version_ids(nodes[0][3][0])
        return ()

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        absent = keys.difference(self._present_keys(keys))
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, access_memo, parents)
        return self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for
        insertion by the caller and checks that it is safe to insert
        then calls self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, access_memo, parents), where
            access_memo is (index, pos, size).
        :raises errors.ReadOnlyError: if no add callback was supplied.
        :raises KnitCorrupt: for records that do not fit this index or
            that disagree with an entry already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        pending = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            flag = ' '
            if 'no-eol' in options:
                flag = 'N'
            value = flag + "%d %d" % (pos, size)
            if not self._deltas and 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if not self._parents:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            elif not self._deltas:
                node_refs = (parents, )
            elif 'line-delta' in options:
                # the first parent is recorded as the compression parent
                node_refs = (parents, (parents[0],))
            else:
                node_refs = (parents, ())
            pending[key] = (value, node_refs)
        # Entries that already exist must match exactly and are dropped
        # from the set handed to the callback.
        for (index, key, value, node_refs) in self._get_entries(pending):
            if (value, node_refs) != pending[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), pending[key]))
            del pending[key]
        if self._parents:
            result = [(key, value, node_refs)
                      for key, (value, node_refs) in pending.iteritems()]
        else:
            result = [(key, value)
                      for key, (value, node_refs) in pending.iteritems()]
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Wrap each version id in a 1-tuple; returns a set of keys."""
        return set([(version_id, ) for version_id in version_ids])

1690

1691

1692

class _KnitAccess(object):

1693

"""Access to knit records in a .knit file."""

1694

1695

def __init__(self, transport, filename, _file_mode, _dir_mode,

1696

_need_to_create, _create_parent_dir):

1697

"""Create a _KnitAccess for accessing and inserting data.

1698

1699

:param transport: The transport the .knit is located on.

1700

:param filename: The filename of the .knit.

1701

"""

1702

self._transport = transport

1703

self._filename = filename

1704

self._file_mode = _file_mode

1705

self._dir_mode = _dir_mode

1706

self._need_to_create = _need_to_create

1707

self._create_parent_dir = _create_parent_dir

1708

1709

def add_raw_records(self, sizes, raw_data):

1710

"""Add raw knit bytes to a storage area.

1711

1712

The data is spooled to whereever the access method is storing data.

1713

1714

:param sizes: An iterable containing the size of each raw data segment.

1715

:param raw_data: A bytestring containing the data.

1716

:return: A list of memos to retrieve the record later. Each memo is a

1717

tuple - (index, pos, length), where the index field is always None

1718

for the .knit access method.

1719

"""

1720

assert type(raw_data) == str, \

1721

'data must be plain bytes was %s' % type(raw_data)

1722

if not self._need_to_create:

1723

base = self._transport.append_bytes(self._filename, raw_data)

1724

else:

1725

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1726

create_parent_dir=self._create_parent_dir,

1727

mode=self._file_mode,

1728

dir_mode=self._dir_mode)

1729

self._need_to_create = False

1730

base = 0

1731

result = []

1732

for size in sizes:

1733

result.append((None, base, size))

1734

base += size

1735

return result

1736

1737

def create(self):

1738

"""IFF this data access has its own storage area, initialise it.

1739

1740

:return: None.

1741

"""

1742

self._transport.put_bytes_non_atomic(self._filename, '',

1743

mode=self._file_mode)

1744

1745

def open_file(self):

1746

"""IFF this data access can be represented as a single file, open it.

1747

1748

For knits that are not mapped to a single file on disk this will

1749

always return None.

1750

1751

:return: None or a file handle.

1752

"""

1753

try:

1754

return self._transport.get(self._filename)

1755

except NoSuchFile:

1756

pass

1757

return None

1758

1759

def get_raw_records(self, memos_for_retrieval):

1760

"""Get the raw bytes for a records.

1761

1762

:param memos_for_retrieval: An iterable containing the (index, pos,

1763

length) memo for retrieving the bytes. The .knit method ignores

1764

the index as there is always only a single file.

1765

:return: An iterator over the bytes of the records.

1766

"""

1767

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1768

for pos, data in self._transport.readv(self._filename, read_vector):

1769

yield data

1770

1771

1772

class _PackAccess(object):

1773

"""Access to knit records via a collection of packs."""

1774

1775

def __init__(self, index_to_packs, writer=None):

1776

"""Create a _PackAccess object.

1777

1778

:param index_to_packs: A dict mapping index objects to the transport

1779

and file names for obtaining data.

1780

:param writer: A tuple (pack.ContainerWriter, write_index) which

1781

contains the pack to write, and the index that reads from it will

1782

be associated with.

1783

"""

1784

if writer:

1785

self.container_writer = writer[0]

1786

self.write_index = writer[1]

1787

else:

1788

self.container_writer = None

1789

self.write_index = None

1790

self.indices = index_to_packs

1791

1792

def add_raw_records(self, sizes, raw_data):

1793

"""Add raw knit bytes to a storage area.

1794

1795

The data is spooled to the container writer in one bytes-record per

1796

raw data item.

1797

1798

:param sizes: An iterable containing the size of each raw data segment.

1799

:param raw_data: A bytestring containing the data.

1800

:return: A list of memos to retrieve the record later. Each memo is a

1801

tuple - (index, pos, length), where the index field is the

1802

write_index object supplied to the PackAccess object.

1803

"""

1804

assert type(raw_data) == str, \

1805

'data must be plain bytes was %s' % type(raw_data)

1806

result = []

1807

offset = 0

1808

for size in sizes:

1809

p_offset, p_length = self.container_writer.add_bytes_record(

1810

raw_data[offset:offset+size], [])

1811

offset += size

1812

result.append((self.write_index, p_offset, p_length))

1813

return result

1814

1815

def create(self):

1816

"""Pack based knits do not get individually created."""

1817

1818

def get_raw_records(self, memos_for_retrieval):

1819

"""Get the raw bytes for a records.

1820

1821

:param memos_for_retrieval: An iterable containing the (index, pos,

1822

length) memo for retrieving the bytes. The Pack access method

1823

looks up the pack to use for a given record in its index_to_pack

1824

map.

1825

:return: An iterator over the bytes of the records.

1826

"""

1827

# first pass, group into same-index requests

1828

request_lists = []

1829

current_index = None

1830

for (index, offset, length) in memos_for_retrieval:

1831

if current_index == index:

1832

current_list.append((offset, length))

1833

else:

1834

if current_index is not None:

1835

request_lists.append((current_index, current_list))

1836

current_index = index

1837

current_list = [(offset, length)]

1838

# handle the last entry

1839

if current_index is not None:

1840

request_lists.append((current_index, current_list))

1841

for index, offsets in request_lists:

1842

transport, path = self.indices[index]

1843

reader = pack.make_readv_reader(transport, path, offsets)

1844

for names, read_func in reader.iter_records():

1845

yield read_func(None)

1846

1847

def open_file(self):

1848

"""Pack based knits have no single file."""

1849

return None

1850

1851

def set_writer(self, writer, index, (transport, packname)):

1852

"""Set a writer to use for adding data."""

1853

self.indices[index] = (transport, packname)

1854

self.container_writer = writer

1855

self.write_index = index

1856

1857

1858

class _KnitData(object):

1859

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1860

1861

The KnitData class provides the logic for parsing and using knit records,

1862

making use of an access method for the low level read and write operations.

1863

"""

1864

1865

def __init__(self, access):

1866

"""Create a KnitData object.

1867

1868

:param access: The access method to use. Access methods such as

1869

_KnitAccess manage the insertion of raw records and the subsequent

1870

retrieval of the same.

1871

"""

1872

self._access = access

1873

self._checked = False

1874

# TODO: jam 20060713 conceptually, this could spill to disk

1875

# if the cached size gets larger than a certain amount

1876

# but it complicates the model a bit, so for now just use

1877

# a simple dictionary

1878

self._cache = {}

1879

self._do_cache = False

1880

1881

def enable_cache(self):
    """Turn on caching of records written through add_record."""
    self._do_cache = True

1884

1885

def clear_cache(self):
    """Drop all cached records and switch caching back off."""
    self._cache = {}
    self._do_cache = False

1889

1890

def _open_file(self):

1891

return self._access.open_file()

1892

1893

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream holding a "version ..." header line, the
    given lines, and a closing "end ..." line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    buf = StringIO()
    gz = GzipFile(None, mode='wb', fileobj=buf)

    assert isinstance(version_id, str)
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    gz.writelines(chain([header], lines, [footer]))
    gz.close()
    # Closing the gzip stream flushes it, so tell() is the final size.
    compressed_size = buf.tell()

    buf.seek(0)
    return compressed_size, buf

1913

1914

def add_raw_records(self, sizes, raw_data):

1915

"""Append a prepared record to the data file.

1916

1917

:param sizes: An iterable containing the size of each raw data segment.

1918

:param raw_data: A bytestring containing the data.

1919

:return: a list of index data for the way the data was stored.

1920

See the access method add_raw_records documentation for more

1921

details.

1922

"""

1923

return self._access.add_raw_records(sizes, raw_data)

1924

1925

def add_record(self, version_id, digest, lines):

1926

"""Write new text record to disk.

1927

1928

Returns index data for retrieving it later, as per add_raw_records.

1929

"""

1930

size, sio = self._record_to_data(version_id, digest, lines)

1931

result = self.add_raw_records([size], sio.getvalue())

1932

if self._do_cache:

1933

self._cache[version_id] = sio.getvalue()

1934

return result[0]

1935

1936

def _parse_record_header(self, version_id, raw_data):

1937

"""Parse a record header for consistency.

1938

1939

:return: the header and the decompressor stream.

1940

as (stream, header_record)

1941

"""

1942

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1943

try:

1944

rec = self._check_header(version_id, df.readline())

1945

except Exception, e:

1946

raise KnitCorrupt(self._access,

1947

"While reading {%s} got %s(%s)"

1948

% (version_id, e.__class__.__name__, str(e)))

1949

return df, rec

1950

1951

def _check_header(self, version_id, line):

1952

rec = line.split()

1953

if len(rec) != 4:

1954

raise KnitCorrupt(self._access,

1955

'unexpected number of elements in record header')

1956

if rec[1] != version_id:

1957

raise KnitCorrupt(self._access,

1958

'unexpected version, wanted %r, got %r'

1959

% (version_id, rec[1]))

1960

return rec

1961

1962

def _parse_record(self, version_id, data):

1963

# profiling notes:

1964

# 4168 calls in 2880 217 internal

1965

# 4168 calls to _parse_record_header in 2121

1966

# 4168 calls to readlines in 330

1967

df = GzipFile(mode='rb', fileobj=StringIO(data))

1968

1969

try:

1970

record_contents = df.readlines()

1971

except Exception, e:

1972

raise KnitCorrupt(self._access,

1973

"While reading {%s} got %s(%s)"

1974

% (version_id, e.__class__.__name__, str(e)))

1975

header = record_contents.pop(0)

1976

rec = self._check_header(version_id, header)

1977

1978

last_line = record_contents.pop()

1979

if len(record_contents) != int(rec[2]):

1980

raise KnitCorrupt(self._access,

1981

'incorrect number of lines %s != %s'

1982

' for version {%s}'

1983

% (len(record_contents), int(rec[2]),

1984

version_id))

1985

if last_line != 'end %s\n' % rec[1]:

1986

raise KnitCorrupt(self._access,

1987

'unexpected version end line %r, wanted %r'

1988

% (last_line, version_id))

1989

df.close()

1990

return record_contents, rec[3]

1991

1992

def read_records_iter_raw(self, records):

1993

"""Read text records from data file and yield raw data.

1994

1995

This unpacks enough of the text record to validate the id is

1996

as expected but thats all.

1997

"""

1998

# setup an iterator of the external records:

1999

# uses readv so nice and fast we hope.

2000

if len(records):

2001

# grab the disk data needed.

2002

if self._cache:

2003

# Don't check _cache if it is empty

2004

needed_offsets = [index_memo for version_id, index_memo

2005

in records

2006

if version_id not in self._cache]

2007

else:

2008

needed_offsets = [index_memo for version_id, index_memo

2009

in records]

2010

2011

raw_records = self._access.get_raw_records(needed_offsets)

2012

2013

for version_id, index_memo in records:

2014

if version_id in self._cache:

2015

# This data has already been validated

2016

data = self._cache[version_id]

2017

else:

2018

data = raw_records.next()

2019

if self._do_cache:

2020

self._cache[version_id] = data

2021

2022

# validate the header

2023

df, rec = self._parse_record_header(version_id, data)

2024

df.close()

2025

yield version_id, data

2026

2027

def read_records_iter(self, records):

2028

"""Read text records from data file and yield result.

2029

2030

The result will be returned in whatever is the fastest to read.

2031

Not by the order requested. Also, multiple requests for the same

2032

record will only yield 1 response.

2033

:param records: A list of (version_id, pos, len) entries

2034

:return: Yields (version_id, contents, digest) in the order

2035

read, not the order requested

2036

"""

2037

if not records:

2038

return

2039

2040

if self._cache:

2041

# Skip records we have alread seen

2042

yielded_records = set()

2043

needed_records = set()

2044

for record in records:

2045

if record[0] in self._cache:

2046

if record[0] in yielded_records:

2047

continue

2048

yielded_records.add(record[0])

2049

data = self._cache[record[0]]

2050

content, digest = self._parse_record(record[0], data)

2051

yield (record[0], content, digest)

2052

else:

2053

needed_records.add(record)

2054

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2055

else:

2056

needed_records = sorted(set(records), key=operator.itemgetter(1))

2057

2058

if not needed_records:

2059

return

2060

2061

# The transport optimizes the fetching as well

2062

# (ie, reads continuous ranges.)

2063

raw_data = self._access.get_raw_records(

2064

[index_memo for version_id, index_memo in needed_records])

2065

2066

for (version_id, index_memo), data in \

2067

izip(iter(needed_records), raw_data):

2068

content, digest = self._parse_record(version_id, data)

2069

if self._do_cache:

2070

self._cache[version_id] = data

2071

yield version_id, content, digest

2072

2073

def read_records(self, records):

2074

"""Read records into a dictionary."""

2075

components = {}

2076

for record_id, content, digest in \

2077

self.read_records_iter(records):

2078

components[record_id] = (content, digest)

2079

return components

2080

2081

2082

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only when both source and target are knits."""
        try:
            both_knits = (isinstance(source, KnitVersionedFile) and
                          isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False
        return both_knits

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of every needed version from the source knit
        into the target knit, then merges parent lists for versions present
        in both whose parents differ.

        Note that the pb argument is replaced by a fresh nested progress bar
        and msg is not used.

        :return: The number of versions copied.
        """
        source = self.source
        target = self.target
        assert isinstance(source, KnitVersionedFile)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            this_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = \
                self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents_with_ghosts(version))
                if n1 == n2:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    # parents the target already records are safe
                    if (parent not in n1
                        and version in source.get_ancestry(parent)):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0

            # Versions to copy, in an order where parents precede children.
            version_list = []
            for candidate in topo_sort(source.get_graph()):
                if (not target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = source._index.get_options(version_id)
                parents = source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in copy_set or
                            not source.has_version(parent))
                index_memo = source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), queued in izip(raw_stream, copy_queue):
                version_id2, options, parents = queued
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append(
                    (version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                extra_parents = list(n2.difference(n1))
                new_parents = (target.get_parents_with_ghosts(version)
                               + extra_parents)
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

2197

2198

2199

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only for a Weave source and a knit target."""
        try:
            weave_to_knit = (isinstance(source, bzrlib.weave.Weave) and
                             isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False
        return weave_to_knit

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the lines of every needed version from the source weave to the
        target knit, then merges parent lists for versions present in both
        where the source has parents the target lacks.

        Note that the pb argument is replaced by a fresh nested progress bar
        and msg is not used.

        :return: The number of versions converted.
        """
        source = self.source
        target = self.target
        assert isinstance(source, bzrlib.weave.Weave)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            this_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = \
                self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if not n2.difference(n1):
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    # parents the target already records are safe
                    if (parent not in n1
                        and version in source.get_ancestry(parent)):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0

            # Versions to convert, in an order where parents precede children.
            version_list = []
            for candidate in topo_sort(source.get_graph()):
                if (not target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                extra_parents = list(n2.difference(n1))
                new_parents = (target.get_parents_with_ghosts(version)
                               + extra_parents)
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2290

2291

2292

class KnitSequenceMatcher(difflib.SequenceMatcher):

2293

"""Knit tuned sequence matcher.

2294

2295

This is based on profiling of difflib which indicated some improvements

2296

for our usage pattern.

2297

"""

2298

2299

def find_longest_match(self, alo, ahi, blo, bhi):

2300

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

2301

2302

If isjunk is not defined:

2303

2304

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

2305

alo <= i <= i+k <= ahi

2306

blo <= j <= j+k <= bhi

2307

and for all (i',j',k') meeting those conditions,

2308

k >= k'

2309

i <= i'

2310

and if i == i', j <= j'

2311

2312

In other words, of all maximal matching blocks, return one that

2313

starts earliest in a, and of all those maximal matching blocks that

2314

start earliest in a, return the one that starts earliest in b.

2315

2316

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

2317

>>> s.find_longest_match(0, 5, 0, 9)

2318

(0, 4, 5)

2319

2320

If isjunk is defined, first the longest matching block is

2321

determined as above, but with the additional restriction that no

2322

junk element appears in the block. Then that block is extended as

2323

far as possible by matching (only) junk elements on both sides. So

2324

the resulting block never matches on junk except as identical junk

2325

happens to be adjacent to an "interesting" match.

2326

2327

Here's the same example as before, but considering blanks to be

2328

junk. That prevents " abcd" from matching the " abcd" at the tail

2329

end of the second sequence directly. Instead only the "abcd" can

2330

match, and matches the leftmost "abcd" in the second sequence:

2331

2332

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

2333

>>> s.find_longest_match(0, 5, 0, 9)

2334

(1, 0, 4)

2335

2336

If no blocks match, return (alo, blo, 0).

2337

2338

>>> s = SequenceMatcher(None, "ab", "c")

2339

>>> s.find_longest_match(0, 2, 0, 1)

2340

(0, 0, 0)

2341

"""

2342

2343

# CAUTION: stripping common prefix or suffix would be incorrect.

2344

# E.g.,

2345

# ab

2346

# acab

2347

# Longest matching block is "ab", but if common prefix is

2348

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

2349

# strip, so ends up claiming that ab is changed to acab by

2350

# inserting "ca" in the middle. That's minimal but unintuitive:

2351

# "it's obvious" that someone inserted "ac" at the front.

2352

# Windiff ends up at the same place as diff, but by pairing up

2353

# the unique 'b's and then matching the first two 'a's.

2354

2355

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

2356

besti, bestj, bestsize = alo, blo, 0

2357

# find longest junk-free match

2358

# during an iteration of the loop, j2len[j] = length of longest

2359

# junk-free match ending with a[i-1] and b[j]

2360

j2len = {}

2361

# nothing = []

2362

b2jget = b2j.get

2363

for i in xrange(alo, ahi):

2364

# look at all instances of a[i] in b; note that because

2365

# b2j has no junk keys, the loop is skipped if a[i] is junk

2366

j2lenget = j2len.get

2367

newj2len = {}

2368

2369

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

2370

# following improvement

2371

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

2372

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

2373

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

2374

# to

2375

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

2376

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

2377

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

2378

2379

try:

2380

js = b2j[a[i]]

2381

except KeyError:

2382

pass

2383

else:

2384

for j in js:

2385

# a[i] matches b[j]

2386

if j >= blo:

2387

if j >= bhi:

2388

break

2389

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

2390

if k > bestsize:

2391

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

2392

j2len = newj2len

2393

2394

# Extend the best by non-junk elements on each end. In particular,

2395

# "popular" non-junk elements aren't in b2j, which greatly speeds

2396

# the inner loop above, but also means "the best" match so far

2397

# doesn't contain any junk *or* popular non-junk elements.

2398

while besti > alo and bestj > blo and \

2399

not isbjunk(b[bestj-1]) and \

2400

a[besti-1] == b[bestj-1]:

2401

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

2402

while besti+bestsize < ahi and bestj+bestsize < bhi and \

2403

not isbjunk(b[bestj+bestsize]) and \

2404

a[besti+bestsize] == b[bestj+bestsize]:

2405

bestsize += 1

2406

2407

# Now that we have a wholly interesting match (albeit possibly

2408

# empty!), we may as well suck up the matching junk on each

2409

# side of it too. Can't think of a good reason not to, and it

2410

# saves post-processing the (possibly considerable) expense of

2411

# figuring out what to do with it. In the case of an empty

2412

# interesting match, this is clearly the right thing to do,

2413

# because no other kind of match is possible in the regions.

2414

while besti > alo and bestj > blo and \

2415

isbjunk(b[bestj-1]) and \

2416

a[besti-1] == b[bestj-1]:

2417

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

2418

while besti+bestsize < ahi and bestj+bestsize < bhi and \

2419

isbjunk(b[bestj+bestsize]) and \

2420

a[besti+bestsize] == b[bestj+bestsize]:

2421

bestsize = bestsize + 1

2422

2423

return besti, bestj, bestsize

2424

2425

2426

# Bind _load_data to the implementation from the compiled
# bzrlib._knit_load_data_c module when it can be imported, and fall back to
# the bzrlib._knit_load_data_py implementation otherwise.
try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »