/brz/remove-bazaar : revision 2743.3.6

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Ian Clatworthy
Date: 2007-08-30 02:02:21 UTC
mfrom: (2768 +trunk)
mto: (2772.2.1 ianc-integration)
mto: This revision was merged to the branch mainline in revision 2774.
Revision ID: ian.clatworthy@internode.on.net-20070830020221-92iai8t8tx4frqwk

Merge bzr.dev

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

100

)

101

from bzrlib.tuned_gzip import GzipFile

102

from bzrlib.osutils import (

103

contains_whitespace,

104

contains_linebreaks,

105

sha_strings,

106

)

107

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

108

from bzrlib.tsort import topo_sort

109

import bzrlib.ui

110

import bzrlib.weave

111

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

112

113

114

# TODO: Split out code specific to this format into an associated object.

115

116

# TODO: Can we put in some kind of value to check that the index and data

117

# files belong together?

118

119

# TODO: accommodate binaries, perhaps by storing a byte count

120

121

# TODO: function to check whole file

122

123

# TODO: atomically append data, then measure backwards from the cursor

124

# position after writing to work out where it was located. we may need to

125

# bypass python file buffering.

126

127

DATA_SUFFIX = '.knit'

128

INDEX_SUFFIX = '.kndx'

129

130

131

class KnitContent(object):
    """Annotated content of one knit version.

    The content is held in ``_lines`` as a sequence of (origin, text) pairs,
    and line deltas can be computed against another content object.
    """

    def __init__(self, lines):
        # lines: sequence of (origin, text) pairs, stored as given (no copy).
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the (origin, text) pairs of this content."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs of this content as a new list."""
        pairs = self.annotate_iter()
        return list(pairs)

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        new_lines is another content object (it must offer text() and
        _lines).  Each hunk is (old_start, old_end, new_length, new_pairs),
        where new_pairs are the annotated lines taken from new_lines.
        """
        replacement_texts = new_lines.text()
        current_texts = self.text()
        matcher = KnitSequenceMatcher(None, current_texts, replacement_texts)
        for opcode in matcher.get_opcodes():
            tag, old_from, old_to, new_from, new_to = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                hunk_lines = new_lines._lines[new_from:new_to]
                yield old_from, old_to, new_to - new_from, hunk_lines

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    def text(self):
        """Return only the text of each line, dropping the origins."""
        return [content for _origin, content in self._lines]

    def copy(self):
        """Return a new KnitContent holding a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta

        knit_delta is an iterable of (source_start, source_end, new_length,
        new_text) hunks; source and target are the line sequences the delta
        maps between.  Yields (source_pos, target_pos, length) triples and
        finishes with a zero-length terminating block.
        """
        def usable_run(src_at, tgt_at, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the final
            # line of every candidate run is compared explicitly.
            if length > 0:
                last = length - 1
                if source[src_at + last] != target[tgt_at + last]:
                    length -= 1
            return length

        total = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            # Lines between the previous hunk and this one are unchanged.
            gap = hunk_start - src_at
            run = usable_run(src_at, tgt_at, gap)
            if run > 0:
                yield src_at, tgt_at, run
            tgt_at += inserted + gap
            src_at = hunk_end
        # Whatever follows the last hunk is unchanged as well.
        tail = total - tgt_at
        run = usable_run(src_at, tgt_at, tail)
        if run > 0:
            yield src_at, tgt_at, run
        yield src_at + tail, total, 0

190

191

192

class _KnitFactory(object):
    """Shared base of the factories that build knit content objects."""

    def make(self, lines, version_id):
        """Build a KnitContent in which every line originates from version_id.

        :param lines: Sequence of text lines.
        :param version_id: Origin recorded against each of those lines.
        :return: A KnitContent wrapping the (version_id, line) pairs.
        """
        origins = [version_id] * len(lines)
        annotated = zip(origins, lines)
        return KnitContent(annotated)

198

199

200

class KnitAnnotateFactory(_KnitFactory):

201

"""Factory for creating annotated Content objects."""

202

203

annotated = True

204

205

def parse_fulltext(self, content, version_id):
    """Turn serialised annotated fulltext lines into a KnitContent.

    Each input line has the form
        revid(utf8) plaintext\n
    and is split at its first space into a
        (revid, plaintext)
    tuple.  version_id is not consulted; the origins come from the lines.
    """
    # TODO: jam 20070209 The tests expect this to be returned as tuples,
    #       but the code itself doesn't really depend on that.
    #       Figure out a way to not require the overhead of turning the
    #       list back into tuples.
    annotated = []
    for serialised in content:
        pieces = serialised.split(' ', 1)
        annotated.append(tuple(pieces))
    return KnitContent(annotated)

219

220

def parse_line_delta_iter(self, lines, version_id=None):
    """Return an iterator over the hunks of a parsed line delta.

    :param lines: the serialised delta lines, as for parse_line_delta.
    :param version_id: forwarded to parse_line_delta.  Optional so that
        existing one-argument callers keep working.
    :return: an iterator over the list parse_line_delta returns.
    """
    # parse_line_delta requires a version_id argument; the former call
    # passed lines alone and therefore always raised TypeError.
    return iter(self.parse_line_delta(lines, version_id))

222

223

def parse_line_delta(self, lines, version_id):
    """Convert a line based delta into internal representation.

    The serialised delta is a sequence of hunks.  Each hunk is a header
    line "start,end,count" (three comma separated integers) followed by
    count content lines of the form "revid(utf8) text".

    :param lines: iterable of serialised delta lines.
    :param version_id: not used by this factory; every content line
        already carries its own revid prefix.
    :return: list of (start, end, count, [count tuples of the content
        line split at its first space]).
    """
    result = []
    lines = iter(lines)
    next_line = lines.next

    # walk through the lines parsing.
    for header in lines:
        start, end, count = [int(n) for n in header.split(',')]
        # The content lines are pulled from the same iterator the outer
        # loop walks, so the loop resumes at the next header.
        contents = [tuple(next_line().split(' ', 1)) for i in xrange(count)]
        result.append((start, end, count, contents))
    return result

248

249

def get_fulltext_content(self, lines):
    """Extract just the content lines from a fulltext.

    Returns a generator that drops everything up to and including the
    first space of each annotated line.
    """
    return (annotated.split(' ', 1)[1] for annotated in lines)

252

253

def get_linedelta_content(self, lines):
    """Extract just the content from a line delta.

    The hunk positions are discarded; only the text of each content
    line (the part after its first space) is yielded.
    """
    stream = iter(lines)
    pull = stream.next
    for header in stream:
        fields = header.split(',')
        # The third header field is the number of content lines that follow.
        remaining = int(fields[2])
        for _ in xrange(remaining):
            origin, text = pull().split(' ', 1)
            yield text

267

268

def lower_fulltext(self, content):
    """Convert a fulltext content record into a serializable form.

    Every (origin, text) pair of content._lines becomes the string
    "origin text".  See parse_fulltext which this inverts.
    """
    serialised = []
    for origin, text in content._lines:
        serialised.append('%s %s' % (origin, text))
    return serialised

276

277

def lower_line_delta(self, delta):
    """Convert a delta into a serializable form.

    Each hunk contributes a "start,end,count" header line followed by
    one "origin text" line per content pair.  See parse_line_delta
    which this inverts.
    """
    serialised = []
    for start, end, count, annotated in delta:
        header = '%d,%d,%d\n' % (start, end, count)
        serialised.append(header)
        for origin, text in annotated:
            serialised.append(origin + ' ' + text)
    return serialised

290

291

292

class KnitPlainFactory(_KnitFactory):

293

"""Factory for creating plain Content objects."""

294

295

annotated = False

296

297

def parse_fulltext(self, content, version_id):
    """Parse an unannotated fulltext.

    This is not a no-op: the result is whatever self.make builds from
    the lines and version_id, so the constant version_id ends up
    attached to the content.
    """
    parsed = self.make(content, version_id)
    return parsed

304

305

def parse_line_delta_iter(self, lines, version_id):
    """Yield the hunks of an unannotated line delta.

    :param lines: an indexable, sliceable sequence of serialised lines.
    :param version_id: the origin paired with every content line.
    :return: generator of (start, end, count, zip of (version_id, line)).
    """
    total = len(lines)
    pos = 0
    while pos < total:
        start, end, count = [int(field) for field in lines[pos].split(',')]
        # The count content lines sit directly after the header.
        body_start = pos + 1
        body_end = body_start + count
        yield start, end, count, zip([version_id] * count,
                                     lines[body_start:body_end])
        pos = body_end

314

315

def parse_line_delta(self, lines, version_id):
    """Return the hunks from parse_line_delta_iter collected in a list."""
    hunks = self.parse_line_delta_iter(lines, version_id)
    return list(hunks)

317

318

def get_fulltext_content(self, lines):
    """Extract just the content lines from a fulltext.

    Plain fulltexts carry no annotation prefix, so this is simply an
    iterator over lines.
    """
    content_iter = iter(lines)
    return content_iter

321

322

def get_linedelta_content(self, lines):
    """Extract just the content from a line delta.

    The hunk positions are discarded; only the content lines that
    follow each header are yielded, unchanged.
    """
    stream = iter(lines)
    pull = stream.next
    for header in stream:
        fields = header.split(',')
        # The third header field is the number of content lines that follow.
        remaining = int(fields[2])
        for _ in xrange(remaining):
            yield pull()

335

336

def lower_fulltext(self, content):
    """Serialise a plain fulltext: just the result of content.text()."""
    plain_lines = content.text()
    return plain_lines

338

339

def lower_line_delta(self, delta):
    """Convert a delta into a serializable, unannotated form.

    Each hunk contributes a "start,end,count" header line followed by
    the text half of each of its (origin, text) pairs.
    """
    serialised = []
    for start, end, count, annotated in delta:
        header = '%d,%d,%d\n' % (start, end, count)
        serialised.append(header)
        for origin, text in annotated:
            serialised.append(text)
    return serialised

345

346

347

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: the transport the knit is stored on.
    :param relpath: the path of the knit relative to transport.
    :return: the new KnitVersionedFile, opened for writing with a
        KnitPlainFactory.
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...).  The former positional call swapped the
    # first two arguments, sent 'w' as file_mode and the factory class as
    # access_mode (which fails the access_mode assertion), and dropped the
    # result.  create=True is needed because without it the constructor
    # only opens an existing knit.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

350

351

352

class KnitVersionedFile(VersionedFile):

353

"""Weave-like structure with faster random access.

354

355

A knit stores a number of texts and a summary of the relationships

356

between them. Texts are identified by a string version-id. Texts

357

are normally stored and retrieved as a series of lines, but can

358

also be passed as single strings.

359

360

Lines are stored with the trailing newline (if any) included, to

361

avoid special cases for files with no final newline. Lines are

362

composed of 8-bit characters, not unicode. The combination of

363

these approaches should mean any 'binary' file can be safely

364

stored and retrieved.

365

"""

366

367

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.
    :param access_method: An access object to use for the knit data; when
        None a _KnitAccess on relpath + DATA_SUFFIX is built.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
                      " deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    if index is not None:
        self._index = index
    else:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
                                 access_mode, create=create,
                                 file_mode=file_mode,
                                 create_parent_dir=create_parent_dir,
                                 delay_create=delay_create,
                                 dir_mode=dir_mode)
    # True-ish only when creation was requested and the index holds no
    # versions yet; len(self) is not evaluated unless create is set.
    creating_empty = create and not len(self)
    if access_method is not None:
        _access = access_method
    else:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode,
                              dir_mode, (creating_empty and delay_create),
                              create_parent_dir)
    if creating_empty and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

412

413

def __repr__(self):
    """Return "ClassName(absolute location)" for this knit."""
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (self.__class__.__name__, location)

416

417

def _check_should_delta(self, first_parents):
    """Iterate back through the parent listing, looking for a fulltext.

    This is used when we want to decide whether to add a delta or a new
    fulltext. It follows the first parent of each version for at most
    _max_delta_chain steps, adding up the sizes of the deltas met on the
    way. When it reaches a fulltext it compares that fulltext's size with
    the accumulated delta size.

    Return True if we should create a new delta, False if we should use a
    full text.
    """
    accumulated = 0
    chain = first_parents
    for _step in xrange(self._max_delta_chain):
        ancestor = chain[0]
        method = self._index.get_method(ancestor)
        index, pos, size = self._index.get_position(ancestor)
        if method == 'fulltext':
            # A delta is only worthwhile while the chain is still smaller
            # than the fulltext it is based on.
            return size > accumulated
        accumulated += size
        chain = self._index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

445

446

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored as a 'line-delta' record against delta_parent
    unless there is no delta parent or _check_should_delta rejects the
    chain; in both of those cases the base class implementation is used
    instead and its result returned.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # From here on delta_parent is known to be set.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

500

501

def _add_raw_records(self, records, data):
    """Add all the records 'records' with data pre-joined in 'data'.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The data for the records. Record N occupies the slice of
        data that starts at the sum of the sizes of the records before it.
    """
    # write all the data
    sizes = [record[3] for record in records]
    memos = self._data.add_raw_records(sizes, data)
    entries = []
    start = 0
    for record, access_memo in zip(records, memos):
        version_id, options, parents, size = record
        entries.append((version_id, options, access_memo, parents))
        if self._data._do_cache:
            # Keep this record's own slice of the joined data in the cache.
            self._data._cache[version_id] = data[start:start + size]
        start += size
    self._index.add_versions(entries)

521

522

def enable_cache(self):
    """Start caching data for this knit.

    Delegates to the enable_cache method of self._data.
    """
    store = self._data
    store.enable_cache()

525

526

def clear_cache(self):
    """Clear the data cache only.

    Delegates to the clear_cache method of self._data; the index is not
    touched.
    """
    store = self._data
    store.clear_cache()

529

530

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: base name of the copy; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it for the two files.
    :param transport: the transport the copy is written to.
    """
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                                      index_file)
    finally:
        # Close the source handle explicitly, as is done for the data
        # file below, rather than leaving it to garbage collection.
        index_file.close()
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

544

545

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport.

    The new knit shares this knit's factory and delta setting and is
    opened with create=True.  The mode argument is accepted but not
    used.
    """
    settings = {'factory': self.factory, 'delta': self.delta,
                'create': True}
    return KnitVersionedFile(name, transport, **settings)

548

549

def _fix_parents(self, version_id, new_parents):
    """Fix the parents list for version.

    This is done by appending a new version to the index
    with identical data except for the parents list.
    the parents list must be a superset of the current
    list.
    """
    entry = self._index._cache[version_id]
    old_parents = set(entry[4])
    assert old_parents.difference(set(new_parents)) == set()
    # Entries 1, 2 and 3 of the cached value are reused as the options
    # and as the last two members of the access memo.
    memo = (None, entry[2], entry[3])
    self._index.add_version(version_id, entry[1], memo, new_parents)

563

564

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored delta.

    Returns None when the version is not stored as a 'line-delta';
    otherwise the blocks KnitContent.get_line_delta_blocks produces for
    the delta, source and target.
    """
    if self._index.get_method(version_id) == 'line-delta':
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

569

570

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first
        parent of version_id or None, and delta is either the stored
        line delta or, for a fulltext record, a delta computed against
        the parent's text (against no lines when there is no parent).
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        reference_content = self._get_content(parent)
        old_texts = reference_content.text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

598

599

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    Returns a dict built from the items self._index.get_graph() yields.
    """
    return dict(self._index.get_graph())

603

604

def get_sha1(self, version_id):
    """Return the sha1 for version_id, via get_sha1s."""
    digests = self.get_sha1s([version_id])
    return digests[0]

606

607

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: the digests of version_ids, in the order given.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for safe_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[safe_id][2])
    return digests

613

614

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Returns the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

618

619

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is present is never a ghost; otherwise the graph is
    searched for a parent reference equal to version_id that is missing
    from the index cache.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

633

634

def versions(self):
    """See VersionedFile.versions.

    Returns self._index.get_versions(); when the 'evil' debug flag is
    set the call site is logged first.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    all_versions = self._index.get_versions()
    return all_versions

639

640

def has_version(self, version_id):
    """See VersionedFile.has_version.

    The id is passed through osutils.safe_revision_id before the index
    is asked; when the 'evil' debug flag is set the call site is logged
    first.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

646

647

__contains__ = has_version

648

649

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content, and optionally build a line delta.

    When annotated is true, content._lines is updated in place: every
    run of lines that matches a parent takes over that parent's
    (origin, text) pairs.  When delta is true the line delta of content
    against the first parent is returned; otherwise the result is None.

    :param left_matching_blocks: optional precomputed matching blocks
        against the first parent, used instead of running a diff.
    """
    # parent_texts is only handed on to _get_content in this method.
    delta_seq = None
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                matcher = delta_seq
            else:
                matcher = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for src, dst, length in matcher.get_matching_blocks():
                if length == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[dst:dst + length] = \
                    merge_content._lines[src:src + length]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

682

683

def _make_line_delta(self, delta_seq, new_content):
    """Generate a line delta from delta_seq and new_content.

    Every opcode of delta_seq other than 'equal' becomes one hunk
    (old_start, old_end, new_line_count, new (origin, text) pairs).
    """
    return [(i1, i2, j2 - j1, new_content._lines[j1:j2])
            for tag, i1, i2, j1, j2 in delta_seq.get_opcodes()
            if tag != 'equal']

691

692

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, index_memo, next) is returned.
    method is the way referenced data should be applied.
    index_memo is what self._index.get_position returns for the version.
    next is the build-parent of the version, or None for fulltexts.
    """
    positions = {}
    for version_id in version_ids:
        cursor = version_id
        # Follow the build chain until a fulltext or an already mapped
        # component is reached.
        while cursor is not None and cursor not in positions:
            method = self._index.get_method(cursor)
            if method == 'fulltext':
                build_parent = None
            else:
                build_parent = self.get_parents(cursor)[0]
            index_memo = self._index.get_position(cursor)
            positions[cursor] = (method, index_memo, build_parent)
            cursor = build_parent
    return positions

718

719

def _get_content(self, version_id, parent_texts={}):
    """Returns a content object that makes up the specified version.

    An entry for version_id in parent_texts (if not None) is returned
    directly; otherwise the content is built with _get_content_maps.

    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    supplied = parent_texts.get(version_id, None)
    if supplied is None:
        text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]
    return supplied

731

732

def _check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    Delegates to self._index.check_versions_present.
    """
    index = self._index
    index.check_versions_present(version_ids)

735

736

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines_with_ghosts().

    Unlike _add_lines, the parents are not checked for presence.  A
    copy of lines is handed to _add.
    """
    self._check_add(version_id, lines)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

740

741

def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks=None):
    """See VersionedFile.add_lines.

    The version and lines are checked, then all parents must be
    present, and finally a copy of lines is handed to _add.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts, left_matching_blocks)

748

749

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already stored.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_stored = self.has_version(version_id)
    if already_stored:
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

760

761

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks=None):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: list of lines; modified in place when the last line
        lacks a trailing newline.
    :return: the content object self.factory.make built for the lines.
    """
    if parent_texts is None:
        parent_texts = {}
    # Parents missing from this knit are ghosts and play no part in
    # delta or annotation decisions.
    present_parents = [p for p in parents if self.has_version(p)]

    if delta and not len(present_parents):
        delta = False

    digest = sha_strings(lines)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # Record the missing newline as an option and store the line
            # with one appended.
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    needs_annotation = self.factory.annotated and len(present_parents) > 0
    if delta or needs_annotation:
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated,
                                              left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return lines

826

827

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None;
    progress_bar is accepted but not used.
    """
    return None

829

830

def _clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text().

    The lines of old_version_id are read back and added again under
    new_version_id with the given parents.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    existing_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, existing_lines)

835

836

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Implemented as a one-element get_line_list call.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

839

840

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is what self._data.read_records_iter yields for the record.
    digest is the SHA1 digest of this version id after all steps are done
    next is the build-parent of the version, i.e. the leftmost ancestor.
    If the method is fulltext, next will be None.
    """
    position_map = self._get_components_positions(version_ids)
    # Each read request pairs a component with its index memo.
    requests = []
    for component_id in position_map:
        method, index_memo, build_parent = position_map[component_id]
        requests.append((component_id, index_memo))
    record_map = {}
    for component_id, content, digest in \
            self._data.read_records_iter(requests):
        method, index_memo, build_parent = position_map[component_id]
        record_map[component_id] = method, content, digest, build_parent

    return record_map

861

862

def get_text(self, version_id):
    """See VersionedFile.get_text.

    Implemented as a one-element get_texts call.
    """
    texts = self.get_texts([version_id])
    return texts[0]

865

866

def get_texts(self, version_ids):
    """Return each requested version as one string.

    The line lists from get_line_list are joined without a separator.
    """
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

868

869

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Every id is passed through osutils.safe_revision_id and checked
    with check_not_reserved_id before any text is built.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

876

877

_get_lf_split_line_list = get_line_list

878

879

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is missing.
    :raises KnitCorrupt: if a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Contents of every component built so far, shared across versions.
    built = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at a fulltext
        # or at a component that has already been built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built:
                break
            cursor = build_parent

        # Replay the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in built:
                content = built[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                content = content.copy()
                content._lines = self._apply_delta(content._lines, delta)
            built[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the shared component content keeps its
            # trailing newline.
            content = content.copy()
            last_origin = content._lines[-1][0]
            stripped = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (last_origin, stripped)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

933

934

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: the versions to walk, or None for all versions.
    :param pb: an optional progress bar; a DummyProgress is used if None.
    :raises RevisionNotPresent: if a requested version is missing.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    wanted = set(version_ids)
    # filter for available versions
    for version_id in wanted:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue: each entry is (version_id, index_memo)
    to_read = []
    for version_id in self.versions():
        if version_id not in wanted:
            continue
        to_read.append((version_id, self._index.get_position(version_id)))

    total = len(to_read)
    records = self._data.read_records_iter(to_read)
    for version_idx, (version_id, data, sha_value) in enumerate(records):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract = self.factory.get_fulltext_content
        else:
            extract = self.factory.get_linedelta_content
        for line in extract(data):
            yield line

    pb.update('Walking content.', total, total)

973

974

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

986

987

def num_versions(self):
    """See VersionedFile.num_versions().

    Returns what self._index.num_versions() reports.
    """
    count = self._index.num_versions()
    return count

990

991

__len__ = num_versions

992

993

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields the (origin, text) pairs of the content built for version_id.
    """
    safe_id = osutils.safe_revision_id(version_id)
    annotated = self._get_content(safe_id).annotate_iter()
    for origin, text in annotated:
        yield origin, text

999

1000

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    safe_id = osutils.safe_revision_id(version_id)
    try:
        return self._index.get_parents(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)

1010

1011

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    safe_id = osutils.safe_revision_id(version_id)
    try:
        return self._index.get_parents_with_ghosts(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)

1018

1019

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: a single version id string or a list of them.
    :return: [] when no versions are given, otherwise whatever
        self._index.get_ancestry returns for the ids.
    """
    if isinstance(versions, basestring):
        # A lone id is treated as a one-element list.
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(safe_ids, topo_sorted)
    return []

1027

1028

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: a single version id string or a list of them.
    :return: [] when no versions are given, otherwise whatever
        self._index.get_ancestry_with_ghosts returns for the ids.
    """
    if isinstance(versions, basestring):
        # A lone id is treated as a one-element list.
        versions = [versions]
    if versions:
        safe_ids = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(safe_ids)
    return []

1036

1037

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    The unsorted ancestries and the annotations of both versions are
    collected and handed to merge._plan_annotate_merge.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    ancestry_b = self.get_ancestry(ver_b, topo_sorted=False)
    ancestors_b = set(ancestry_b)

    ancestry_a = self.get_ancestry(ver_a, topo_sorted=False)
    ancestors_a = set(ancestry_a)
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    return merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)

1048

1049

1050

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it is to be opened."""
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Read the first line of fp and compare it with self.HEADER.

        :raises errors.NoSuchFile: if fp is empty.
        :raises KnitHeaderError: if the line differs from self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line != self.HEADER:
            location = self._transport.abspath(self._filename)
            raise KnitHeaderError(badline=first_line, filename=location)

    def __repr__(self):
        """Return "ClassName(filename)"."""
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1079

1080

1081

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to an index entry
    tuple: (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record's data in
        the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write is interrupted to the index file, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it is normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).

        The cache entry is the tuple
        (version_id, options, pos, size, parents, index); a repeated
        version_id keeps the index of its first entry.
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Open the index, loading it into memory, or create it.

        :param transport: The transport the index file is read from.
        :param filename: Name of the index file on the transport.
        :param mode: Access mode; creation is only attempted for 'w'.
        :param create: If True (and mode is 'w'), a missing index is
            created rather than being an error.
        :param file_mode: Mode passed to the transport when writing.
        :param create_parent_dir: Stored on the instance and used by
            add_versions when the delayed file is finally written.
        :param delay_create: If True, a missing index is not written now;
            it is written by the first add_versions call instead.
        :param dir_mode: Stored on the instance and used by add_versions
            when the delayed file is finally written.
        :raises NoSuchFile: If the index is missing and is not to be
            created.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of (version_id, parents) tuples from this index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Parents that are not present in the index (ghosts) are left out.

        :raises RevisionNotPresent: If a requested version is not in the
            index.
        """
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost parents are included in the result as nodes without parents.

        :raises RevisionNotPresent: If a requested version is not in the
            index.
        """
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                yield version_id, tuple(self.get_parents(version_id))
            except KeyError:
                pass

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references for writing to the index file.

        Versions already in the cache are written as their sequence
        number; any other version is written as '.' followed by its id.

        :return: The encoded references joined by single spaces.
        """
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, index_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, index_memo, parents)
                         where index_memo is an (index, pos, size) tuple;
                         the index element is not used here.
        """
        lines = []
        # Snapshots used to roll back the in-memory state on failure.
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                # Creation was delayed: write the header and the new
                # records in one go.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        return None, entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version.

        :return: 'fulltext' or 'line-delta'.
        :raises errors.KnitIndexUnknownMethod: If the options name neither
            method.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            if 'line-delta' not in options:
                raise errors.KnitIndexUnknownMethod(self._full_path(), options)
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options stored in the cache for version_id.

        This is the options value the entry was cached with, not a joined
        string.
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: For the first version found missing.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

1360

1361

1362

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Index keys are 1-tuples, (version_id,).  Nodes handled here are tuples
    (index, key, value, node_refs).  The value is a string whose first
    character is 'N' for a no-eol record and ' ' otherwise, followed by
    "<pos> <size>".  When parents are tracked, node_refs[0] holds the parent
    keys; when deltas are enabled, node_refs[1] holds the compression
    parent (empty for a fulltext).
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: If deltas are requested without parent tracking.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            found nodes have been yielded if any requested key was not found.
        :return: An iterator of (index, key, value, node_refs) nodes; for a
            parentless index node_refs is always ().
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that exist in the index.

        Despite the parameter name, the values are handed to _get_entries
        unchanged, so callers pass index keys (1-tuples).
        """
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Keys that are referenced as parents but cannot be found (ghosts)
        are not returned.

        :raises RevisionNotPresent: If a requested version is not present.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Ghost parents are included in the result as nodes without parents.

        :raises RevisionNotPresent: If a requested version is not present.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of (version_id, parents) tuples from this index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # Only parents that were not themselves queried need a lookup.
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Return a tuple of the version ids held in the given 1-tuple keys."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        :raises RevisionNotPresent: If the version is not in the index.
        """
        node = self._get_node(version_id)
        # Skip the leading no-eol flag character, leaving "<pos> <size>".
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version.

        :return: 'fulltext' or 'line-delta'.
        :raises RevisionNotPresent: If deltas are enabled and the version is
            not in the index.
        """
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Return the method implied by a node's compression-parent list."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the (index, key, value, node_refs) node for version_id.

        :raises RevisionNotPresent: If the version is not in the index.
            (A missing version used to escape as a bare IndexError, unlike
            the other lookups on this class.)
        """
        keys = self._version_ids_to_keys([version_id])
        try:
            return list(self._get_entries(keys))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return the list of option strings for version_id.

        The list holds the compression method ('fulltext' or 'line-delta'),
        followed by 'no-eol' when the stored value starts with 'N'.

        :raises RevisionNotPresent: If the version is not in the index.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts.

        :raises errors.RevisionNotPresent: If the version is not in the
            index.
        """
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts.

        :raises RevisionNotPresent: If the version is not in the index.
        """
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: If any version is missing.
        """
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        Versions already present with identical details are dropped from the
        list handed to the callback.

        :param versions: a list of tuples:
                         (version_id, options, access_memo, parents)
                         where access_memo is an (index, pos, size) tuple.
        :raises errors.ReadOnlyError: If no add_callback was supplied.
        :raises KnitCorrupt: If a record does not fit this index's
            configuration, or conflicts with an entry already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # The first parent is recorded as the compression
                        # parent.
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Return the set of 1-tuple index keys for the given version ids."""
        return set((version_id, ) for version_id in version_ids)

1674

1675

1676

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Mode passed to the transport when writing.
        :param _dir_mode: Directory mode passed to the transport when the
            file is created by the first write.
        :param _need_to_create: If True, the first add_raw_records call
            writes the file instead of appending to it.
        :param _create_parent_dir: Passed to the transport when the file is
            created by the first write.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # Delayed creation: the file is written with just this data, so
            # the first record starts at offset zero.
            self._transport.put_bytes_non_atomic(
                self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            start = 0
        else:
            # The value returned by append_bytes is used as the position of
            # the first new record.
            start = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, start, size))
            start += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(
            self._filename, '', mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            # A missing file is reported as None rather than an error.
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, record_bytes in self._transport.readv(self._filename, ranges):
            yield record_bytes

1754

1755

1756

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            # The position and length stored in the memo are those reported
            # by the container writer, not the offsets into raw_data.
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        # None until the first memo is seen.  Grouping keys on this rather
        # than on current_index, so an index that compares equal to None
        # cannot reach an unbound list.
        current_list = None
        for (index, offset, length) in memos_for_retrieval:
            if current_list is not None and current_index == index:
                current_list.append((offset, length))
            else:
                if current_list is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_list is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that new records are added to.
        :param index: The index object to associate with the written data;
            it becomes the index field of memos from add_raw_records.
        :param transport_packname: A (transport, packname) tuple recorded in
            self.indices for index.
        """
        # Unpacked explicitly: tuple parameters in the signature are not
        # valid syntax in Python 3 (PEP 3113).
        transport, packname = transport_packname
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

1840

1841

1842

class _KnitData(object):

1843

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1844

1845

The KnitData class provides the logic for parsing and using knit records,

1846

making use of an access method for the low level read and write operations.

1847

"""

1848

1849

def __init__(self, access):

1850

"""Create a KnitData object.

1851

1852

:param access: The access method to use. Access methods such as

1853

_KnitAccess manage the insertion of raw records and the subsequent

1854

retrieval of the same.

1855

"""

1856

self._access = access

1857

self._checked = False

1858

# TODO: jam 20060713 conceptually, this could spill to disk

1859

# if the cached size gets larger than a certain amount

1860

# but it complicates the model a bit, so for now just use

1861

# a simple dictionary

1862

self._cache = {}

1863

self._do_cache = False

1864

1865

def enable_cache(self):

1866

"""Enable caching of reads."""

1867

self._do_cache = True

1868

1869

def clear_cache(self):

1870

"""Clear the record cache."""

1871

self._do_cache = False

1872

self._cache = {}

1873

1874

def _open_file(self):

1875

return self._access.open_file()

1876

1877

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The gzipped block holds a "version" header line, the content lines,
    and a closing "end" line.

    :param version_id: The version id, which must be a str.
    :param digest: The digest written into the header line.
    :param lines: The content lines of the record.
    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    buf = StringIO()
    gz = GzipFile(None, mode='wb', fileobj=buf)

    assert isinstance(version_id, str)
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id
    gz.writelines(chain([header], lines, [footer]))
    gz.close()
    # Closing the gzip stream flushes it, so the buffer position is now the
    # size of the compressed record.
    length = buf.tell()

    buf.seek(0)
    return length, buf

1897

1898

def add_raw_records(self, sizes, raw_data):

1899

"""Append a prepared record to the data file.

1900

1901

:param sizes: An iterable containing the size of each raw data segment.

1902

:param raw_data: A bytestring containing the data.

1903

:return: a list of index data for the way the data was stored.

1904

See the access method add_raw_records documentation for more

1905

details.

1906

"""

1907

return self._access.add_raw_records(sizes, raw_data)

1908

1909

def add_record(self, version_id, digest, lines):
    """Store a new text record and, if caching is on, remember it.

    :param version_id: The version id of the new record.
    :param digest: The digest recorded in the record header.
    :param lines: The content lines of the record.
    :return: index data for retrieving the record later, as per
        add_raw_records (the first element of its result).
    """
    size, buf = self._record_to_data(version_id, digest, lines)
    raw_bytes = buf.getvalue()
    index_entries = self.add_raw_records([size], raw_bytes)
    if self._do_cache:
        self._cache[version_id] = raw_bytes
    return index_entries[0]

1919

1920

def _parse_record_header(self, version_id, raw_data):

1921

"""Parse a record header for consistency.

1922

1923

:return: the header and the decompressor stream.

1924

as (stream, header_record)

1925

"""

1926

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1927

try:

1928

rec = self._check_header(version_id, df.readline())

1929

except Exception, e:

1930

raise KnitCorrupt(self._access,

1931

"While reading {%s} got %s(%s)"

1932

% (version_id, e.__class__.__name__, str(e)))

1933

return df, rec

1934

1935

def _check_header(self, version_id, line):

1936

rec = line.split()

1937

if len(rec) != 4:

1938

raise KnitCorrupt(self._access,

1939

'unexpected number of elements in record header')

1940

if rec[1] != version_id:

1941

raise KnitCorrupt(self._access,

1942

'unexpected version, wanted %r, got %r'

1943

% (version_id, rec[1]))

1944

return rec

1945

1946

def _parse_record(self, version_id, data):

1947

# profiling notes:

1948

# 4168 calls in 2880 217 internal

1949

# 4168 calls to _parse_record_header in 2121

1950

# 4168 calls to readlines in 330

1951

df = GzipFile(mode='rb', fileobj=StringIO(data))

1952

1953

try:

1954

record_contents = df.readlines()

1955

except Exception, e:

1956

raise KnitCorrupt(self._access,

1957

"While reading {%s} got %s(%s)"

1958

% (version_id, e.__class__.__name__, str(e)))

1959

header = record_contents.pop(0)

1960

rec = self._check_header(version_id, header)

1961

1962

last_line = record_contents.pop()

1963

if len(record_contents) != int(rec[2]):

1964

raise KnitCorrupt(self._access,

1965

'incorrect number of lines %s != %s'

1966

' for version {%s}'

1967

% (len(record_contents), int(rec[2]),

1968

version_id))

1969

if last_line != 'end %s\n' % rec[1]:

1970

raise KnitCorrupt(self._access,

1971

'unexpected version end line %r, wanted %r'

1972

% (last_line, version_id))

1973

df.close()

1974

return record_contents, rec[3]

1975

1976

def read_records_iter_raw(self, records):
    """Yield (version_id, raw data) for each requested record, in order.

    Records whose version id is in the cache are served from it; the
    rest are fetched from the access method in one request.  Fetched
    data is unpacked just far enough to validate that the header carries
    the expected id, but that is all.

    :param records: A sized sequence of (version_id, index_memo) pairs.
    """
    # Fetch everything that is not cached with one call to the access
    # method (uses readv so nice and fast we hope).
    if len(records):
        if not self._cache:
            # An empty cache cannot hold anything; take every memo.
            needed_offsets = [memo for _, memo in records]
        else:
            needed_offsets = [memo for vid, memo in records
                              if vid not in self._cache]
        raw_records = self._access.get_raw_records(needed_offsets)

    for version_id, index_memo in records:
        if version_id not in self._cache:
            data = raw_records.next()
            if self._do_cache:
                self._cache[version_id] = data
            # Freshly fetched data: validate the header.
            stream, header = self._parse_record_header(version_id, data)
            stream.close()
        else:
            # Cached data has already been validated.
            data = self._cache[version_id]
        yield version_id, data

2010

2011

def read_records_iter(self, records):
    """Yield parsed records, cached ones first, then fetched ones.

    The result is returned in whatever order is the fastest to read,
    not the order requested.  Also, multiple requests for the same
    record will only yield 1 response.

    :param records: A sequence of (version_id, index_memo) entries.
    :return: Yields (version_id, contents, digest) in the order read,
        not the order requested.
    """
    if not records:
        return

    if not self._cache:
        # Nothing cached: de-duplicate and order by index memo.
        pending = sorted(set(records), key=operator.itemgetter(1))
    else:
        # Serve cached records straight away, once per version id, and
        # collect everything else for a single fetch below.
        served = set()
        pending = set()
        for record in records:
            record_id = record[0]
            if record_id not in self._cache:
                pending.add(record)
            elif record_id not in served:
                served.add(record_id)
                content, digest = self._parse_record(
                    record_id, self._cache[record_id])
                yield (record_id, content, digest)
        pending = sorted(pending, key=operator.itemgetter(1))

    if not pending:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for _, index_memo in pending]
    raw_data = self._access.get_raw_records(memos)

    for (version_id, index_memo), data in izip(iter(pending), raw_data):
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

2056

2057

def read_records(self, records):
    """Read records and collect them into a dictionary.

    :param records: The records to read, as accepted by
        read_records_iter.
    :return: A dict mapping each record id to its (content, digest).
    """
    return dict(
        (record_id, (content, digest))
        for record_id, content, digest in self.read_records_iter(records))

2064

2065

2066

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles.

        An AttributeError raised while checking is treated as "not
        compatible".
        """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of the versions the target is missing
        from the source, then reconciles the parents of versions present
        on both sides whose parent lists differ.

        :return: The number of records copied; 0 when there was nothing
            to do.
        :raises errors.GraphCycleError: if taking a parent from the
            source would make a version an ancestor of itself.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # NOTE: the pb argument is rebound here, so a progress bar passed
        # in by the caller is not used; msg is not referenced in this
        # method at all.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions only the source has must be copied; versions both
            # sides have need their parents cross-checked.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order of the source graph, restricted
            # to the versions that are needed and still absent.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            # read_records_iter_raw is walked in step with copy_queue;
            # the assert below checks both yield the same version id.
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            # All copied records are handed to the target in one call.
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2178

2179

2180

# Register InterKnit with InterVersionedFile.  register_optimiser is defined
# outside this view; presumably it makes InterKnit a candidate wherever
# InterKnit.is_compatible(source, target) is true - confirm there.
InterVersionedFile.register_optimiser(InterKnit)

2181

2182

2183

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True for a Weave source and a KnitVersionedFile target.

        An AttributeError raised while checking is treated as "not
        compatible".
        """
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the versions the target is missing by reading their lines
        from the weave and calling add_lines on the knit, then reconciles
        the parents of versions present on both sides.

        :return: The number of versions added; 0 when there was nothing
            to do.
        :raises errors.GraphCycleError: if taking a parent from the
            source would make a version an ancestor of itself.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # NOTE: the pb argument is rebound here, so a progress bar passed
        # in by the caller is not used; msg is not referenced in this
        # method at all.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions only the source has must be added; versions both
            # sides have need their parents cross-checked.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order of the source graph, restricted
            # to the versions that are needed and still absent.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2271

2272

2273

# Register WeaveToKnit with InterVersionedFile.  register_optimiser is
# defined outside this view; presumably it makes WeaveToKnit a candidate
# wherever WeaveToKnit.is_compatible(source, target) is true - confirm there.
InterVersionedFile.register_optimiser(WeaveToKnit)

2274

2275

2276

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.  Only find_longest_match is overridden.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        (The examples below are written against the name SequenceMatcher,
        as in difflib.)

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # nothing = []
        # NOTE(review): b2jget is not used anywhere below; it looks like a
        # leftover from the b2j.get(a[i], nothing) form described in the
        # profiling comment inside the loop.
        b2jget = b2j.get
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            # j2len is rebound at the end of every iteration, so its bound
            # get method has to be looked up afresh each time.
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            # 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)
            # +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>
            # +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>
            # to
            # 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)
            # +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>
            # +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        # the break assumes the indices in js are in
                        # increasing order, so nothing later can be < bhi
                        if j >= bhi:
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize

2408

2409

2410

# Bind _load_data to the implementation in bzrlib._knit_load_data_c when
# that module can be imported, and otherwise fall back to the one in
# bzrlib._knit_load_data_py.
try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »