/brz/remove-bazaar : revision 2743

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-08-22 05:28:32 UTC
mfrom: (2671.4.3 reconcile)
Revision ID: pqm@pqm.ubuntu.com-20070822052832-nxby1d1plok4syek

(robertc) Add a private repository attribute to tell reconcile tests what the repository does to inventories during reconcile(). (Robert Collins).

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

pack,

)

""")

from bzrlib import (

cache_utf8,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.tuned_gzip import GzipFile

100

from bzrlib.osutils import (

101

contains_whitespace,

102

contains_linebreaks,

103

sha_strings,

104

)

105

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

106

from bzrlib.tsort import topo_sort

107

import bzrlib.ui

108

import bzrlib.weave

109

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

110

111

112

# TODO: Split out code specific to this format into an associated object.

113

114

# TODO: Can we put in some kind of value to check that the index and data

115

# files belong together?

116

117

# TODO: accommodate binaries, perhaps by storing a byte count

118

119

# TODO: function to check whole file

120

121

# TODO: atomically append data, then measure backwards from the cursor

122

# position after writing to work out where it was located. we may need to

123

# bypass python file buffering.

124

125

DATA_SUFFIX = '.knit'

126

INDEX_SUFFIX = '.kndx'

127

128

129

class KnitContent(object):
    """Annotated text of a single knit version, the base deltas apply to.

    The content is kept in ``_lines`` as a sequence of ``(origin, text)``
    pairs, one pair per line.
    """

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the (origin, text) pair of every line."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs of every line as a fresh list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        :param new_lines: The target content object; its text() and _lines
            are read.
        :return: An iterator of (old_start, old_end, new_length, new_data)
            tuples, where new_data is the slice of new_lines' annotated
            lines that replaces old lines old_start..old_end.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # Unchanged runs are never recorded in a delta.
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return only the text of every line, dropping the origins."""
        plain = []
        for origin, text in self._lines:
            plain.append(text)
        return plain

    def copy(self):
        """Return a new KnitContent holding a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher-style matching blocks from a knit delta.

        :param knit_delta: Iterable of (source_start, source_end,
            target_length, new_text) hunks.
        :param source: Indexable sequence of the lines the delta applies to.
        :param target: Indexable sequence of the lines the delta produces.
        :return: A generator of (source_pos, target_pos, length) triples,
            finishing with a zero-length terminator block.
        """
        def reliable_length(src_at, tgt_at, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the final
            # line of each candidate run is compared explicitly.
            if length > 0:
                last = length - 1
                if source[src_at + last] != target[tgt_at + last]:
                    return last
            return length

        total = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_at
            run = reliable_length(src_at, tgt_at, gap)
            if run > 0:
                yield src_at, tgt_at, run
            tgt_at += inserted + gap
            src_at = hunk_end
        tail = total - tgt_at
        run = reliable_length(src_at, tgt_at, tail)
        if run > 0:
            yield src_at, tgt_at, run
        yield src_at + tail, total, 0

188

189

190

class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version_id):
        """Build a KnitContent in which every line originates from version_id.

        :param lines: Iterable of text lines.
        :param version_id: The origin recorded against every line.
        :return: A KnitContent whose lines are (version_id, line) tuples.
        """
        # Build the list of pairs explicitly instead of relying on zip()
        # returning a list: a lazy zip object can be consumed only once and
        # cannot be sliced, which KnitContent.copy() needs.
        return KnitContent([(version_id, line) for line in lines])

196

197

198

class KnitAnnotateFactory(_KnitFactory):

199

"""Factory for creating annotated Content objects."""

200

201

annotated = True

202

203

def parse_fulltext(self, content, version_id):
    """Turn serialised annotated fulltext lines into a KnitContent.

    Each serialised line has the form::

        revid(utf8) plaintext\\n

    and becomes the tuple (revid, plaintext) in the returned content.

    :param content: Iterable of serialised fulltext lines.
    :param version_id: Not used by this implementation.
    :return: A KnitContent wrapping the list of tuples.
    """
    # TODO: jam 20070209 The tests expect this to be returned as tuples,
    #       but the code itself doesn't really depend on that.
    #       Figure out a way to not require the overhead of turning the
    #       list back into tuples.
    annotated = []
    for raw_line in content:
        annotated.append(tuple(raw_line.split(' ', 1)))
    return KnitContent(annotated)

217

218

def parse_line_delta_iter(self, lines):

219

return iter(self.parse_line_delta(lines))

220

221

def parse_line_delta(self, lines, version_id):

222

"""Convert a line based delta into internal representation.

223

224

line delta is in the form of:

225

intstart intend intcount

226

1..count lines:

227

revid(utf8) newline\n

228

internal representation is

229

(start, end, count, [1..count tuples (revid, newline)])

230

"""

231

result = []

232

lines = iter(lines)

233

next = lines.next

234

235

cache = {}

236

def cache_and_return(line):

237

origin, text = line.split(' ', 1)

238

return cache.setdefault(origin, origin), text

239

240

# walk through the lines parsing.

241

for header in lines:

242

start, end, count = [int(n) for n in header.split(',')]

243

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

244

result.append((start, end, count, contents))

245

return result

246

247

def get_fulltext_content(self, lines):

248

"""Extract just the content lines from a fulltext."""

249

return (line.split(' ', 1)[1] for line in lines)

250

251

def get_linedelta_content(self, lines):

252

"""Extract just the content from a line delta.

253

254

This doesn't return all of the extra information stored in a delta.

255

Only the actual content lines.

256

"""

257

lines = iter(lines)

258

next = lines.next

259

for header in lines:

260

header = header.split(',')

261

count = int(header[2])

262

for i in xrange(count):

263

origin, text = next().split(' ', 1)

264

yield text

265

266

def lower_fulltext(self, content):

267

"""convert a fulltext content record into a serializable form.

268

269

see parse_fulltext which this inverts.

270

"""

271

# TODO: jam 20070209 We only do the caching thing to make sure that

272

# the origin is a valid utf-8 line, eventually we could remove it

273

return ['%s %s' % (o, t) for o, t in content._lines]

274

275

def lower_line_delta(self, delta):

276

"""convert a delta into a serializable form.

277

278

See parse_line_delta which this inverts.

279

"""

280

# TODO: jam 20070209 We only do the caching thing to make sure that

281

# the origin is a valid utf-8 line, eventually we could remove it

282

out = []

283

for start, end, c, lines in delta:

284

out.append('%d,%d,%d\n' % (start, end, c))

285

out.extend(origin + ' ' + text

286

for origin, text in lines)

287

return out

288

289

290

class KnitPlainFactory(_KnitFactory):
    """Factory producing Content objects from unannotated knit data."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Turn an unannotated fulltext into a content object.

        This is not a no-op: the internal representation pairs every line
        with a version id, which here is the constant ``version_id``.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, annotated_lines) hunks from a plain delta.

        ``lines`` must support ``len()`` and slicing. Each hunk header is a
        'start,end,count' line followed by ``count`` content lines, each of
        which is paired with ``version_id``.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            position += 1
            start, end, count = [int(field) for field in fields]
            body = lines[position:position + count]
            yield start, end, count, zip([version_id] * count, body)
            position += count

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of a plain line delta as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Iterate over the content lines of a fulltext (stored unannotated)."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        The hunk headers are consumed to learn how many content lines
        follow, but are not themselves yielded.
        """
        stream = iter(lines)
        for header in stream:
            remaining = int(header.split(',')[2])
            while remaining > 0:
                yield stream.next()
                remaining -= 1

    def lower_fulltext(self, content):
        """Serialise a fulltext content object: just its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta as header lines followed by unannotated text."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            for _origin, line_text in hunk_lines:
                serialised.append(line_text)
        return serialised

343

344

345

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: the transport the knit files live on.
    :param relpath: path of the knit relative to ``transport``.
    :return: the newly created, writable, unannotated KnitVersionedFile.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a factory
    # *instance*; the mode and factory are passed by keyword so they cannot
    # land in the wrong positional slot.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

348

349

350

class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
                 create=False, create_parent_dir=False, delay_create=False,
                 dir_mode=None, index=None, access_method=None):
        """Construct a knit at location specified by relpath.

        :param relpath: path of the knit, relative to ``transport``.
        :param transport: transport used to reach the knit files.
        :param access_mode: 'r' or 'w'; defaults to 'w' when None.
        :param factory: content factory; defaults to KnitAnnotateFactory().
        :param delta: whether new texts may be stored as line deltas.
        :param create: If not True, only open an existing knit.
        :param create_parent_dir: If True, create the parent directory if
            creating the file fails. (This is used for stores with
            hash-prefixes that may not exist yet)
        :param delay_create: The calling code is aware that the knit won't
            actually be created until the first data is stored.
        :param index: An index to use for the knit.
        :param access_method: An access object to use instead of building
            a _KnitAccess for the data file.
        """
        if deprecated_passed(basis_knit):
            warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                          " parameter is deprecated as of bzr 0.9.",
                          DeprecationWarning, stacklevel=2)
        if access_mode is None:
            access_mode = 'w'
        super(KnitVersionedFile, self).__init__(access_mode)
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        self.transport = transport
        self.filename = relpath
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta

        # Upper bound on how many deltas may be chained before a fulltext.
        self._max_delta_chain = 200

        if index is None:
            self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
                access_mode, create=create, file_mode=file_mode,
                create_parent_dir=create_parent_dir, delay_create=delay_create,
                dir_mode=dir_mode)
        else:
            self._index = index
        if access_method is None:
            _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
                ((create and not len(self)) and delay_create), create_parent_dir)
        else:
            _access = access_method
        if create and not len(self) and not delay_create:
            _access.create()
        self._data = _KnitData(_access)

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _check_should_delta(self, first_parents):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext. It searches for _max_delta_chain parents. When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        full text.
        """
        delta_size = 0
        fulltext_size = None
        delta_parents = first_parents
        for count in xrange(self._max_delta_chain):
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            index, pos, size = self._index.get_position(parent)
            if method == 'fulltext':
                fulltext_size = size
                break
            delta_size += size
            delta_parents = self._index.get_parents(parent)
        else:
            # We couldn't find a fulltext, so we must create a new one
            return False

        return fulltext_size > delta_size

    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, []) # should we check the lines ?
        self._check_versions_present(parents)

        if delta_parent is None:
            # reconstitute as full text.
            assert len(delta) == 1 or len(delta) == 0
            if len(delta):
                assert delta[0][0] == 0
                assert delta[0][1] == 0, delta[0][1]
            return super(KnitVersionedFile, self)._add_delta(version_id,
                                                             parents,
                                                             delta_parent,
                                                             sha1,
                                                             noeol,
                                                             delta)

        digest = sha1

        options = []
        if noeol:
            options.append('no-eol')

        # delta_parent is known to be not None from here on.
        # determine the current delta chain length.
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        # The window was changed to a maximum of 200 deltas, but also added
        # was a check that the total compressed size of the deltas is
        # smaller than the compressed size of the fulltext.
        if not self._check_should_delta([delta_parent]):
            # We don't want a delta here, just do a normal insertion.
            return super(KnitVersionedFile, self)._add_delta(version_id,
                                                             parents,
                                                             delta_parent,
                                                             sha1,
                                                             noeol,
                                                             delta)

        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta)

        access_memo = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, access_memo, parents)

    def _add_raw_records(self, records, data):
        """Add all the records 'records' with data pre-joined in 'data'.

        :param records: A list of tuples(version_id, options, parents, size).
        :param data: The data for the records. When it is written, the records
                     are adjusted to have pos pointing into data by the sum of
                     the preceding records sizes.
        """
        # write all the data
        raw_record_sizes = [record[3] for record in records]
        positions = self._data.add_raw_records(raw_record_sizes, data)
        offset = 0
        index_entries = []
        for (version_id, options, parents, size), access_memo in zip(
            records, positions):
            index_entries.append((version_id, options, access_memo, parents))
            if self._data._do_cache:
                self._data._cache[version_id] = data[offset:offset+size]
            offset += size
        self._index.add_versions(index_entries)

    def enable_cache(self):
        """Start caching data for this knit"""
        self._data.enable_cache()

    def clear_cache(self):
        """Clear the data cache only."""
        self._data.clear_cache()

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # copy the current index to a temp index to avoid racing with local
        # writes
        index_file = self.transport.get(self._index._filename)
        try:
            transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                                          index_file)
        finally:
            # Release the source handle even if the copy fails.
            index_file.close()
        # copy the data file
        f = self._data._open_file()
        try:
            transport.put_file(name + DATA_SUFFIX, f)
        finally:
            f.close()
        # move the copied index into place
        transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        """Create a new empty knit using this knit's factory and delta setting.

        NOTE(review): ``mode`` is accepted but not used by this method.
        """
        return KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)

    def _fix_parents(self, version_id, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        the parents list must be a superset of the current
        list.
        """
        current_values = self._index._cache[version_id]
        assert set(current_values[4]).difference(set(new_parents)) == set()
        self._index.add_version(version_id,
                                current_values[1],
                                (None, current_values[2], current_values[3]),
                                new_parents)

    def _extract_blocks(self, version_id, source, target):
        """Return matching blocks for a line-delta version, else None."""
        if self._index.get_method(version_id) != 'line-delta':
            return None
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version.

        :return: (parent, sha1, noeol, delta) where parent is the first
            parent of the version, or None when it has no parents.
        :raises RevisionNotPresent: if version_id is not in this knit.
        """
        version_id = osutils.safe_revision_id(version_id)
        self.check_not_reserved_id(version_id)
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        parents = self.get_parents(version_id)
        if len(parents):
            parent = parents[0]
        else:
            parent = None
        index_memo = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            # Stored as a fulltext: compute the delta against the parent.
            new_content = self.factory.parse_fulltext(data, version_id)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            else:
                old_texts = []
            new_texts = new_content.text()
            delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
        else:
            delta = self.factory.parse_line_delta(data, version_id)
            return parent, sha1, noeol, delta

    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        graph_items = self._index.get_graph()
        return dict(graph_items)

    def get_sha1(self, version_id):
        """Return the recorded sha1 digest of a single version."""
        return self.get_sha1s([version_id])[0]

    def get_sha1s(self, version_ids):
        """See VersionedFile.get_sha1()."""
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        record_map = self._get_record_map(version_ids)
        # record entry 2 is the 'digest'.
        return [record_map[v][2] for v in version_ids]

    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]

    def has_ghost(self, version_id):
        """True if there is a ghost reference in the file to version_id."""
        version_id = osutils.safe_revision_id(version_id)
        # maybe we have it
        if self.has_version(version_id):
            return False
        # optimisable if needed by memoising the _ghosts set.
        items = self._index.get_graph()
        for node, parents in items:
            for parent in parents:
                if parent not in self._index._cache:
                    if parent == version_id:
                        return True
        return False

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        version_id = osutils.safe_revision_id(version_id)
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts=None,
                           delta=None, annotated=None,
                           left_matching_blocks=None):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changed to the text.

        :param content: the content object for the new version; its lines
            are updated in place when ``annotated`` is true.
        :param parents: the present parents of the new version.
        :param parent_texts: optional dict of version id -> content used
            to avoid re-extracting parents.
        :param delta: when true, a line delta against parents[0] is returned.
        :param left_matching_blocks: optional precomputed matching blocks
            between parents[0] and the new text.
        :return: the line delta when ``delta`` is true, otherwise None.
        """
        if parent_texts is None:
            parent_texts = {}
        if left_matching_blocks is not None:
            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
        else:
            delta_seq = None
        if annotated:
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                if (parent_id == parents[0] and delta_seq is not None):
                    seq = delta_seq
                else:
                    seq = patiencediff.PatienceSequenceMatcher(
                        None, merge_content.text(), content.text())
                for i, j, n in seq.get_matching_blocks():
                    if n == 0:
                        continue
                    # this appears to copy (origin, text) pairs across to the
                    # new content for any line that matches the last-checked
                    # parent.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if delta_seq is None:
                reference_content = self._get_content(parents[0], parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = patiencediff.PatienceSequenceMatcher(
                                                 None, old_texts, new_texts)
            return self._make_line_delta(delta_seq, content)

    def _make_line_delta(self, delta_seq, new_content):
        """Generate a line delta from delta_seq and new_content."""
        diff_hunks = []
        for op in delta_seq.get_opcodes():
            if op[0] == 'equal':
                continue
            diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
        return diff_hunks

    def _get_components_positions(self, version_ids):
        """Produce a map of position data for the components of versions.

        This data is intended to be used for retrieving the knit records.

        A dict of version_id to (method, index_memo, next) is
        returned.
        method is the way referenced data should be applied.
        index_memo is the value returned by the index's get_position().
        next is the build-parent of the version, or None for fulltexts.
        """
        component_data = {}
        for version_id in version_ids:
            cursor = version_id

            # Walk the build chain until a fulltext or an already-seen
            # component is reached.
            while cursor is not None and cursor not in component_data:
                method = self._index.get_method(cursor)
                if method == 'fulltext':
                    build_parent = None
                else:
                    build_parent = self.get_parents(cursor)[0]
                index_memo = self._index.get_position(cursor)
                component_data[cursor] = (method, index_memo, build_parent)
                cursor = build_parent
        return component_data

    def _get_content(self, version_id, parent_texts=None):
        """Returns a content object that makes up the specified
        version.

        :param parent_texts: optional dict of version id -> content; a
            non-None entry for version_id is returned directly.
        :raises RevisionNotPresent: if version_id is not in this knit.
        """
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        if parent_texts is not None:
            cached_version = parent_texts.get(version_id, None)
            if cached_version is not None:
                return cached_version

        text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        self._index.check_versions_present(version_ids)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
        """See VersionedFile.add_lines_with_ghosts()."""
        self._check_add(version_id, lines)
        return self._add(version_id, lines[:], parents, self.delta, parent_texts)

    def _add_lines(self, version_id, parents, lines, parent_texts,
                   left_matching_blocks=None):
        """See VersionedFile.add_lines."""
        self._check_add(version_id, lines)
        self._check_versions_present(parents)
        return self._add(version_id, lines[:], parents, self.delta,
                         parent_texts, left_matching_blocks)

    def _check_add(self, version_id, lines):
        """check that version_id and lines are safe to add."""
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id, self.filename)
        self.check_not_reserved_id(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
        self._check_lines_not_unicode(lines)
        self._check_lines_are_lines(lines)

    def _add(self, version_id, lines, parents, delta, parent_texts,
             left_matching_blocks=None):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        the first parent.

        Any versions not present will be converted into ghosts.

        :return: the content object built for the new version.
        """
        #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
        # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
        # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
        # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
        # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
        # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
        # +1383   0      8.0370      8.0370   +<len>
        # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
        # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
        # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
        # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

        if parent_texts is None:
            parent_texts = {}
        # Parents this knit does not hold are ghosts and cannot be used as
        # a delta basis or an annotation source.
        present_parents = [parent for parent in parents
                           if self.has_version(parent)]

        if delta and not len(present_parents):
            delta = False

        digest = sha_strings(lines)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # Store the text with a trailing newline and remember, via
                # the 'no-eol' option, that the real text lacks it.
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'

        if len(present_parents) and delta:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spend applying deltas.
            delta = self._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        lines = self.factory.make(lines, version_id)
        if delta or (self.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if so is needed.
            delta_hunks = self._merge_annotations(lines, present_parents,
                parent_texts, delta, self.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = self.factory.lower_line_delta(delta_hunks)
        else:
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        access_memo = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, access_memo, parents)
        return lines

    def check(self, progress_bar=None):
        """See VersionedFile.check()."""

    def _clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null
        # delta.
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self.get_line_list([version_id])[0]

    def _get_record_map(self, version_ids):
        """Produce a dictionary of knit records.

        The keys are version_ids, the values are tuples of (method, content,
        digest, next).
        method is the way the content should be applied.
        content is a KnitContent object.
        digest is the SHA1 digest of this version id after all steps are done
        next is the build-parent of the version, i.e. the leftmost ancestor.
        If the method is fulltext, next will be None.
        """
        position_map = self._get_components_positions(version_ids)
        # c = component_id, m = method, i_m = index_memo, n = next
        records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]
        record_map = {}
        for component_id, content, digest in \
                self._data.read_records_iter(records):
            method, index_memo, build_parent = position_map[component_id]
            record_map[component_id] = method, content, digest, build_parent

        return record_map

    def get_text(self, version_id):
        """See VersionedFile.get_text"""
        return self.get_texts([version_id])[0]

    def get_texts(self, version_ids):
        """Return each requested version's lines joined into one string."""
        return [''.join(l) for l in self.get_line_list(version_ids)]

    def get_line_list(self, version_ids):
        """Return the texts of listed versions as a list of strings."""
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        for version_id in version_ids:
            self.check_not_reserved_id(version_id)
        text_map, content_map = self._get_content_maps(version_ids)
        return [text_map[v] for v in version_ids]

    _get_lf_split_line_list = get_line_list

    def _get_content_maps(self, version_ids):
        """Produce maps of text and KnitContents

        :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
        :raises RevisionNotPresent: if a requested version is absent.
        :raises KnitCorrupt: if a rebuilt text does not match its digest.
        """
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        record_map = self._get_record_map(version_ids)

        text_map = {}
        content_map = {}
        final_content = {}
        for version_id in version_ids:
            # Collect the build chain from the requested version back to a
            # fulltext, or to a component that has already been built.
            components = []
            cursor = version_id
            while cursor is not None:
                method, data, digest, build_parent = record_map[cursor]
                components.append((cursor, method, data, digest))
                if cursor in content_map:
                    break
                cursor = build_parent

            # Replay the chain oldest-first to rebuild the content.
            content = None
            for component_id, method, data, digest in reversed(components):
                if component_id in content_map:
                    content = content_map[component_id]
                else:
                    if method == 'fulltext':
                        assert content is None
                        content = self.factory.parse_fulltext(data, version_id)
                    elif method == 'line-delta':
                        delta = self.factory.parse_line_delta(data, version_id)
                        content = content.copy()
                        content._lines = self._apply_delta(content._lines,
                                                           delta)
                    content_map[component_id] = content

            if 'no-eol' in self._index.get_options(version_id):
                # Strip the newline that was added for storage; work on a
                # copy so the cached component content is left intact.
                content = content.copy()
                line = content._lines[-1][1].rstrip('\n')
                content._lines[-1] = (content._lines[-1][0], line)
            final_content[version_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            if sha_strings(text) != digest:
                raise KnitCorrupt(self.filename,
                                  'sha-1 does not match %s' % version_id)

            text_map[version_id] = text
        return text_map, final_content

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        else:
            version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        if pb is None:
            pb = progress.DummyProgress()
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        requested_versions = set(version_ids)
        # filter for available versions
        for version_id in requested_versions:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        # get a in-component-order queue:
        for version_id in self.versions():
            if version_id in requested_versions:
                index_memo = self._index.get_position(version_id)
                version_id_records.append((version_id, index_memo))

        total = len(version_id_records)
        for version_idx, (version_id, data, sha_value) in \
                enumerate(self._data.read_records_iter(version_id_records)):
            pb.update('Walking content.', version_idx, total)
            method = self._index.get_method(version_id)

            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                line_iterator = self.factory.get_fulltext_content(data)
            else:
                line_iterator = self.factory.get_linedelta_content(data)
            for line in line_iterator:
                yield line

        pb.update('Walking content.', total, total)

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        version_ids = [osutils.safe_revision_id(version_id) for
            version_id in version_ids]
        return self._index.iter_parents(version_ids)

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

    __len__ = num_versions

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        version_id = osutils.safe_revision_id(version_id)
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():
            yield origin, text

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        # perf notes:
        # optimism counts!
        # 52554 calls in 1264 872 internal down from 3674
        version_id = osutils.safe_revision_id(version_id)
        try:
            return self._index.get_parents(version_id)
        except KeyError:
            raise RevisionNotPresent(version_id, self.filename)

    def get_parents_with_ghosts(self, version_id):
        """See VersionedFile.get_parents_with_ghosts."""
        version_id = osutils.safe_revision_id(version_id)
        try:
            return self._index.get_parents_with_ghosts(version_id)
        except KeyError:
            raise RevisionNotPresent(version_id, self.filename)

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        versions = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(versions, topo_sorted)

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        versions = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(versions)

    def plan_merge(self, ver_a, ver_b):
        """See VersionedFile.plan_merge."""
        ver_a = osutils.safe_revision_id(ver_a)
        ver_b = osutils.safe_revision_id(ver_b)
        ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))

        ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
        annotated_a = self.annotate(ver_a)
        annotated_b = self.annotate(ver_b)
        return merge._plan_annotate_merge(annotated_a, annotated_b,
                                          ancestors_a, ancestors_b)

1042

1043

1044

class _KnitComponentFile(object):
    """Base for the individual files that together make up a knit."""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Record where the file lives and how it should be opened/created.

        Nothing is read or written here; the arguments are only stored.
        """
        # Location.
        self._transport = transport
        self._filename = filename
        # Access and permission settings.
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        # Creation policy.
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the transport base joined with this file's name."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Validate the first line read from ``fp`` against ``self.HEADER``.

        :raises errors.NoSuchFile: if nothing could be read (empty file).
        :raises KnitHeaderError: if the first line is not the header.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file doesn't
            # exist yet.
            raise errors.NoSuchFile(self._full_path())
        elif first_line != self.HEADER:
            location = self._transport.abspath(self._filename)
            raise KnitHeaderError(badline=first_line, filename=location)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1073

1074

1075

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is kept in memory and read on startup, to enable fast lookups
    of revision information.  The cursor of the index file is always
    pointing to the end, making it easy to append entries.

    _cache maps a version id to a tuple
    (version_id, options, pos, size, parents, index).

    _history maps index numbers to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data
        file that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record's data in
        the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a
    line, or at the end of the file, then the record that is missing it will
    be ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write
    was interrupted. As a result it is normal for the last line in the index
    to be missing a trailing newline. One can be added with no harmful
    effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        cache = self._cache
        # _history must only ever reference the first index entry written
        # for a version_id, so a repeated id keeps its original number.
        try:
            index = cache[version_id][5]
        except KeyError:
            index = len(self._history)
            self._history.append(version_id)
        cache[version_id] = (version_id, options, pos, size, parents, index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index, or arrange for it to be created.

        :param create: With mode 'w', allow a missing index to be created
            instead of raising NoSuchFile.
        :param delay_create: When creating, defer writing the file until
            the first call to add_versions.
        The remaining parameters are passed to _KnitComponentFile.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode == 'w' and create:
                if delay_create:
                    self._need_to_create = True
                else:
                    self._transport.put_bytes_non_atomic(
                        self._filename, self.HEADER, mode=self._file_mode)
            else:
                raise

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(version_id, entry[4])
                for version_id, entry in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # Build a graph of everything reachable from the requested versions.
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            if version not in cache:
                raise RevisionNotPresent(version, self._filename)
            # Parents that are not in the cache are ghosts; trim them.
            parents = [parent for parent in cache[version][4]
                       if parent in cache]
            # Queue only parents that have not been completed yet.
            todo.update([parent for parent in parents
                         if parent not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        todo = set(versions)
        while todo:
            version = todo.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # A ghost: record it as a node without parents.
                graph[version] = []
            else:
                # Queue only parents that have not been completed yet.
                todo.update([parent for parent in parents
                             if parent not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                # Not in the index: skip it silently.
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references for an index line.

        Versions present in the index are written as their sequence number,
        the others as '.' followed by the version id.
        """
        cache = self._cache
        tokens = []
        for version in versions:
            try:
                token = str(cache[version][5])
            except KeyError:
                token = '.' + version
            tokens.append(token)
        return ' '.join(tokens)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
        """
        # Snapshots so the in-memory state can be rolled back on failure.
        saved_history = self._history[:]
        saved_cache = self._cache.copy()
        new_lines = []

        try:
            for version_id, options, (_index, pos, size), parents in versions:
                parent_refs = self._version_list_to_index(parents)
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               parent_refs)
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # Creation was delayed: write the header and the new
                # records in one go.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(
                    self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename,
                                             ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        pos, size = self._cache[version_id][2:4]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options recorded for version_id, as cached.

        e.g. foo,bar
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        return [parent for parent in cache[version_id][4]
                if parent in cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1354

1355

1356

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas is requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            found entries have been yielded and some key was not found.
        :return: An iterator of (index, key, value, node_refs) tuples;
            node_refs is () for every entry of a parentless index.
        """
        keys = set(keys)
        found_keys = set()
        has_parents = self._parents
        for node in self._graph_index.iter_entries(keys):
            if has_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the subset of the given keys that the index contains."""
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents, but dont examine known nodes again
                pending.update([parent for parent in graph[key]
                    if parent not in graph])
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        absent = versions.difference(graph)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            # Iterating the dict yields its keys.
            result_keys = iter(graph)
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents, but dont examine known nodes again
                pending.update([parent for parent in graph[key]
                    if parent not in graph])
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                # Only parents may be ghosts; a requested version may not.
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # Look up only those parents whose presence is still unknown.
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = [parent[0] for parent in node[3][0]
                    if parent in present_parents]
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Strip the 1-tuple wrapping from index keys."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        :raises RevisionNotPresent: if version_id is not in the index.
        """
        node = self._get_node(version_id)
        # The value is a one character flag followed by "<pos> <size>".
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Map an entry's second reference list to a compression method."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the single index entry for version_id.

        :raises RevisionNotPresent: if version_id is not in the index; the
            presence check avoids leaking a bare IndexError to callers.
        """
        return list(self._get_entries(self._version_ids_to_keys([version_id]),
                                      check_present=True))[0]

    def get_options(self, version_id):
        """Return the list of options for version_id.

        e.g. ['line-delta', 'no-eol']

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        # A leading 'N' in the value marks a text without a final newline.
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, access_memo, parents), where
                         access_memo is an (index, pos, size) tuple.
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: if a record does not fit this index's
            configuration, or disagrees with an entry already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # The first parent is the compression basis.
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        # Entries already in the index must match exactly; they are then
        # dropped so that only new nodes reach the callback.
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.items():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.items():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Wrap each version id in a 1-tuple, as a set of index keys."""
        return set((version_id, ) for version_id in version_ids)

1668

1669

1670

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Permission bits used when the file is written.
        :param _dir_mode: Permission bits for a created parent directory.
        :param _need_to_create: True when the file still has to be created
            by the first add_raw_records call.
        :param _create_parent_dir: Whether that first write may create the
            parent directory.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # First write: create the file, so the data starts at offset 0.
            self._transport.put_bytes_non_atomic(
                self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            start = 0
        else:
            start = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, start, size))
            start += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            handle = self._transport.get(self._filename)
        except NoSuchFile:
            handle = None
        return handle

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = [(pos, size) for (_index, pos, size) in memos_for_retrieval]
        for _pos, data in self._transport.readv(self._filename, ranges):
            yield data

1748

1749

1750

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            # The writer reports where the record landed inside the pack,
            # which is what has to be remembered for later retrieval.
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group consecutive memos into same-index requests.
        # The groups are tracked in request_lists itself rather than through
        # a None sentinel, so a memo carrying a None index can neither crash
        # the grouping nor be silently dropped; it reaches the
        # self.indices lookup below like any other index.
        request_lists = []
        for (index, offset, length) in memos_for_retrieval:
            if request_lists and request_lists[-1][0] == index:
                request_lists[-1][1].append((offset, length))
            else:
                request_lists.append((index, [(offset, length)]))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that new records go to.
        :param index: The index that will read the written records; it is
            reported in the memos returned by add_raw_records.
        :param transport_packname: A (transport, packname) tuple locating
            the pack that writer writes to.
        """
        # Unpacked here instead of in the signature: tuple parameters are
        # not valid syntax in later Python versions.
        transport, packname = transport_packname
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

1834

1835

1836

class _KnitData(object):

1837

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1838

1839

The KnitData class provides the logic for parsing and using knit records,

1840

making use of an access method for the low level read and write operations.

1841

"""

1842

1843

def __init__(self, access):
    """Create a KnitData object.

    :param access: The access method to use. Access methods such as
        _KnitAccess manage the insertion of raw records and the subsequent
        retrieval of the same.
    """
    self._access = access
    self._checked = False
    # Caching of raw records is off until enable_cache() is called.
    self._do_cache = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}

1858

1859

def enable_cache(self):
    """Enable caching of reads.

    Only the flag is set here; whatever is already in the cache is left
    untouched.
    """
    self._do_cache = True

1862

1863

def clear_cache(self):
    """Clear the record cache.

    Caching is switched off as well, and the cache is replaced by a fresh
    dictionary rather than emptied in place.
    """
    self._cache = {}
    self._do_cache = False

1867

1868

def _open_file(self):
    """Return whatever the access method's open_file() returns."""
    access = self._access
    return access.open_file()

1870

1871

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream holding a "version" header line, the text
    lines, and a closing "end" line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    assert isinstance(version_id, str)
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    footer = "end %s\n" % version_id

    sio = StringIO()
    gz_file = GzipFile(None, mode='wb', fileobj=sio)
    gz_file.writelines(chain([header], lines, [footer]))
    # Closing finishes the gzip stream, so the position afterwards is the
    # full compressed length.
    gz_file.close()
    length = sio.tell()

    sio.seek(0)
    return length, sio

1891

1892

def add_raw_records(self, sizes, raw_data):
    """Append a prepared record to the data file.

    :param sizes: An iterable containing the size of each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: a list of index data for the way the data was stored.
        See the access method add_raw_records documentation for more
        details.
    """
    access = self._access
    return access.add_raw_records(sizes, raw_data)

1902

1903

def add_record(self, version_id, digest, lines):
    """Write new text record to disk.

    When caching is enabled the raw record is also remembered under
    version_id.

    Returns index data for retrieving it later, as per add_raw_records.
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    raw_record = sio.getvalue()
    memos = self.add_raw_records([size], raw_record)
    if self._do_cache:
        self._cache[version_id] = raw_record
    # Exactly one record was written, so exactly one memo is expected back.
    return memos[0]

1913

1914

def _parse_record_header(self, version_id, raw_data):

1915

"""Parse a record header for consistency.

1916

1917

:return: the header and the decompressor stream.

1918

as (stream, header_record)

1919

"""

1920

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1921

try:

1922

rec = self._check_header(version_id, df.readline())

1923

except Exception, e:

1924

raise KnitCorrupt(self._access,

1925

"While reading {%s} got %s(%s)"

1926

% (version_id, e.__class__.__name__, str(e)))

1927

return df, rec

1928

1929

def _check_header(self, version_id, line):

1930

rec = line.split()

1931

if len(rec) != 4:

1932

raise KnitCorrupt(self._access,

1933

'unexpected number of elements in record header')

1934

if rec[1] != version_id:

1935

raise KnitCorrupt(self._access,

1936

'unexpected version, wanted %r, got %r'

1937

% (version_id, rec[1]))

1938

return rec

1939

1940

def _parse_record(self, version_id, data):

1941

# profiling notes:

1942

# 4168 calls in 2880 217 internal

1943

# 4168 calls to _parse_record_header in 2121

1944

# 4168 calls to readlines in 330

1945

df = GzipFile(mode='rb', fileobj=StringIO(data))

1946

1947

try:

1948

record_contents = df.readlines()

1949

except Exception, e:

1950

raise KnitCorrupt(self._access,

1951

"While reading {%s} got %s(%s)"

1952

% (version_id, e.__class__.__name__, str(e)))

1953

header = record_contents.pop(0)

1954

rec = self._check_header(version_id, header)

1955

1956

last_line = record_contents.pop()

1957

if len(record_contents) != int(rec[2]):

1958

raise KnitCorrupt(self._access,

1959

'incorrect number of lines %s != %s'

1960

' for version {%s}'

1961

% (len(record_contents), int(rec[2]),

1962

version_id))

1963

if last_line != 'end %s\n' % rec[1]:

1964

raise KnitCorrupt(self._access,

1965

'unexpected version end line %r, wanted %r'

1966

% (last_line, version_id))

1967

df.close()

1968

return record_contents, rec[3]

1969

1970

def read_records_iter_raw(self, records):
    """Yield (version_id, raw_data) for each requested record, in order.

    Records already in self._cache are served from it. The others are
    fetched through the access object in a single get_raw_records call
    and only their header is unpacked, to check the version id; the
    body is not decompressed or validated. Fetched data is added to the
    cache when caching is enabled.

    :param records: A sized sequence of (version_id, index_memo) pairs.
    """
    if len(records):
        # Work out which index memos actually have to be read.
        if self._cache:
            needed_offsets = []
            for version_id, index_memo in records:
                if version_id not in self._cache:
                    needed_offsets.append(index_memo)
        else:
            # Empty cache: nothing to filter out.
            needed_offsets = [index_memo for version_id, index_memo
                              in records]

        raw_records = self._access.get_raw_records(needed_offsets)

    for version_id, index_memo in records:
        if version_id in self._cache:
            # Cached data has already been validated.
            yield version_id, self._cache[version_id]
            continue

        data = raw_records.next()
        if self._do_cache:
            self._cache[version_id] = data

        # Freshly read data: check the header before handing it out.
        df, rec = self._parse_record_header(version_id, data)
        df.close()
        yield version_id, data

2004

2005

def read_records_iter(self, records):
    """Read, parse and yield the requested text records.

    Results come back in whatever order is fastest to produce, not the
    order requested: cached records first (in request order), then the
    remaining ones sorted by index memo. A record requested several
    times is yielded once.

    :param records: A list of (version_id, index_memo) entries.
    :return: Yields (version_id, contents, digest) tuples.
    """
    if not records:
        return

    by_index_memo = operator.itemgetter(1)
    if self._cache:
        # Serve what the cache already holds and collect the rest.
        served = set()
        pending = set()
        for record in records:
            cached_id = record[0]
            if cached_id not in self._cache:
                pending.add(record)
            elif cached_id not in served:
                served.add(cached_id)
                content, digest = self._parse_record(
                    cached_id, self._cache[cached_id])
                yield (cached_id, content, digest)
        needed_records = sorted(pending, key=by_index_memo)
    else:
        needed_records = sorted(set(records), key=by_index_memo)

    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    wanted_memos = [index_memo for version_id, index_memo in needed_records]
    raw_data = self._access.get_raw_records(wanted_memos)

    for (version_id, index_memo), data in izip(needed_records, raw_data):
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

2050

2051

def read_records(self, records):
    """Read records and collect them into a dictionary.

    :param records: Passed straight to read_records_iter.
    :return: A dict mapping each record id to its (content, digest).
    """
    return dict((record_id, (content, digest))
                for record_id, content, digest
                in self.read_records_iter(records))

2058

2059

2060

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    # Both ends of this optimiser are knits.
    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.

        :return: True only when both source and target are
            KnitVersionedFile instances.
        """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records the target is missing from the source
        without re-parsing them, then merges parent lists for versions
        present on both sides whose parents differ.

        Note that the pb argument is not used: it is immediately replaced
        by a fresh nested progress bar. msg is not referenced here either.

        :return: The number of records copied into the target; 0 when there
            is nothing to do.
        :raises errors.GraphCycleError: if adopting a source parent would
            make a version its own ancestor.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            # Note: the ancestry is stored on self, not in a local.
            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions to copy, and versions both sides have whose parent
            # lists must be compared.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            # NOTE(review): topo_sort presumably orders parents before
            # children; the consistency assertion below relies on parents
            # being scheduled first - confirm against topo_sort.
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that this will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            # Walk the raw data and the planned queue in lockstep; both were
            # built in the same order, which the assertion double-checks.
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            # All collected records go to the target in a single call.
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2172

2173

2174

# Register InterKnit with InterVersionedFile as an optimiser.
InterVersionedFile.register_optimiser(InterKnit)

2175

2176

2177

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    # Source side is a weave file, target side is a knit.
    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :return: True only when source is a bzrlib.weave.Weave instance and
            target is a KnitVersionedFile instance.
        """
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds each version the target is missing by reading its lines from
        the source weave and calling add_lines on the target, then merges
        parent lists for versions present on both sides where the source
        has parents the target lacks.

        Note that the pb argument is not used: it is immediately replaced
        by a fresh nested progress bar. msg is not referenced here either.

        :return: The number of versions added to the target; 0 when there
            is nothing to do.
        :raises errors.GraphCycleError: if adopting a source parent would
            make a version its own ancestor.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            # Note: the ancestry is stored on self, not in a local.
            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions to add, and versions both sides have whose parent
            # lists must be compared.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            # NOTE(review): topo_sort presumably orders parents before
            # children; the assertion below relies on parents having been
            # added first - confirm against topo_sort.
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that this will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2265

2266

2267

# Register WeaveToKnit with InterVersionedFile as an optimiser.
InterVersionedFile.register_optimiser(WeaveToKnit)

2268

2269

2270

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = KnitSequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = KnitSequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = KnitSequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            # (j2len is rebound every iteration, so its bound .get has to
            # be looked up again each time.)
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            # 704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            # +76519   0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            # 704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            # +211400  0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            # +76519   0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize

2402

2403

2404

try:

2405

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2406

except ImportError:

2407

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »