/brz/remove-bazaar : revision 2858.2.5

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2007-10-08 07:29:57 UTC
mfrom: (2894 +trunk)
mto: This revision was merged to the branch mainline in revision 2895.
Revision ID: mbp@sourcefrog.net-20071008072957-uhm1gl1mqcsdc377

merge trunk

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_string,

108

sha_strings,

109

)

110

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

111

from bzrlib.tsort import topo_sort

112

import bzrlib.ui

113

import bzrlib.weave

114

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

115

116

117

# TODO: Split out code specific to this format into an associated object.

118

119

# TODO: Can we put in some kind of value to check that the index and data

120

# files belong together?

121

122

# TODO: accommodate binaries, perhaps by storing a byte count

123

124

# TODO: function to check whole file

125

126

# TODO: atomically append data, then measure backwards from the cursor

127

# position after writing to work out where it was located. we may need to

128

# bypass python file buffering.

129

130

# Filename suffixes used for a knit's files (presumably the delta data file
# and its index described in the module docstring -- confirm against users).
DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'

132

133

134

class KnitContent(object):
    """Base class for the content of one knit version.

    Subclasses are expected to supply ``annotate_iter()``, ``text()`` and a
    ``_lines`` attribute; the helpers here are built on top of those.
    """

    def annotate(self):
        """Return the (origin, text) pairs of ``annotate_iter()`` as a list."""
        pairs = self.annotate_iter()
        return list(pairs)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta that turns this content into new_lines.

        :param new_lines: another content object; its ``text()`` is diffed
            against ours and its ``_lines`` supplies the replacement data.
        :return: an iterator of (old_start, old_end, new_length, new_data)
            tuples, one per non-equal opcode of the patience diff.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                replacement = new_lines._lines[new_start:new_end]
                yield old_start, old_end, new_end - new_start, replacement

    def line_delta(self, new_lines):
        """Return the output of ``line_delta_iter(new_lines)`` as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Derive SequenceMatcher-style matching blocks from a knit delta.

        :param knit_delta: iterable of (source_begin, source_end,
            target_length, new_text) hunks.
        :param source: indexable sequence of the source lines.
        :param target: indexable, sized sequence of the target lines.
        :return: an iterator of (source_pos, target_pos, length) triples,
            finishing with a zero-length terminating triple.
        """
        target_size = len(target)
        source_at = 0
        target_at = 0
        for hunk_begin, hunk_end, inserted, new_text in knit_delta:
            unchanged = hunk_begin - source_at
            matched = unchanged
            if matched > 0:
                # A knit delta cannot be trusted about whether the final line
                # of a run really matches (eol handling), so compare it.
                tail = matched - 1
                if source[source_at + tail] != target[target_at + tail]:
                    matched = tail
                if matched > 0:
                    yield source_at, target_at, matched
            target_at += inserted + unchanged
            source_at = hunk_end
        # Whatever remains of the target after the last hunk is unchanged.
        matched = target_size - target_at
        if matched > 0:
            tail = matched - 1
            if source[source_at + tail] != target[target_at + tail]:
                matched = tail
            if matched > 0:
                yield source_at, target_at, matched
        yield source_at + (target_size - target_at), target_size, 0

180

181

182

class AnnotatedKnitContent(KnitContent):
    """Knit content stored as a list of (origin, text) pairs."""

    def __init__(self, lines):
        # The list is kept by reference, not copied.
        self._lines = lines

    def copy(self):
        """Return a new AnnotatedKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return AnnotatedKnitContent(duplicate)

    def text(self):
        """Return just the text of every line, without the origins."""
        return [content for _origin, content in self._lines]

    def annotate_iter(self):
        """Return an iterator over the stored (origin, text) pairs."""
        return iter(self._lines)

    def strip_last_line_newline(self):
        """Remove trailing newline characters from the last line, in place."""
        final = self._lines[-1]
        stripped = final[1].rstrip('\n')
        self._lines[-1] = (final[0], stripped)

201

202

203

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is
    reported for all lines. Generally, annotate[_iter] is not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        # Both values are stored as given; lines is not copied.
        self._version_id = version_id
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        for text_line in self._lines:
            yield self._version_id, text_line

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return PlainKnitContent(duplicate, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newlines from the last line in place."""
        last_line = self._lines[-1]
        self._lines[-1] = last_line.rstrip('\n')

    def text(self):
        """Return the stored list of lines itself (not a copy)."""
        return self._lines

228

229

230

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        version_id is accepted for interface parity with KnitPlainFactory
        and is not used here.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param version_id: Optional, forwarded to parse_line_delta (which
            does not use it). It is accepted so that this method takes the
            same arguments as KnitPlainFactory.parse_line_delta_iter.
        """
        # parse_line_delta requires version_id; calling it with lines
        # alone raised TypeError.
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart,intend,intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list, not as a list of tuples, i.e.
            (start, end, count, [1..count newline])
        :return: A list of hunks in the order they appear in lines.
        """
        result = []
        lines = iter(lines)
        # The header loop and the content reads share this one iterator.
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of version_id's content in knit."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

340

341

342

class KnitPlainFactory(object):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent tagged with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) for each hunk in lines.

        lines must support len() and slicing. Each hunk is a
        'start,end,count' header followed by count content lines.
        version_id is not used.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            position = body_start + count
            yield start, end, count, lines[body_start:position]

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        stream = iter(lines)
        # Headers and content lines are pulled from the same iterator.
        fetch = stream.next
        for header in stream:
            count = int(header.split(',')[2])
            for _unused in xrange(count):
                yield fetch()

    def lower_fulltext(self, content):
        """Return the serializable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize delta hunks as header lines followed by their content."""
        serialized = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialized.append(header)
            serialized.extend(hunk_lines)
        return serialized

    def annotate_iter(self, knit, version_id):
        """Delegate annotation of version_id to annotate_knit()."""
        return annotate_knit(knit, version_id)

401

402

403

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit is created on.
    :param relpath: The path of the knit relative to transport.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a
    # factory instance. The previous positional call passed the arguments
    # in the wrong order, so access_mode received the factory class and
    # the constructor's mode assertion failed.
    KnitVersionedFile(relpath, transport, access_mode='w',
                      factory=KnitPlainFactory(), create=True)

406

407

408

class KnitVersionedFile(VersionedFile):

409

"""Weave-like structure with faster random access.

410

411

A knit stores a number of texts and a summary of the relationships

412

between them. Texts are identified by a string version-id. Texts

413

are normally stored and retrieved as a series of lines, but can

414

also be passed as single strings.

415

416

Lines are stored with the trailing newline (if any) included, to

417

avoid special cases for files with no final newline. Lines are

418

composed of 8-bit characters, not unicode. The combination of

419

these approaches should mean any 'binary' file can be safely

420

stored and retrieved.

421

"""

422

423

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, delta=True, create=False, create_parent_dir=False,
             delay_create=False, dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: The content factory; a KnitAnnotateFactory is used
        when none (or a false value) is given.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.
    :param access_method: An access object to use for the data; when None
        a _KnitAccess over relpath + DATA_SUFFIX is built.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    if index is not None:
        self._index = index
    else:
        self._index = _KnitIndex(
            transport, relpath + INDEX_SUFFIX, access_mode,
            create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir,
            delay_create=delay_create, dir_mode=dir_mode)

    # len(self) is consulted only after the index has been set up.
    if access_method is not None:
        _access = access_method
    else:
        _access = _KnitAccess(
            transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create),
            create_parent_dir)
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

463

464

def __repr__(self):

465

return '%s(%s)' % (self.__class__.__name__,

466

self.transport.abspath(self.filename))

467

468

def _check_should_delta(self, first_parents):
    """Iterate back through the parent listing, looking for a fulltext.

    This is used when we want to decide whether to add a delta or a new
    fulltext. It follows the first parent for at most _max_delta_chain
    steps. When it finds a fulltext parent, it compares the fulltext's
    size with the total size of the deltas leading up to it.

    Return True if we should create a new delta, False if we should use a
    full text.
    """
    accumulated_delta_size = 0
    chain_parents = first_parents
    for _step in xrange(self._max_delta_chain):
        parent = chain_parents[0]
        method = self._index.get_method(parent)
        _index, _pos, size = self._index.get_position(parent)
        if method == 'fulltext':
            # A delta is only worthwhile while the chain is still smaller
            # than the fulltext it hangs off.
            return size > accumulated_delta_size
        accumulated_delta_size += size
        chain_parents = self._index.get_parents(parent)
    # We couldn't find a fulltext, so we must create a new one
    return False

496

497

def _add_raw_records(self, records, data):

498

"""Add all the records 'records' with data pre-joined in 'data'.

499

500

:param records: A list of tuples(version_id, options, parents, size).

501

:param data: The data for the records. When it is written, the records

502

are adjusted to have pos pointing into data by the sum of

503

the preceding records sizes.

504

"""

505

# write all the data

506

raw_record_sizes = [record[3] for record in records]

507

positions = self._data.add_raw_records(raw_record_sizes, data)

508

offset = 0

509

index_entries = []

510

for (version_id, options, parents, size), access_memo in zip(

511

records, positions):

512

index_entries.append((version_id, options, access_memo, parents))

513

if self._data._do_cache:

514

self._data._cache[version_id] = data[offset:offset+size]

515

offset += size

516

self._index.add_versions(index_entries)

517

518

def enable_cache(self):
    """Start caching data for this knit.

    Delegates to the enable_cache() method of the knit's data object.
    """
    data = self._data
    data.enable_cache()

521

522

def clear_cache(self):
    """Clear the data cache only.

    Delegates to the clear_cache() method of the knit's data object.
    """
    data = self._data
    data.clear_cache()

525

526

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first copied to a temporary name, then the data file is
    copied, and only then is the index moved to its final name.
    """
    final_index_name = name + INDEX_SUFFIX
    temp_index_name = final_index_name + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(
        temp_index_name, self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(temp_index_name, final_index_name)

540

541

def create_empty(self, name, transport, mode=None):
    """Return a newly created knit at name on transport.

    The new knit uses this knit's factory and delta setting. The mode
    argument is accepted but not used.
    """
    new_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)
    return new_knit

544

545

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    :raises RevisionNotPresent: If a required version is not in this knit.
    """
    if not isinstance(required_versions, set):
        required_versions = set(required_versions)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in the order that
    # self.versions() reports them.
    ordered_versions = [candidate for candidate in self.versions()
                        if candidate in required_versions]

    # create the list of version information for the result
    records_to_read = []
    result_version_list = []
    for version_id in ordered_versions:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        records_to_read.append((version_id, index_memo))
        _memo_index, _data_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
                                    parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_chunks = []
    paired = izip(self._data.read_records_iter_raw(records_to_read),
                  result_version_list)
    for (version_id, raw_data), (version_id2, options, _, parents) in paired:
        assert version_id == version_id2, 'logic error, inconsistent results'
        raw_chunks.append(raw_data)
    pseudo_file = StringIO(''.join(raw_chunks))

    def read(length):
        # None means "everything that is left".
        if length is not None:
            return pseudo_file.read(length)
        return pseudo_file.read()

    return (self.get_format_signature(), result_version_list, read)

605

606

def _extract_blocks(self, version_id, source, target):

607

if self._index.get_method(version_id) != 'line-delta':

608

return None

609

parent, sha1, noeol, delta = self.get_delta(version_id)

610

return KnitContent.get_line_delta_blocks(delta, source, target)

611

612

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id (or None when it has no parents), noeol tells
        whether 'no-eol' is in the version's options, and delta is a list
        of line-delta hunks. For a version stored as a fulltext the delta
        is computed against the parent's text (or an empty text).
    """
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        reference_content = self._get_content(parent)
        old_texts = reference_content.text()
    new_texts = new_content.text()
    delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                     new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

637

638

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" when the factory is annotated, otherwise
        "knit-plain".
    """
    if self.factory.annotated:
        return "knit-%s" % ("annotated",)
    return "knit-%s" % ("plain",)

645

646

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the pairs returned by the index's
        get_graph().
    """
    return dict(self._index.get_graph())

650

651

def get_sha1(self, version_id):
    """Return the sha1 that get_sha1s() reports for the single version_id."""
    digests = self.get_sha1s([version_id])
    return digests[0]

653

654

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: A list with one digest per entry of version_ids, in the same
        order.
    """
    record_map = self._get_record_map(version_ids)
    digests = []
    for version_id in version_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

659

660

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list holding the data suffix followed by the index
        suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

664

665

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is present in this knit is never reported as a ghost.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    known_versions = self._index._cache
    for _node, node_parents in self._index.get_graph():
        for parent in node_parents:
            if parent not in known_versions and parent == version_id:
                return True
    return False

678

679

def insert_data_stream(self, (format, data_list, reader_callable)):

680

"""Insert knit records from a data stream into this knit.

681

682

If a version in the stream is already present in this knit, it will not

683

be inserted a second time. It will be checked for consistency with the

684

stored version however, and may cause a KnitCorrupt error to be raised

685

if the data in the stream disagrees with the already stored data.

686

687

:seealso: get_data_stream

688

"""

689

if format != self.get_format_signature():

690

trace.mutter('incompatible format signature inserting to %r', self)

691

raise KnitDataStreamIncompatible(

692

format, self.get_format_signature())

693

694

for version_id, options, length, parents in data_list:

695

if self.has_version(version_id):

696

# First check: the list of parents.

697

my_parents = self.get_parents_with_ghosts(version_id)

698

if my_parents != parents:

699

# XXX: KnitCorrupt is not quite the right exception here.

700

raise KnitCorrupt(

701

self.filename,

702

'parents list %r from data stream does not match '

703

'already recorded parents %r for %s'

704

% (parents, my_parents, version_id))

705

706

# Also check the SHA-1 of the fulltext this content will

707

# produce.

708

raw_data = reader_callable(length)

709

my_fulltext_sha1 = self.get_sha1(version_id)

710

df, rec = self._data._parse_record_header(version_id, raw_data)

711

stream_fulltext_sha1 = rec[3]

712

if my_fulltext_sha1 != stream_fulltext_sha1:

713

# Actually, we don't know if it's this knit that's corrupt,

714

# or the data stream we're trying to insert.

715

raise KnitCorrupt(

716

self.filename, 'sha-1 does not match %s' % version_id)

717

else:

718

self._add_raw_records(

719

[(version_id, options, parents, length)],

720

reader_callable(length))

721

722

def versions(self):
    """See VersionedFile.versions.

    When the 'evil' debug flag is set, the call site is logged first.
    """
    flags = debug.debug_flags
    if 'evil' in flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

727

728

def has_version(self, version_id):
    """See VersionedFile.has_version.

    When the 'evil' debug flag is set, the call site is logged first.
    """
    flags = debug.debug_flags
    if 'evil' in flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    return self._index.has_version(version_id)

733

734

__contains__ = has_version

735

736

def _merge_annotations(self, content, parents, parent_texts={},

737

delta=None, annotated=None,

738

left_matching_blocks=None):

739

"""Merge annotations for content. This is done by comparing

740

the annotations based on changed to the text.

741

"""

742

if left_matching_blocks is not None:

743

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

744

else:

745

delta_seq = None

746

if annotated:

747

for parent_id in parents:

748

merge_content = self._get_content(parent_id, parent_texts)

749

if (parent_id == parents[0] and delta_seq is not None):

750

seq = delta_seq

751

else:

752

seq = patiencediff.PatienceSequenceMatcher(

753

None, merge_content.text(), content.text())

754

for i, j, n in seq.get_matching_blocks():

755

if n == 0:

756

continue

757

# this appears to copy (origin, text) pairs across to the

758

# new content for any line that matches the last-checked

759

# parent.

760

content._lines[j:j+n] = merge_content._lines[i:i+n]

761

if delta:

762

if delta_seq is None:

763

reference_content = self._get_content(parents[0], parent_texts)

764

new_texts = content.text()

765

old_texts = reference_content.text()

766

delta_seq = patiencediff.PatienceSequenceMatcher(

767

None, old_texts, new_texts)

768

return self._make_line_delta(delta_seq, content)

769

770

def _make_line_delta(self, delta_seq, new_content):

771

"""Generate a line delta from delta_seq and new_content."""

772

diff_hunks = []

773

for op in delta_seq.get_opcodes():

774

if op[0] == 'equal':

775

continue

776

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

777

return diff_hunks

778

779

def _get_components_positions(self, version_ids):

780

"""Produce a map of position data for the components of versions.

781

782

This data is intended to be used for retrieving the knit records.

783

784

A dict of version_id to (method, data_pos, data_size, next) is

785

returned.

786

method is the way referenced data should be applied.

787

data_pos is the position of the data in the knit.

788

data_size is the size of the data in the knit.

789

next is the build-parent of the version, or None for fulltexts.

790

"""

791

component_data = {}

792

for version_id in version_ids:

793

cursor = version_id

794

795

while cursor is not None and cursor not in component_data:

796

method = self._index.get_method(cursor)

797

if method == 'fulltext':

798

next = None

799

else:

800

next = self.get_parents(cursor)[0]

801

index_memo = self._index.get_position(cursor)

802

component_data[cursor] = (method, index_memo, next)

803

cursor = next

804

return component_data

805

806

def _get_content(self, version_id, parent_texts={}):

807

"""Returns a content object that makes up the specified

808

version."""

809

cached_version = parent_texts.get(version_id, None)

810

if cached_version is not None:

811

if not self.has_version(version_id):

812

raise RevisionNotPresent(version_id, self.filename)

813

return cached_version

814

815

text_map, contents_map = self._get_content_maps([version_id])

816

return contents_map[version_id]

817

818

def _check_versions_present(self, version_ids):

819

"""Check that all specified versions are present."""

820

self._index.check_versions_present(version_ids)

821

822

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

823

nostore_sha, random_id, check_content):

824

"""See VersionedFile.add_lines_with_ghosts()."""

825

self._check_add(version_id, lines, random_id, check_content)

826

return self._add(version_id, lines, parents, self.delta,

827

parent_texts, None, nostore_sha, random_id)

828

829

def _add_lines(self, version_id, parents, lines, parent_texts,

830

left_matching_blocks, nostore_sha, random_id, check_content):

831

"""See VersionedFile.add_lines."""

832

self._check_add(version_id, lines, random_id, check_content)

833

self._check_versions_present(parents)

834

return self._add(version_id, lines[:], parents, self.delta,

835

parent_texts, left_matching_blocks, nostore_sha, random_id)

836

837

def _check_add(self, version_id, lines, random_id, check_content):
    """check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If random_id is false and version_id
        is already in this knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id:
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

851

852

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :return: (digest, text_length, content) for the added text.
    :raises errors.ExistingContent: If the digest of the text equals
        nostore_sha.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = [parent for parent in parents
                       if self.has_version(parent)]
    if parent_texts is None:
        parent_texts = {}

    # can only compress against the left most present parent.
    if delta:
        if len(present_parents) == 0 or present_parents[0] != parents[0]:
            delta = False

    text_length = len(line_bytes)
    options = []
    if lines and lines[-1][-1] != '\n':
        # copy the contents of lines, so the caller's list is untouched
        # when the final newline is added.
        lines = lines[:]
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        # get mixed annotation + content and feed it into the
        # serialiser.
        store_lines = self.factory.lower_fulltext(content)
    size, record_bytes = self._data._record_to_data(version_id, digest,
                                                    store_lines)

    access_memo = self._data.add_raw_records([size], record_bytes)[0]
    self._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

922

923

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no work; progress_bar is not used.
    """
    return None

925

926

def _clone_text(self, new_version_id, old_version_id, parents):

927

"""See VersionedFile.clone_text()."""

928

# FIXME RBC 20060228 make fast by only inserting an index with null

929

# delta.

930

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

931

932

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Implemented as a single-version call to get_line_list().
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

935

936

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is what the data layer's read_records_iter() yields as the
    record content.
    digest is the SHA1 digest of this version id after all steps are done
    next is the build-parent of the version, i.e. the leftmost ancestor.
    If the method is fulltext, next will be None.
    """
    position_map = self._get_components_positions(version_ids)
    # Each position_map value is (method, index_memo, next); only the
    # index_memo is needed to read the record.
    records = []
    for component_id, (_method, index_memo, _next) in position_map.iteritems():
        records.append((component_id, index_memo))
    record_map = {}
    for record in self._data.read_records_iter(records):
        component_id, content, digest = record
        method, _index_memo, build_parent = position_map[component_id]
        record_map[component_id] = (method, content, digest, build_parent)

    return record_map

957

958

def get_text(self, version_id):
    """See VersionedFile.get_text

    Implemented as a single-version call to get_texts().
    """
    texts = self.get_texts([version_id])
    return texts[0]

961

962

def get_texts(self, version_ids):
    """Return the text of each listed version as a single joined string."""
    texts = []
    for version_lines in self.get_line_list(version_ids):
        texts.append(''.join(version_lines))
    return texts

964

965

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Every id is first passed to check_not_reserved_id(). The result has
    one entry per id in version_ids, in the same order.
    """
    for requested in version_ids:
        self.check_not_reserved_id(requested)
    text_map, _content_map = self._get_content_maps(version_ids)
    line_lists = []
    for requested in version_ids:
        line_lists.append(text_map[requested])
    return line_lists

971

972

_get_lf_split_line_list = get_line_list

973

974

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises KnitCorrupt: If the sha-1 of a rebuilt text does not match the
        digest recorded for that version.
    """
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Content of every component built so far, shared between versions.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at the first
        # component whose content has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = build_parent

        # Apply the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Strip on a copy so the shared content_map entry is untouched.
            content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1024

1025

@staticmethod

1026

def _apply_delta(lines, delta):

1027

"""Apply delta to lines."""

1028

lines = list(lines)

1029

offset = 0

1030

for start, end, count, delta_lines in delta:

1031

lines[offset+start:offset+end] = delta_lines

1032

offset = offset + (start - end) + count

1033

return lines

1034

1035

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Generator yielding every line produced by the content factory for the
    stored record of each requested version.

    :param version_ids: The versions to walk; None means all of
        self.versions().
    :param pb: A progress bar to update; None means a DummyProgress.
    :raises RevisionNotPresent: if any requested version is absent. As this
        is a generator, the error surfaces on first iteration.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    wanted = set(version_ids)
    # Refuse the whole request if any single version is unavailable.
    for wanted_id in wanted:
        if not self.has_version(wanted_id):
            raise RevisionNotPresent(wanted_id, self.filename)
    # Which versions are "included" is the caller's concern; all we need is
    # (version_id, access memo) pairs, queued in the order versions() uses.
    records = [(candidate, self._index.get_position(candidate))
               for candidate in self.versions()
               if candidate in wanted]
    total = len(records)
    done = 0
    for version_id, data, sha_value in \
            self._data.read_records_iter(records):
        pb.update('Walking content.', done, total)
        done += 1
        method = self._index.get_method(version_id)
        assert method in ('fulltext', 'line-delta')
        if method == 'line-delta':
            record_lines = self.factory.get_linedelta_content(data)
        else:
            record_lines = self.factory.get_fulltext_content(data)
        for line in record_lines:
            yield line
    pb.update('Walking content.', total, total)

1072

1073

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    The request is handed straight to the knit index
    (self._index.iter_parents); nothing is filtered or reordered here.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    return self._index.iter_parents(version_ids)

1083

1084

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: The count reported by the knit index.
    """
    return self._index.num_versions()

# len(knit) is an alias for knit.num_versions().
__len__ = num_versions

1089

1090

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    The work is delegated to the content factory, which is handed this
    knit and the version to annotate.
    """
    return self.factory.annotate_iter(self, version_id)

1093

1094

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :param version_id: The version whose parents are wanted.
    :return: Whatever self._index.get_parents returns for version_id.
    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    # (i.e. ask the index first and translate a KeyError afterwards.)
    try:
        return self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)

1103

1104

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :param version_id: The version whose parents are wanted.
    :return: Whatever self._index.get_parents_with_ghosts returns.
    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    try:
        return self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)

1110

1111

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id (any basestring) or a collection
        of version ids.
    :param topo_sorted: Passed through to the index.
    :return: [] when no versions were requested, otherwise the answer of
        self._index.get_ancestry.
    """
    # A bare string means "just this one version".
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1118

1119

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id (any basestring) or a collection
        of version ids.
    :return: [] when no versions were requested, otherwise the answer of
        self._index.get_ancestry_with_ghosts.
    """
    # A bare string means "just this one version".
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1126

1127

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the ancestry and the annotated text of both versions and
    hands them to merge._plan_annotate_merge.

    :param ver_a: Version id of one side of the merge.
    :param ver_b: Version id of the other side of the merge.
    :return: The result of merge._plan_annotate_merge.
    """
    # The ancestries are only used as sets, so skip the topological sort.
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    return merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)

1135

1136

1137

class _KnitComponentFile(object):
    """Common base for the individual files that implement a knit database.

    An instance only remembers where its file lives (a transport plus a
    file name) and the settings to use when the file is created.
    check_header() reads self.HEADER, which this class does not define, so
    subclasses are expected to provide it.
    """

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Record the location and creation settings; performs no I/O.

        :param transport: The transport the file is accessed through.
        :param filename: Name of the file on that transport.
        :param mode: Access mode, stored as-is.
        :param file_mode: Mode to create the file with, stored as-is.
        :param create_parent_dir: Stored as-is for use at creation time.
        :param dir_mode: Mode for created directories, stored as-is.
        """
        # Where the file is.
        self._transport = transport
        self._filename = filename
        # How to open / create it.
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # Always starts out False here.
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Validate the first line read from fp against self.HEADER.

        :raises NoSuchFile: if fp yields no data at all.
        :raises KnitHeaderError: if the first line is not self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file
            # doesn't exist yet.
            raise errors.NoSuchFile(self._full_path())
        elif first_line != self.HEADER:
            raise KnitHeaderError(
                badline=first_line,
                filename=self._transport.abspath(self._filename))

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1166

1167

1168

class _KnitIndex(_KnitComponentFile):

1169

"""Manages knit index file.

1170

1171

The index is already kept in memory and read on startup, to enable

1172

fast lookups of revision information. The cursor of the index

1173

file is always pointing to the end, making it easy to append

1174

entries.

1175

1176

_cache is a cache for fast mapping from version id to a Index

1177

object.

1178

1179

_history is a cache for fast mapping from indexes to version ids.

1180

1181

The index data format is dictionary compressed when it comes to

1182

parent references; an index entry may only have parents with a

1183

lower index number. As a result, the index is topologically sorted.

1184

1185

Duplicate entries may be written to the index for a single version id

1186

if this is done then the latter one completely replaces the former:

1187

this allows updates to correct version and parent information.

1188

Note that the two entries may share the delta, and that successive

1189

annotations and references MUST point to the first entry.

1190

1191

The index file on disc contains a header, followed by one line per knit

1192

record. The same revision can be present in an index file more than once.

1193

The first occurrence gets assigned a sequence number starting from 0.

1194

1195

The format of a single line is

1196

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1197

REVISION_ID is a utf8-encoded revision id

1198

FLAGS is a comma separated list of flags about the record. Values include

1199

no-eol, line-delta, fulltext.

1200

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1201

that the compressed data starts at.

1202

LENGTH is the ascii representation of the length of the data file.

1203

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1204

REVISION_ID.

1205

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1206

revision id already in the knit that is a parent of REVISION_ID.

1207

The ' :' marker is the end of record marker.

1208

1209

partial writes:

1210

when a write is interrupted to the index file, it will result in a line

1211

that does not end in ' :'. If the ' :' is not present at the end of a line,

1212

or at the end of the file, then the record that is missing it will be

1213

ignored by the parser.

1214

1215

When writing new records to the index file, the data is preceded by '\n'

1216

to ensure that records always start on new lines even if the last write was

1217

interrupted. As a result it's normal for the last line in the index to be

1218

missing a trailing newline. One can be added with no harmful effects.

1219

"""

1220

1221

HEADER = "# bzr knit index 8\n"

1222

1223

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1224

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1225

1226

def _cache_version(self, version_id, options, pos, size, parents):
    """Cache a version record in the history array and index cache.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
    indexes).

    The cache entry stored is the tuple
    (version_id, options, pos, size, parents, sequence_number).
    """
    try:
        # A repeated version id keeps the sequence number of its first
        # entry; only the other details are replaced.
        sequence = self._cache[version_id][5]
    except KeyError:
        # First sighting: its position in _history is its sequence number.
        sequence = len(self._history)
        self._history.append(version_id)
    entry = (version_id, options, pos, size, parents, sequence)
    self._cache[version_id] = entry

1246

1247

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Open the index file and load it into memory, or arrange to create it.

    :param transport: The transport the index file is accessed through.
    :param filename: Name of the index file on that transport.
    :param mode: Access mode; a missing file is only tolerated when this
        is 'w'.
    :param create: If True (and mode is 'w'), a missing file is created
        instead of being an error.
    :param file_mode: Mode used when the file is created.
    :param create_parent_dir: Stored by the base class for use when the
        file is created later.
    :param delay_create: If True, a missing file is not created now; the
        instance is only flagged with _need_to_create.
    :param dir_mode: Stored by the base class for use when the file is
        created later.
    :raises NoSuchFile: if the file is missing (or reported as such by
        _load_data) and mode/create do not allow creating it.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    try:
        fp = self._transport.get(self._filename)
        try:
            # _load_data may raise NoSuchFile if the target knit is
            # completely empty.
            _load_data(self, fp)
        finally:
            fp.close()
    except NoSuchFile:
        if mode != 'w' or not create:
            # Not allowed to create the file: the error stands.
            raise
        elif delay_create:
            # Defer writing until add_versions has something to write.
            self._need_to_create = True
        else:
            # Create the file now, containing only the header line.
            self._transport.put_bytes_non_atomic(
                self._filename, self.HEADER, mode=self._file_mode)

1275

1276

def get_graph(self):
    """Return a list of the node:parents lists from this knit index.

    :return: A list of (version_id, parents) pairs, one per cache entry.
        The parents are the stored value, i.e. they are not filtered
        against the cache.
    """
    # idx[4] is the parents slot of a cache entry.
    return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

1279

1280

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: An iterable of version ids to start from.
    :param topo_sorted: If True the result is run through topo_sort,
        otherwise the keys of the collected graph are returned as-is.
    :raises RevisionNotPresent: if a requested version is not in the
        cache. Parents missing from the cache (ghosts) are dropped rather
        than reported.
    """
    cache = self._cache
    # version -> parents present in the cache, for everything reachable.
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            entry = cache[current]
        except KeyError:
            raise RevisionNotPresent(current, self._filename)
        # trim ghosts
        present_parents = [p for p in entry[4] if p in cache]
        # only queue parents that have not been completed yet
        for parent in present_parents:
            if parent not in ancestry:
                todo.add(parent)
        ancestry[current] = present_parents
    if topo_sorted:
        return topo_sort(ancestry.items())
    return ancestry.keys()

1299

1300

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: The version ids to start from; each must be present.
    :return: The topologically sorted ancestry. Referenced versions that
        are not in the cache (ghosts) are included as parentless nodes.
    :raises RevisionNotPresent: (from check_versions_present) if a
        requested version is absent.
    """
    self.check_versions_present(versions)
    cache = self._cache
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        if current in cache:
            parents = cache[current][4]
            # queue whatever has not been completed yet
            todo.update([p for p in parents if p not in ancestry])
            ancestry[current] = parents
        else:
            # ghost, fake it
            ancestry[current] = []
    return topo_sort(ancestry.items())

1319

1320

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents), where parents
        is a tuple built from self.get_parents(version_id). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    for version_id in version_ids:
        try:
            yield version_id, tuple(self.get_parents(version_id))
        except KeyError:
            # get_parents indexes self._cache directly, so an absent
            # version shows up as KeyError; skip it as documented.
            pass

1334

1335

def num_versions(self):
    """Return the number of distinct version ids in the index.

    This is the length of _history, which _cache_version only appends to
    the first time a version id is seen.
    """
    return len(self._history)

# len(index) is an alias for index.num_versions().
__len__ = num_versions

1339

1340

def get_versions(self):
    """Get all the versions in the file. not topologically sorted.

    :return: The index's own _history list (not a copy).
    """
    return self._history

1343

1344

def _version_list_to_index(self, versions):
    """Encode a list of version ids as the parents field of an index line.

    A version present in the cache is written as its sequence number; any
    other version is written as '.' followed by the version id.

    :param versions: An iterable of version ids.
    :return: The encoded references joined by single spaces.
    """
    cache = self._cache
    tokens = []
    for version in versions:
        try:
            # slot 5 of a cache entry is the sequence number
            token = str(cache[version][5])
        except KeyError:
            token = '.' + version
        tokens.append(token)
    return ' '.join(tokens)

1355

1356

def add_version(self, version_id, options, index_memo, parents):
    """Add a version record to the index.

    Convenience wrapper that passes a single
    (version_id, options, index_memo, parents) tuple to add_versions.
    """
    self.add_versions(((version_id, options, index_memo, parents),))

1359

1360

def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    The new records are cached in memory and appended to the index file
    in a single write (or, if the file is still pending creation, written
    together with the header). If anything fails, the in-memory state is
    restored before the error propagates.

    :param versions: a list of tuples:
        (version_id, options, (index, pos, size), parents).
        The index element of the memo is unpacked but not used here.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed. This implementation
        never consults the flag.
    """
    lines = []
    # Snapshots used to roll back if anything below fails.
    orig_history = self._history[:]
    orig_cache = self._cache.copy()

    try:
        for version_id, options, (index, pos, size), parents in versions:
            # Each record starts with '\n' so that it begins on a fresh
            # line even if a previous write was interrupted.
            line = "\n%s %s %s %s %s :" % (version_id,
                                           ','.join(options),
                                           pos,
                                           size,
                                           self._version_list_to_index(parents))
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if not self._need_to_create:
            self._transport.append_bytes(self._filename, ''.join(lines))
        else:
            # The file does not exist yet: write header and records at once.
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = orig_history
        self._cache = orig_cache
        raise

1400

1401

def has_version(self, version_id):
    """True if the version is in the index.

    This is a membership test on the in-memory cache; no I/O is done.
    """
    return version_id in self._cache

1404

1405

def get_position(self, version_id):
    """Return details needed to access the version.

    .kndx indices do not support split-out data, so return None for the
    index field.

    :return: a tuple (None, data position, size) to hand to the access
        logic to get the record.
    :raises KeyError: if version_id is not in the cache (the lookup is a
        plain dict access).
    """
    entry = self._cache[version_id]
    # Cache entries are (version_id, options, pos, size, parents, index).
    return None, entry[2], entry[3]

1416

1417

def get_method(self, version_id):
    """Return compression method of specified version.

    :return: 'fulltext' if that flag is among the version's options
        (it wins even when 'line-delta' is also present), otherwise
        'line-delta'.
    :raises RevisionNotPresent: if version_id is not in the cache.
    :raises KnitIndexUnknownMethod: if the options name neither method.
    """
    try:
        entry = self._cache[version_id]
    except KeyError:
        raise RevisionNotPresent(version_id, self._filename)
    options = entry[1]
    if 'fulltext' in options:
        return 'fulltext'
    if 'line-delta' in options:
        return 'line-delta'
    raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1429

1430

def get_options(self, version_id):
    """Return the options recorded for version_id.

    The value comes straight from the cache entry, exactly as it was
    stored (presumably a list of flag strings such as 'fulltext' or
    'no-eol' -- verify against _load_data). It is not a joined
    "foo,bar" string.

    :raises KeyError: if version_id is not in the cache.
    """
    return self._cache[version_id][1]

1436

1437

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :return: A new list of the recorded parents that are themselves in
        the cache, in recorded order.
    :raises KeyError: if version_id is not in the cache.
    """
    cache = self._cache
    present = []
    # slot 4 of a cache entry holds the recorded parents
    for parent in cache[version_id][4]:
        if parent in cache:
            present.append(parent)
    return present

1441

1442

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    :return: The parents object stored in the cache entry itself (not a
        copy), including parents that are not in the index.
    :raises KeyError: if version_id is not in the cache.
    """
    return self._cache[version_id][4]

1445

1446

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: An iterable of version ids.
    :raises RevisionNotPresent: for the first version id found that is
        not in the cache.
    """
    cache = self._cache
    for version_id in version_ids:
        if version_id not in cache:
            raise RevisionNotPresent(version_id, self._filename)

1452

1453

1454

class KnitGraphIndex(object):

1455

"""A knit index that builds on GraphIndex."""

1456

1457

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of bzrlib.index.GraphIndex.
    :param deltas: Allow delta-compressed records.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :param parents: If True, record knits parents, if not do not record
        parents.
    :raises KnitCorrupt: if deltas is requested without parents.
    """
    self._graph_index = graph_index
    self._deltas = deltas
    self._add_callback = add_callback
    self._parents = parents
    # The check runs after the attributes are assigned, so the instance
    # handed to KnitCorrupt is already populated.
    if deltas and not parents:
        raise KnitCorrupt(self, "Cannot do delta compression without "
            "parent tracking.")

1475

1476

def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    Generator over the graph index nodes found for keys. When the index
    does not track parents, each node is padded with an empty reference
    tuple so that callers always see (index, key, value, refs).

    :param keys: An iterable of index keys, - 1-tuples.
    :param check_present: If True, raise RevisionNotPresent once all found
        nodes have been yielded if any requested key was not found.
    """
    wanted = set(keys)
    seen = set()
    tracks_parents = self._parents
    for node in self._graph_index.iter_entries(wanted):
        if tracks_parents:
            yield node
        else:
            # adapt parentless index to the rest of the code.
            yield node[0], node[1], node[2], ()
        seen.add(node[1])
    if check_present:
        absent = wanted.difference(seen)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

1496

1497

def _present_keys(self, version_ids):
    """Return the set of keys, out of those given, found in the index.

    NOTE: despite the parameter name, the values are passed to
    _get_entries unchanged, so they must already be index keys.
    """
    return set([
        node[1] for node in self._get_entries(version_ids)])

1500

1501

def _parentless_ancestry(self, versions):
    """Honour the get_ancestry API for parentless knit indices.

    Without parent information the answer is just the requested versions
    themselves, provided they all exist.

    :param versions: An iterable of version ids.
    :return: A list of the requested version ids (order not defined, as
        it comes from a set).
    :raises RevisionNotPresent: if any requested version is absent.
    """
    wanted_keys = self._version_ids_to_keys(versions)
    present_keys = self._present_keys(wanted_keys)
    missing = set(wanted_keys).difference(present_keys)
    if missing:
        raise RevisionNotPresent(missing.pop(), self)
    return list(self._keys_to_version_ids(present_keys))

1509

1510

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    Walks the graph index one generation per loop pass: every pass asks
    the index for all pending keys at once, records their parents and
    queues the parents not seen so far.

    :param versions: An iterable of version ids to start from.
    :param topo_sorted: If True the result is run through topo_sort,
        otherwise the graph keys are used in dictionary order.
    :return: A list of version ids.
    :raises RevisionNotPresent: if a requested version is not in the
        index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    # keys that were asked for in some pass but not returned by the index
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        found = set()
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            # dont ask for ghosties - otherwise
            # we we can end up looping with pending
            # being entirely ghosted.
            graph[key] = [parent for parent in node_refs[0]
                if parent not in ghosts]
            # queue parents
            for parent in graph[key]:
                # dont examine known nodes again
                if parent in graph:
                    continue
                pending.add(parent)
            found.add(key)
        # whatever was requested this pass but not returned is a ghost
        ghosts.update(this_iteration.difference(found))
    if versions.difference(graph):
        # one of the originally requested versions turned out to be absent
        raise RevisionNotPresent(versions.difference(graph).pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        result_keys = graph.iterkeys()
    # keys are 1-tuples; unwrap them back into version ids
    return [key[0] for key in result_keys]

1548

1549

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Same walk as get_ancestry, except that referenced keys which the
    index does not contain are kept in the result as parentless nodes.

    :param versions: An iterable of version ids to start from.
    :return: A topologically sorted list of version ids.
    :raises RevisionNotPresent: if a requested version is not in the
        index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            graph[key] = node_refs[0]
            # queue parents
            for parent in graph[key]:
                # dont examine known nodes again
                if parent in graph:
                    continue
                pending.add(parent)
        # keys requested this pass that the index did not return
        missing_versions = this_iteration.difference(graph)
        # a missing key is only an error if the caller asked for it
        missing_needed = versions.intersection(missing_versions)
        if missing_needed:
            raise RevisionNotPresent(missing_needed.pop(), self)
        for missing_version in missing_versions:
            # add a key, no parents
            graph[missing_version] = []
            pending.discard(missing_version) # don't look for it
    result_keys = topo_sort(graph.items())
    # keys are 1-tuples; unwrap them back into version ids
    return [key[0] for key in result_keys]

1582

1583

def get_graph(self):
    """Return a list of the node:parents lists from this knit index.

    :return: A list of (version_id, parents) pairs where parents is a
        tuple of version ids taken from the first reference list of each
        node. For a parentless index every version is paired with ().
    """
    if self._parents:
        # keys and references are 1-tuples; unwrap both.
        return [(key[0], tuple([ref[0] for ref in refs[0]]))
                for index, key, value, refs
                in self._graph_index.iter_all_entries()]
    return [(version_id, ()) for version_id in self.get_versions()]

1591

1592

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        Parents that are not present in the index are left out of the
        parents tuple. The order is undefined, allowing for different
        optimisations in the underlying implementation.
    """
    if not self._parents:
        # No parent information recorded: every present version gets ().
        for node in self._get_entries(self._version_ids_to_keys(version_ids)):
            yield node[1][0], ()
        return
    nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
    referenced = set()
    known_present = set()
    for node in nodes:
        referenced.update(node[3][0])
        # any node we are querying must be present
        known_present.add(node[1])
    # One more index query settles which of the other parents exist.
    unresolved = referenced.difference(known_present)
    known_present.update(self._present_keys(unresolved))
    for node in nodes:
        parents = [parent[0] for parent in node[3][0]
                   if parent in known_present]
        yield node[1][0], tuple(parents)

1620

1621

def num_versions(self):
    """Return the number of entries in the graph index.

    The count is obtained by iterating over every entry of the index.
    """
    return len(list(self._graph_index.iter_all_entries()))

# len(index) is an alias for index.num_versions().
__len__ = num_versions

1625

1626

def get_versions(self):
    """Get all the versions in the file. not topologically sorted.

    :return: A new list with the version id of every entry, in the order
        the graph index iterates them.
    """
    # node[1] is the key, a 1-tuple wrapping the version id.
    return [node[1][0] for node in self._graph_index.iter_all_entries()]

1629

1630

def has_version(self, version_id):
    """True if the version is in the index.

    Implemented as an index query for the single key of version_id.
    """
    return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

1633

1634

def _keys_to_version_ids(self, keys):
    """Return a tuple holding the first element of each key in keys."""
    return tuple(key[0] for key in keys)

1636

1637

def get_position(self, version_id):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    node = self._get_node(version_id)
    # The node value is one flag character followed by "<pos> <size>".
    fields = node[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

1646

1647

def get_method(self, version_id):
    """Return compression method of specified version.

    :return: 'fulltext' whenever deltas are disabled for this index (the
        index is not consulted in that case); otherwise the answer of
        _parent_compression for the node's second reference list.
    """
    if not self._deltas:
        return 'fulltext'
    return self._parent_compression(self._get_node(version_id)[3][1])

1652

1653

def _parent_compression(self, reference_list):
    """Map a node's compression-parent reference list to a method name.

    :param reference_list: The second reference list of an index node.
    :return: 'fulltext' if the list is empty, otherwise 'line-delta'.
    """
    # use the second reference list to decide if this is delta'd or not.
    if len(reference_list) == 0:
        return 'fulltext'
    return 'line-delta'

1659

1660

def _get_node(self, version_id):
    """Return the first index entry found for version_id.

    :raises RevisionNotPresent: if the index returns no entry.
    """
    try:
        return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
    except IndexError:
        # an empty result list means the key is not in the index
        raise RevisionNotPresent(version_id, self)

1665

1666

def get_options(self, version_id):
    """Return the list of options for version_id.

    The list is rebuilt from the index node: first the compression method
    ('fulltext' or 'line-delta'), then 'no-eol' if the node value starts
    with 'N'.

    e.g. ['line-delta', 'no-eol']
    """
    node = self._get_node(version_id)
    if self._deltas:
        method = self._parent_compression(node[3][1])
    else:
        method = 'fulltext'
    options = [method]
    if node[2][0] == 'N':
        options.append('no-eol')
    return options

1679

1680

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :return: The parents tuple that iter_parents yields for version_id.
    :raises RevisionNotPresent: if iter_parents yields nothing for it.
    """
    parents = list(self.iter_parents([version_id]))
    if not parents:
        # missing key
        raise errors.RevisionNotPresent(version_id, self)
    # each item is (version_id, parents); only one version was requested
    return parents[0][1]

1687

1688

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    :return: A tuple of parent version ids taken from the node's first
        reference list, or () when this index does not track parents.
    :raises RevisionNotPresent: (via _get_entries with check_present) if
        version_id is not in the index.
    """
    # The lookup is done even for parentless indices so that a missing
    # version is still reported.
    nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
                                   check_present=True))
    if not self._parents:
        return ()
    return self._keys_to_version_ids(nodes[0][3][0])

1695

1696

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: An iterable of version ids.
    :raises RevisionNotPresent: naming one missing key if any of the
        versions is not in the index. Note that the value reported is the
        key (a 1-tuple), not the bare version id.
    """
    keys = self._version_ids_to_keys(version_ids)
    present = self._present_keys(keys)
    missing = keys.difference(present)
    if missing:
        raise RevisionNotPresent(missing.pop(), self)

1703

1704

def add_version(self, version_id, options, access_memo, parents):
    """Add a version record to the index.

    Convenience wrapper that passes a single
    (version_id, options, access_memo, parents) tuple to add_versions and
    returns its result.
    """
    return self.add_versions(((version_id, options, access_memo, parents),))

1707

1708

def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param versions: a list of tuples:
        (version_id, options, access_memo, parents), where access_memo
        is an (index, pos, size) tuple.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises ReadOnlyError: if no add_callback was supplied.
    :raises KnitCorrupt: if a record does not fit this index (line-delta
        in a non-delta index, parents in a parentless index) or
        contradicts an entry that is already present.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.
    # check for dups

    # key -> (value, node_refs) for every record still to be added
    keys = {}
    for (version_id, options, access_memo, parents) in versions:
        index, pos, size = access_memo
        key = (version_id, )
        parents = tuple((parent, ) for parent in parents)
        # The value is one flag character ('N' for no-eol, ' ' otherwise)
        # followed by "<pos> <size>".
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        if not self._deltas:
            if 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                if 'line-delta' in options:
                    # The first parent is recorded as the delta basis.
                    # NOTE(review): a line-delta record with no parents
                    # would raise IndexError here -- confirm callers
                    # never pass one.
                    node_refs = (parents, (parents[0],))
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    if not random_id:
        # Records already in the index must match exactly; matching ones
        # are dropped so that they are not added a second time.
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
    result = []
    if self._parents:
        for key, (value, node_refs) in keys.iteritems():
            result.append((key, value, node_refs))
    else:
        # parentless indices take nodes without a references element
        for key, (value, node_refs) in keys.iteritems():
            result.append((key, value))
    self._add_callback(result)

1769

1770

def _version_ids_to_keys(self, version_ids):
    """Return a set of 1-tuple keys, one per version id in version_ids."""
    return set((version_id, ) for version_id in version_ids)

1772

1773

1774

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Mode used when the file is created.
        :param _dir_mode: Directory mode used when the file is created.
        :param _need_to_create: True if the first write must create the
            file rather than append to it.
        :param _create_parent_dir: Passed to the transport when the file
            is created by the first write.
        """
        # Where the data lives.
        self._transport = transport
        self._filename = filename
        # How to create it, should that still be needed.
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data
            segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo
            is a tuple - (index, pos, length), where the index field is
            always None for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # First write: create the file, so the data starts at 0.
            self._transport.put_bytes_non_atomic(
                self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            position = 0
        else:
            # The value returned by append_bytes is used as the offset of
            # the first new record.
            position = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, position, size))
            position += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(
            self._filename, '', mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            handle = self._transport.get(self._filename)
        except NoSuchFile:
            return None
        return handle

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = []
        for index, pos, size in memos_for_retrieval:
            ranges.append((pos, size))
        for pos, data in self._transport.readv(self._filename, ranges):
            yield data

1852

1853

1854

class _PackAccess(object):

1855

"""Access to knit records via a collection of packs."""

1856

1857

def __init__(self, index_to_packs, writer=None):
    """Create a _PackAccess object.

    :param index_to_packs: A dict mapping index objects to the transport
        and file names for obtaining data.
    :param writer: A tuple (pack.ContainerWriter, write_index) which
        contains the pack to write, and the index that reads from it will
        be associated with.
    """
    if writer:
        self.container_writer = writer[0]
        self.write_index = writer[1]
    else:
        # No writer supplied: both writer attributes are left as None.
        self.container_writer = None
        self.write_index = None
    self.indices = index_to_packs

1873

1874

def add_raw_records(self, sizes, raw_data):

1875

"""Add raw knit bytes to a storage area.

1876

1877

The data is spooled to the container writer in one bytes-record per

1878

raw data item.

1879

1880

:param sizes: An iterable containing the size of each raw data segment.

1881

:param raw_data: A bytestring containing the data.

1882

:return: A list of memos to retrieve the record later. Each memo is a

1883

tuple - (index, pos, length), where the index field is the

1884

write_index object supplied to the PackAccess object.

1885

"""

1886

assert type(raw_data) == str, \

1887

'data must be plain bytes was %s' % type(raw_data)

1888

result = []

1889

offset = 0

1890

for size in sizes:

1891

p_offset, p_length = self.container_writer.add_bytes_record(

1892

raw_data[offset:offset+size], [])

1893

offset += size

1894

result.append((self.write_index, p_offset, p_length))

1895

return result

1896

1897

def create(self):

1898

"""Pack based knits do not get individually created."""

1899

1900

def get_raw_records(self, memos_for_retrieval):

1901

"""Get the raw bytes for a records.

1902

1903

:param memos_for_retrieval: An iterable containing the (index, pos,

1904

length) memo for retrieving the bytes. The Pack access method

1905

looks up the pack to use for a given record in its index_to_pack

1906

map.

1907

:return: An iterator over the bytes of the records.

1908

"""

1909

# first pass, group into same-index requests

1910

request_lists = []

1911

current_index = None

1912

for (index, offset, length) in memos_for_retrieval:

1913

if current_index == index:

1914

current_list.append((offset, length))

1915

else:

1916

if current_index is not None:

1917

request_lists.append((current_index, current_list))

1918

current_index = index

1919

current_list = [(offset, length)]

1920

# handle the last entry

1921

if current_index is not None:

1922

request_lists.append((current_index, current_list))

1923

for index, offsets in request_lists:

1924

transport, path = self.indices[index]

1925

reader = pack.make_readv_reader(transport, path, offsets)

1926

for names, read_func in reader.iter_records():

1927

yield read_func(None)

1928

1929

def open_file(self):

1930

"""Pack based knits have no single file."""

1931

return None

1932

1933

def set_writer(self, writer, index, (transport, packname)):

1934

"""Set a writer to use for adding data."""

1935

self.indices[index] = (transport, packname)

1936

self.container_writer = writer

1937

self.write_index = index

1938

1939

1940

class _KnitData(object):

1941

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1942

1943

The KnitData class provides the logic for parsing and using knit records,

1944

making use of an access method for the low level read and write operations.

1945

"""

1946

1947

def __init__(self, access):

1948

"""Create a KnitData object.

1949

1950

:param access: The access method to use. Access methods such as

1951

_KnitAccess manage the insertion of raw records and the subsequent

1952

retrieval of the same.

1953

"""

1954

self._access = access

1955

self._checked = False

1956

# TODO: jam 20060713 conceptually, this could spill to disk

1957

# if the cached size gets larger than a certain amount

1958

# but it complicates the model a bit, so for now just use

1959

# a simple dictionary

1960

self._cache = {}

1961

self._do_cache = False

1962

1963

def enable_cache(self):

1964

"""Enable caching of reads."""

1965

self._do_cache = True

1966

1967

def clear_cache(self):

1968

"""Clear the record cache."""

1969

self._do_cache = False

1970

self._cache = {}

1971

1972

def _open_file(self):

1973

return self._access.open_file()

1974

1975

def _record_to_data(self, version_id, digest, lines):

1976

"""Convert version_id, digest, lines into a raw data block.

1977

1978

:return: (len, a StringIO instance with the raw data ready to read.)

1979

"""

1980

bytes = (''.join(chain(

1981

["version %s %d %s\n" % (version_id,

1982

len(lines),

1983

digest)],

1984

lines,

1985

["end %s\n" % version_id])))

1986

assert bytes.__class__ == str

1987

compressed_bytes = bytes_to_gzip(bytes)

1988

return len(compressed_bytes), compressed_bytes

1989

1990

def add_raw_records(self, sizes, raw_data):

1991

"""Append a prepared record to the data file.

1992

1993

:param sizes: An iterable containing the size of each raw data segment.

1994

:param raw_data: A bytestring containing the data.

1995

:return: a list of index data for the way the data was stored.

1996

See the access method add_raw_records documentation for more

1997

details.

1998

"""

1999

return self._access.add_raw_records(sizes, raw_data)

2000

2001

def _parse_record_header(self, version_id, raw_data):

2002

"""Parse a record header for consistency.

2003

2004

:return: the header and the decompressor stream.

2005

as (stream, header_record)

2006

"""

2007

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2008

try:

2009

rec = self._check_header(version_id, df.readline())

2010

except Exception, e:

2011

raise KnitCorrupt(self._access,

2012

"While reading {%s} got %s(%s)"

2013

% (version_id, e.__class__.__name__, str(e)))

2014

return df, rec

2015

2016

def _check_header(self, version_id, line):

2017

rec = line.split()

2018

if len(rec) != 4:

2019

raise KnitCorrupt(self._access,

2020

'unexpected number of elements in record header')

2021

if rec[1] != version_id:

2022

raise KnitCorrupt(self._access,

2023

'unexpected version, wanted %r, got %r'

2024

% (version_id, rec[1]))

2025

return rec

2026

2027

def _parse_record(self, version_id, data):

2028

# profiling notes:

2029

# 4168 calls in 2880 217 internal

2030

# 4168 calls to _parse_record_header in 2121

2031

# 4168 calls to readlines in 330

2032

df = GzipFile(mode='rb', fileobj=StringIO(data))

2033

2034

try:

2035

record_contents = df.readlines()

2036

except Exception, e:

2037

raise KnitCorrupt(self._access,

2038

"While reading {%s} got %s(%s)"

2039

% (version_id, e.__class__.__name__, str(e)))

2040

header = record_contents.pop(0)

2041

rec = self._check_header(version_id, header)

2042

2043

last_line = record_contents.pop()

2044

if len(record_contents) != int(rec[2]):

2045

raise KnitCorrupt(self._access,

2046

'incorrect number of lines %s != %s'

2047

' for version {%s}'

2048

% (len(record_contents), int(rec[2]),

2049

version_id))

2050

if last_line != 'end %s\n' % rec[1]:

2051

raise KnitCorrupt(self._access,

2052

'unexpected version end line %r, wanted %r'

2053

% (last_line, version_id))

2054

df.close()

2055

return record_contents, rec[3]

2056

2057

def read_records_iter_raw(self, records):

2058

"""Read text records from data file and yield raw data.

2059

2060

This unpacks enough of the text record to validate the id is

2061

as expected but thats all.

2062

"""

2063

# setup an iterator of the external records:

2064

# uses readv so nice and fast we hope.

2065

if len(records):

2066

# grab the disk data needed.

2067

if self._cache:

2068

# Don't check _cache if it is empty

2069

needed_offsets = [index_memo for version_id, index_memo

2070

in records

2071

if version_id not in self._cache]

2072

else:

2073

needed_offsets = [index_memo for version_id, index_memo

2074

in records]

2075

2076

raw_records = self._access.get_raw_records(needed_offsets)

2077

2078

for version_id, index_memo in records:

2079

if version_id in self._cache:

2080

# This data has already been validated

2081

data = self._cache[version_id]

2082

else:

2083

data = raw_records.next()

2084

if self._do_cache:

2085

self._cache[version_id] = data

2086

2087

# validate the header

2088

df, rec = self._parse_record_header(version_id, data)

2089

df.close()

2090

yield version_id, data

2091

2092

def read_records_iter(self, records):

2093

"""Read text records from data file and yield result.

2094

2095

The result will be returned in whatever is the fastest to read.

2096

Not by the order requested. Also, multiple requests for the same

2097

record will only yield 1 response.

2098

:param records: A list of (version_id, pos, len) entries

2099

:return: Yields (version_id, contents, digest) in the order

2100

read, not the order requested

2101

"""

2102

if not records:

2103

return

2104

2105

if self._cache:

2106

# Skip records we have alread seen

2107

yielded_records = set()

2108

needed_records = set()

2109

for record in records:

2110

if record[0] in self._cache:

2111

if record[0] in yielded_records:

2112

continue

2113

yielded_records.add(record[0])

2114

data = self._cache[record[0]]

2115

content, digest = self._parse_record(record[0], data)

2116

yield (record[0], content, digest)

2117

else:

2118

needed_records.add(record)

2119

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2120

else:

2121

needed_records = sorted(set(records), key=operator.itemgetter(1))

2122

2123

if not needed_records:

2124

return

2125

2126

# The transport optimizes the fetching as well

2127

# (ie, reads continuous ranges.)

2128

raw_data = self._access.get_raw_records(

2129

[index_memo for version_id, index_memo in needed_records])

2130

2131

for (version_id, index_memo), data in \

2132

izip(iter(needed_records), raw_data):

2133

content, digest = self._parse_record(version_id, data)

2134

if self._do_cache:

2135

self._cache[version_id] = data

2136

yield version_id, content, digest

2137

2138

def read_records(self, records):

2139

"""Read records into a dictionary."""

2140

components = {}

2141

for record_id, content, digest in \

2142

self.read_records_iter(records):

2143

components[record_id] = (content, digest)

2144

return components

2145

2146

2147

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        :return: The number of records copied into the target, or 0 when
            there was nothing to copy. When a plain source has to be
            joined into an annotated target, the result of the generic
            InterVersionedFile.join is returned instead.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        # Annotated and plain knits store different record bodies, so a
        # mismatch between the two sides needs a conversion step.
        source_annotated = self.source.factory.annotated
        if source_annotated == self.target.factory.annotated:
            converter = None
        elif source_annotated:
            converter = self._anno_to_plain_converter
        else:
            # Plain -> annotated means building the annotations from
            # scratch; the generic join code handles that implicitly, so
            # delegate to it.
            return super(InterKnit, self).join(pb, msg, version_ids,
                                               ignore_missing)

        version_ids = self._get_source_version_ids(version_ids,
                                                   ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(
                self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order, restricted to what is missing.
            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                        and candidate in needed_versions):
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            source_index = self.source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = source_index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for fetched, planned in izip(raw_stream, copy_queue):
                version_id, raw_data = fetched
                version_id2, options, parents = planned
                assert version_id == version_id2, \
                    'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id,
                                               options, parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: Whatever the target's _record_to_data returns for the
            re-encoded record.
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        source_factory = self.source.factory
        target_factory = self.target.factory
        if 'fulltext' in options:
            content = source_factory.parse_fulltext(data, version_id)
            lines = target_factory.lower_fulltext(content)
        else:
            delta = source_factory.parse_line_delta(data, version_id,
                                                    plain=True)
            lines = target_factory.lower_line_delta(delta)
        return self.target._data._record_to_data(version_id, digest, lines)

2262

2263

2264

# Register InterKnit as an optimiser with InterVersionedFile.
# NOTE(review): presumably this is what lets knit-to-knit operations be
# routed to InterKnit - confirm against InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

2265

2266

2267

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a weave and target is a knit."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        :return: The number of versions added to the target, or 0 when
            there was nothing to copy.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids,
                                                   ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(
                self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order, restricted to what is missing.
            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                        and candidate in needed_versions):
                    version_list.append(candidate)

            # do the join:
            total = len(version_list)
            for done, version_id in enumerate(version_list):
                # Progress is reported before the version is converted.
                pb.update("Converting to knit", done, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
            return total
        finally:
            pb.finished()

2323

2324

2325

# Register WeaveToKnit as an optimiser with InterVersionedFile.
# NOTE(review): presumably this is what lets weave-to-knit operations be
# routed to WeaveToKnit - confirm against InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

2326

2327

2328

# Deprecated, use PatienceSequenceMatcher instead

2329

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2330

2331

2332

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit to read ancestry, texts and deltas from.
    :param revision_id: The version whose annotation is wanted.
    :return: An iterator over the annotation built for revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))
    annotations = {}
    for candidate in ancestry:
        if candidate in annotations:
            continue
        parents = knit.get_parents(candidate)
        # Matching blocks are only available when the version has parents
        # and is stored as a line-delta; otherwise reannotate gets None.
        uses_delta = (len(parents) != 0 and
                      knit._index.get_method(candidate) == 'line-delta')
        if uses_delta:
            _parent, _sha1, _noeol, delta = knit.get_delta(candidate)
            blocks = KnitContent.get_line_delta_blocks(
                delta, fulltext[parents[0]], fulltext[candidate])
        else:
            blocks = None
        reannotated = annotate.reannotate(
            [annotations[p] for p in parents],
            fulltext[candidate], candidate, blocks)
        annotations[candidate] = list(reannotated)
    return iter(annotations[revision_id])

2357

2358

2359

# Bind _load_data to the implementation in bzrlib._knit_load_data_c when
# that module can be imported, falling back to the one in
# bzrlib._knit_load_data_py otherwise.
try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »