/brz/remove-bazaar : revision 3015.2.18

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2007-11-26 21:15:26 UTC
mto: This revision was merged to the branch mainline in revision 3033.
Revision ID: robertc@robertcollins.net-20071126211526-zp59n29oqm2m8s99

Lock correctness for test_upgrade.py.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/ftp_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/authentication_conf.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). The delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

lru_cache,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

100

KnitHeaderError,

101

RevisionNotPresent,

102

RevisionAlreadyPresent,

103

)

104

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

105

from bzrlib.osutils import (

106

contains_whitespace,

107

contains_linebreaks,

108

sha_string,

109

sha_strings,

110

)

111

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

112

from bzrlib.tsort import topo_sort

113

import bzrlib.ui

114

import bzrlib.weave

115

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

116

117

118

# TODO: Split out code specific to this format into an associated object.

119

120

# TODO: Can we put in some kind of value to check that the index and data

121

# files belong together?

122

123

# TODO: accommodate binaries, perhaps by storing a byte count

124

125

# TODO: function to check whole file

126

127

# TODO: atomically append data, then measure backwards from the cursor

128

# position after writing to work out where it was located. we may need to

129

# bypass python file buffering.

130

131

DATA_SUFFIX = '.knit'

132

INDEX_SUFFIX = '.kndx'

133

134

135

class KnitContent(object):
    """Base class for the content of one knit version.

    The methods here rely on ``text()``, ``annotate_iter()`` and a
    ``_lines`` attribute, none of which this class defines itself.
    """

    def annotate(self):
        """Return the (origin, text) tuples of annotate_iter() as a list."""
        return [pair for pair in self.annotate_iter()]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Not implemented at this level; always raises NotImplementedError.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the hunks of a line-based delta from this content to new_lines.

        :param new_lines: content object for the target text; its ``text()``
            is diffed against ours and its ``_lines`` supply the hunk data.
        :return: iterator of (old_start, old_end, new_count, new_lines_slice),
            one per non-equal opcode of the patience diff.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        :param knit_delta: iterable of (start, end, count, lines) hunks.
        :param source: sequence of lines the delta applies to.
        :param target: sequence of lines the delta produces.
        :return: iterator of (source_pos, target_pos, length) triples, ending
            with a zero-length terminator block.
        """
        def trusted_length(src_at, tgt_at, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the final
            # line of each candidate run is compared explicitly.
            if length > 0:
                if source[src_at + length - 1] != target[tgt_at + length - 1]:
                    return length - 1
            return length

        total = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, new_text in knit_delta:
            gap = hunk_start - src_at
            matched = trusted_length(src_at, tgt_at, gap)
            if matched > 0:
                yield src_at, tgt_at, matched
            tgt_at += inserted + gap
            src_at = hunk_end
        tail = trusted_length(src_at, tgt_at, total - tgt_at)
        if tail > 0:
            yield src_at, tgt_at, tail
        yield src_at + (total - tgt_at), total, 0

185

186

187

class AnnotatedKnitContent(KnitContent):

188

"""Annotated content."""

189

190

def __init__(self, lines):

191

self._lines = lines

192

193

def annotate_iter(self):

194

"""Yield tuples of (origin, text) for each content line."""

195

return iter(self._lines)

196

197

def apply_delta(self, delta, new_version_id):

198

"""Apply delta to this object to become new_version_id."""

199

offset = 0

200

lines = self._lines

201

for start, end, count, delta_lines in delta:

202

lines[offset+start:offset+end] = delta_lines

203

offset = offset + (start - end) + count

204

205

def strip_last_line_newline(self):

206

line = self._lines[-1][1].rstrip('\n')

207

self._lines[-1] = (self._lines[-1][0], line)

208

209

def text(self):

210

try:

211

return [text for origin, text in self._lines]

212

except ValueError, e:

213

# most commonly (only?) caused by the internal form of the knit

214

# missing annotation information because of a bug - see thread

215

# around 20071015

216

raise KnitCorrupt(self,

217

"line in annotated knit missing annotation information: %s"

218

% (e,))

219

220

def copy(self):

221

return AnnotatedKnitContent(self._lines[:])

222

223

224

class PlainKnitContent(KnitContent):
    """Content stored as bare text lines, without per-line origins.

    annotate[_iter] on this content reports the same version for every
    line, so it is generally not useful on PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        self._version_id = version_id
        self._lines = lines

    def annotate_iter(self):
        """Yield (version_id, text) for each line, using this content's version."""
        for text in self._lines:
            yield self._version_id, text

    def apply_delta(self, delta, new_version_id):
        """Apply delta in place and record new_version_id as the version."""
        plain_lines = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            plain_lines[shift + start:shift + end] = delta_lines
            shift += (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of the lines."""
        duplicated = self._lines[:]
        return PlainKnitContent(duplicated, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newline characters from the last line."""
        last = self._lines[-1]
        self._lines[-1] = last.rstrip('\n')

    def text(self):
        """Return the list of lines itself (not a copy)."""
        return self._lines

258

259

260

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects.

    In serialised form every content line is ``origin text``: the origin,
    a single space, then the line itself.
    """

    annotated = True

    def make(self, lines, version_id):
        """Return content in which every line is attributed to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: iterable of serialised lines.
        :param version_id: not used by this method.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param version_id: forwarded to parse_line_delta, which requires it.
            It defaults to None so callers that pass only ``lines`` (which
            previously failed with a TypeError) keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: not used by this method.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e. (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        next = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next().split(' ', 1)[1] for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotation iterator of version_id's content in knit."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

370

371

372

class KnitPlainFactory(object):
    """Factory for creating plain (unannotated) Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent tagged with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        This is not a noop: the lines are wrapped, via make(), in a content
        object that carries version_id as a constant origin.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, hunk_lines) for each hunk in lines.

        :param lines: indexable sequence in which each ``start,end,count``
            header is followed by ``count`` content lines.
        :param version_id: not used by this method.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        The hunk headers are consumed and dropped; only the content lines
        that follow each header are yielded.
        """
        line_iter = iter(lines)
        read_line = line_iter.next
        for header in line_iter:
            count = int(header.split(',')[2])
            for unused in xrange(count):
                yield read_line()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta as header lines each followed by their content."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialised.append(header)
            serialised.extend(hunk_lines)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Delegate annotation of version_id in knit to annotate_knit()."""
        return annotate_knit(knit, version_id)

431

432

433

def make_empty_knit(transport, relpath):
    """Construct an empty, writable, unannotated knit at the given location.

    :param transport: transport the knit files are created on.
    :param relpath: path of the knit, without suffix.
    :return: the KnitVersionedFile that was constructed.
    """
    # KnitVersionedFile takes relpath before transport, and its third and
    # fourth positional parameters are file_mode and access_mode, so the
    # mode and factory are passed by keyword.  create=True is needed because
    # without it the constructor only opens an existing knit.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

436

437

438

class KnitVersionedFile(VersionedFile):

439

"""Weave-like structure with faster random access.

440

441

A knit stores a number of texts and a summary of the relationships

442

between them. Texts are identified by a string version-id. Texts

443

are normally stored and retrieved as a series of lines, but can

444

also be passed as single strings.

445

446

Lines are stored with the trailing newline (if any) included, to

447

avoid special cases for files with no final newline. Lines are

448

composed of 8-bit characters, not unicode. The combination of

449

these approaches should mean any 'binary' file can be safely

450

stored and retrieved.

451

"""

452

453

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
    factory=None, delta=True, create=False, create_parent_dir=False,
    delay_create=False, dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: path of the knit without suffix; INDEX_SUFFIX and
        DATA_SUFFIX are appended to it for the default index and data
        access objects.
    :param transport: transport passed to the default index and data
        access objects and kept as ``self.transport``.
    :param file_mode: passed through to the default index and data access
        objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: content factory; a KnitAnnotateFactory is used when
        this is not given.
    :param delta: stored as ``self.delta``.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: passed through to the default index and data access
        objects.
    :param index: An index to use for the knit.  When None a _KnitIndex
        is constructed.
    :param access_method: data access object to use.  When None a
        _KnitAccess is constructed.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta walks back
    # while looking for a fulltext.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # The index must be assigned before this point: len(self) is evaluated
    # below.  NOTE(review): presumably len(self) is answered from
    # self._index - confirm against __len__.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    # Create the data storage right away only for a new, empty knit whose
    # creation was not asked to be delayed.
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

493

494

def __repr__(self):
    """Return ``ClassName(location)`` using the transport's absolute path."""
    class_name = self.__class__.__name__
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (class_name, location)

497

498

def _check_should_delta(self, first_parents):
    """Decide whether a new version should be stored as a delta.

    Walks back along the first parent of each version, for at most
    _max_delta_chain steps, summing the sizes of the deltas passed.  When a
    fulltext is reached its size is compared with that sum.

    :param first_parents: parents of the version about to be added; only
        the first entry is followed.
    :return: True if we should create a new delta, False if we should use a
        full text (including when no fulltext was found within the limit).
    """
    accumulated_delta_size = 0
    candidates = first_parents
    for unused in xrange(self._max_delta_chain):
        ancestor = candidates[0]
        method = self._index.get_method(ancestor)
        index, pos, size = self._index.get_position(ancestor)
        if method == 'fulltext':
            # A delta is only worthwhile while the chain of deltas stays
            # smaller than the fulltext it is based on.
            return size > accumulated_delta_size
        accumulated_delta_size += size
        candidates = self._index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

526

527

def _add_raw_records(self, records, data):
    """Store pre-joined raw record data and index every record in it.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The data for the records, concatenated in the order of
        ``records``; each record occupies ``size`` bytes of it.
    """
    # write all the data
    sizes = [record[3] for record in records]
    access_memos = self._data.add_raw_records(sizes, data)
    new_entries = []
    start = 0
    for record, access_memo in zip(records, access_memos):
        version_id, options, parents, size = record
        new_entries.append((version_id, options, access_memo, parents))
        end = start + size
        if self._data._do_cache:
            # keep this record's slice of the raw data in the data cache
            self._data._cache[version_id] = data[start:end]
        start = end
    self._index.add_versions(new_entries)

547

548

def enable_cache(self):
    """Turn on caching in this knit's data object."""
    knit_data = self._data
    knit_data.enable_cache()

551

552

def clear_cache(self):
    """Clear the cache of this knit's data object; the index is untouched."""
    knit_data = self._data
    knit_data.clear_cache()

555

556

def copy_to(self, name, transport):
    """See VersionedFile.copy_to()."""
    index_target = name + INDEX_SUFFIX
    staged_index = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(staged_index,
        self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staged_index, index_target)

570

571

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport.

    The new knit uses this knit's factory and delta setting.  ``mode`` is
    accepted but not used by this implementation.
    """
    settings = {'factory': self.factory, 'delta': self.delta, 'create': True}
    return KnitVersionedFile(name, transport, **settings)

574

575

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.  They are returned in a partial order by compression
    parent, so that the deltas can be applied as the data stream is
    inserted; however note that compression parents will not be sent
    unless they were specifically requested, as the client may already
    have them.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.  reader_callable(length) reads from the
        concatenated raw records, in the order of the returned list;
        passing None reads everything that remains.
    :raises RevisionNotPresent: if a requested version is not in this knit.
    """
    required_version_set = frozenset(required_versions)
    # version_id -> (index_memo, options, parents)
    version_index = {}
    # list of revisions that can just be sent without waiting for their
    # compression parent
    ready_to_send = []
    # map from revision to the children based on it
    deferred = {}
    # first, read all relevant index data, enough to sort into the right
    # order to return
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        version_index[version_id] = (index_memo, options, parents)
        if parents and parents[0] in required_version_set:
            # must wait until the parent has been sent
            deferred.setdefault(parents[0], []). \
                append(version_id)
        else:
            # either a fulltext, or a delta whose parent the client did
            # not ask for and presumably already has
            ready_to_send.append(version_id)
    # build a list of results to return, plus instructions for data to
    # read from the file
    copy_queue_records = []
    result_version_list = []
    while ready_to_send:
        # XXX: pushing and popping lists may be a bit inefficient
        version_id = ready_to_send.pop(0)
        (index_memo, options, parents) = version_index[version_id]
        copy_queue_records.append((version_id, index_memo))
        # only the size (third element) of the index memo is used here
        none, data_pos, data_size = index_memo
        result_version_list.append((version_id, options, data_size,
            parents))
        if version_id in deferred:
            # now we can send all the children of this revision - we could
            # put them in anywhere, but we hope that sending them soon
            # after the fulltext will give good locality in the receiver
            ready_to_send[:0] = deferred.pop(version_id)
    assert len(deferred) == 0, \
        "Still have compressed child versions waiting to be sent"
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    for (version_id, raw_data), \
            (version_id2, options, _, parents) in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 result_version_list):
        assert version_id == version_id2, \
            'logic error, inconsistent results'
        raw_datum.append(raw_data)
    # All raw records are joined and held in memory; read() serves them.
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

654

655

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored line delta.

    :return: the result of KnitContent.get_line_delta_blocks for the
        stored delta, or None when version_id is not stored as a
        'line-delta'.
    """
    method = self._index.get_method(version_id)
    if method == 'line-delta':
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

660

661

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id (None if it has no parents), sha1 comes from the
        stored record, noeol tells whether 'no-eol' is among the index
        options, and delta is a list of line-delta hunks.  For a version
        stored as a fulltext the delta is computed against the parent's
        text (or against an empty text when there is no parent).
    """
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    options = self._index.get_options(version_id)
    noeol = 'no-eol' in options
    if 'fulltext' != self._index.get_method(version_id):
        # stored as a delta already: just parse it
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta
    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    matcher = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                   new_texts)
    return parent, sha1, noeol, self._make_line_delta(matcher, new_content)

686

687

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" or "knit-plain", depending on the factory.
    """
    annotated_part = "plain"
    if self.factory.annotated:
        annotated_part = "annotated"
    return "knit-%s" % (annotated_part,)

694

695

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts()."""
    graph = {}
    # the index supplies (node, parents) items
    graph.update(self._index.get_graph())
    return graph

699

700

def get_sha1(self, version_id):
    """Return the sha1 of version_id, looked up through get_sha1s()."""
    digests = self.get_sha1s([version_id])
    return digests[0]

702

703

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: list of digests in the same order as version_ids.
    """
    record_map = self._get_record_map(version_ids)
    digests = []
    for version_id in version_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[version_id][2])
    return digests

708

709

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

713

714

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is present in the knit is never reported as a ghost.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

727

728

def insert_data_stream(self, (format, data_list, reader_callable)):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    The single argument is a (format, data_list, reader_callable) tuple:
    data_list holds (version_id, options, length, parents) entries and
    reader_callable(length) returns the raw bytes of the next record.

    :raises KnitDataStreamIncompatible: if format differs from this knit's
        format signature.
    :raises KnitCorrupt: if an already present version has different
        parents or a different sha-1 in the stream, or if a new line-delta
        record refers to a first parent this knit's index does not have.
    :seealso: get_data_stream
    """
    if format != self.get_format_signature():
        trace.mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            format, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.
            # The record is read even though it is not stored, which also
            # moves the reader on to the next record.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            df, rec = self._data._parse_record_header(version_id, raw_data)
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            if 'line-delta' in options:
                # Make sure that this knit record is actually useful: a
                # line-delta is no use unless we have its parent.
                # Fetching from a broken repository with this problem
                # shouldn't break the target repository.
                if not self._index.has_version(parents[0]):
                    raise KnitCorrupt(
                        self.filename,
                        'line-delta from stream references '
                        'missing parent %s' % parents[0])
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

780

781

def versions(self):
    """See VersionedFile.versions."""
    evil_debugging = 'evil' in debug.debug_flags
    if evil_debugging:
        trace.mutter_callsite(2, "versions scales with size of history")
    index = self._index
    return index.get_versions()

786

787

def has_version(self, version_id):
    """See VersionedFile.has_version.

    Delegates to ``self._index.has_version``.  When the 'evil' debug flag
    is set, the call site is logged before delegating.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    index = self._index
    return index.has_version(version_id)

# ``version_id in knit`` is answered by has_version.
__contains__ = has_version

794

795

def _merge_annotations(self, content, parents, parent_texts={},

796

delta=None, annotated=None,

797

left_matching_blocks=None):

798

"""Merge annotations for content. This is done by comparing

799

the annotations based on changed to the text.

800

"""

801

if left_matching_blocks is not None:

802

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

803

else:

804

delta_seq = None

805

if annotated:

806

for parent_id in parents:

807

merge_content = self._get_content(parent_id, parent_texts)

808

if (parent_id == parents[0] and delta_seq is not None):

809

seq = delta_seq

810

else:

811

seq = patiencediff.PatienceSequenceMatcher(

812

None, merge_content.text(), content.text())

813

for i, j, n in seq.get_matching_blocks():

814

if n == 0:

815

continue

816

# this appears to copy (origin, text) pairs across to the

817

# new content for any line that matches the last-checked

818

# parent.

819

content._lines[j:j+n] = merge_content._lines[i:i+n]

820

if delta:

821

if delta_seq is None:

822

reference_content = self._get_content(parents[0], parent_texts)

823

new_texts = content.text()

824

old_texts = reference_content.text()

825

delta_seq = patiencediff.PatienceSequenceMatcher(

826

None, old_texts, new_texts)

827

return self._make_line_delta(delta_seq, content)

828

829

def _make_line_delta(self, delta_seq, new_content):

830

"""Generate a line delta from delta_seq and new_content."""

831

diff_hunks = []

832

for op in delta_seq.get_opcodes():

833

if op[0] == 'equal':

834

continue

835

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

836

return diff_hunks

837

838

def _get_components_positions(self, version_ids):

839

"""Produce a map of position data for the components of versions.

840

841

This data is intended to be used for retrieving the knit records.

842

843

A dict of version_id to (method, data_pos, data_size, next) is

844

returned.

845

method is the way referenced data should be applied.

846

data_pos is the position of the data in the knit.

847

data_size is the size of the data in the knit.

848

next is the build-parent of the version, or None for fulltexts.

849

"""

850

component_data = {}

851

for version_id in version_ids:

852

cursor = version_id

853

854

while cursor is not None and cursor not in component_data:

855

method = self._index.get_method(cursor)

856

if method == 'fulltext':

857

next = None

858

else:

859

next = self.get_parents_with_ghosts(cursor)[0]

860

index_memo = self._index.get_position(cursor)

861

component_data[cursor] = (method, index_memo, next)

862

cursor = next

863

return component_data

864

865

def _get_content(self, version_id, parent_texts={}):

866

"""Returns a content object that makes up the specified

867

version."""

868

cached_version = parent_texts.get(version_id, None)

869

if cached_version is not None:

870

if not self.has_version(version_id):

871

raise RevisionNotPresent(version_id, self.filename)

872

return cached_version

873

874

text_map, contents_map = self._get_content_maps([version_id])

875

return contents_map[version_id]

876

877

def _check_versions_present(self, version_ids):

878

"""Check that all specified versions are present."""

879

self._index.check_versions_present(version_ids)

880

881

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

882

nostore_sha, random_id, check_content):

883

"""See VersionedFile.add_lines_with_ghosts()."""

884

self._check_add(version_id, lines, random_id, check_content)

885

return self._add(version_id, lines, parents, self.delta,

886

parent_texts, None, nostore_sha, random_id)

887

888

def _add_lines(self, version_id, parents, lines, parent_texts,

889

left_matching_blocks, nostore_sha, random_id, check_content):

890

"""See VersionedFile.add_lines."""

891

self._check_add(version_id, lines, random_id, check_content)

892

self._check_versions_present(parents)

893

return self._add(version_id, lines[:], parents, self.delta,

894

parent_texts, left_matching_blocks, nostore_sha, random_id)

895

896

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param random_id: If true, the check for an already present version
        is skipped.
    :param check_content: If true, ``lines`` is passed to
        ``_check_lines_not_unicode`` and ``_check_lines_are_lines``.
    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If random_id is false and the version
        is already in the knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id:
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

910

911

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :return: A tuple (digest, text_length, content): the sha of the joined
        input lines, their total length, and the content object made by
        the factory.
    :raises errors.ExistingContent: If nostore_sha equals the digest.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    if parent_texts is None:
        parent_texts = {}
    present_parents = [p for p in parents if self.has_version(p)]

    # can only compress against the left most present parent.
    if delta:
        if not present_parents or present_parents[0] != parents[0]:
            delta = False

    # Measured before any newline is appended below.
    text_length = len(line_bytes)
    options = []
    if lines and lines[-1][-1] != '\n':
        # Work on a copy so the caller's list is left alone.
        lines = lines[:]
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'
        line_bytes += '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and present_parents):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(
            content, present_parents, parent_texts, delta,
            self.factory.annotated, left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
        size, record_bytes = self._data._record_to_data(
            version_id, digest, store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self.factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, record_bytes = self._data._record_to_data(
                version_id, digest, lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self.factory.lower_fulltext(content)
            size, record_bytes = self._data._record_to_data(
                version_id, digest, store_lines)

    access_memo = self._data.add_raw_records([size], record_bytes)[0]
    new_entry = (version_id, options, access_memo, parents)
    self._index.add_versions((new_entry,), random_id=random_id)
    return digest, text_length, content

989

990

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and ignores progress_bar.
    """
    return None

992

993

def _clone_text(self, new_version_id, old_version_id, parents):

994

"""See VersionedFile.clone_text()."""

995

# FIXME RBC 20060228 make fast by only inserting an index with null

996

# delta.

997

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

998

999

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Implemented as a single-element ``get_line_list`` request.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

1002

1003

def _get_record_map(self, version_ids):

1004

"""Produce a dictionary of knit records.

1005

1006

The keys are version_ids, the values are tuples of (method, content,

1007

digest, next).

1008

method is the way the content should be applied.

1009

content is a KnitContent object.

1010

digest is the SHA1 digest of this version id after all steps are done

1011

next is the build-parent of the version, i.e. the leftmost ancestor.

1012

If the method is fulltext, next will be None.

1013

"""

1014

position_map = self._get_components_positions(version_ids)

1015

# c = component_id, m = method, i_m = index_memo, n = next

1016

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

1017

record_map = {}

1018

for component_id, content, digest in \

1019

self._data.read_records_iter(records):

1020

method, index_memo, next = position_map[component_id]

1021

record_map[component_id] = method, content, digest, next

1022

1023

return record_map

1024

1025

def get_text(self, version_id):
    """See VersionedFile.get_text

    Implemented as a single-element ``get_texts`` request.
    """
    texts = self.get_texts([version_id])
    return texts[0]

1028

1029

def get_texts(self, version_ids):
    """Return the texts of listed versions, each joined into one string.

    The result follows the order of ``get_line_list(version_ids)``.
    """
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

1031

1032

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Every requested id is first passed to ``check_not_reserved_id``.  The
    result holds one entry per requested id, in request order, taken from
    the text map built by ``_get_content_maps``.
    """
    for version_id in version_ids:
        self.check_not_reserved_id(version_id)
    text_map, content_map = self._get_content_maps(version_ids)
    line_lists = []
    for version_id in version_ids:
        line_lists.append(text_map[version_id])
    return line_lists

_get_lf_split_line_list = get_line_list

1040

1041

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises KnitCorrupt: If the sha-1 of a reconstructed text differs from
        the digest stored with the version's record.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children than need to apply delta's to it is > 1 or it is part of the
    # final output.
    version_ids = list(version_ids)
    multiple_versions = len(version_ids) != 1
    record_map = self._get_record_map(version_ids)

    text_map = {}
    built_components = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at a fulltext or
        # at the first component that has already been built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built_components:
                break
            cursor = build_parent

        # Replay the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in built_components:
                content = built_components[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                if multiple_versions:
                    # only doing this when we want multiple versions
                    # output avoids list copies - which reference and
                    # dereference many strings.
                    content = content.copy()
                content.apply_delta(delta, version_id)
            if multiple_versions:
                built_components[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            if multiple_versions:
                content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            raise KnitCorrupt(self.filename,
                              '\n sha-1 %s'
                              '\n of reconstructed text does not match'
                              '\n expected %s'
                              '\n for version %s' %
                              (actual_sha, digest, version_id))
        text_map[version_id] = text
    return text_map, final_content

1106

1107

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Yields (line, version_id) pairs for the stored record of each
    requested version, visiting versions in ``self.versions()`` order.

    :param version_ids: Versions to walk; None means all versions.
    :param pb: Progress bar; a DummyProgress is used when None.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for requested in requested_versions:
        if not self.has_version(requested):
            raise RevisionNotPresent(requested, self.filename)
    # get a in-component-order queue of (version_id, index_memo):
    version_id_records = []
    for candidate in self.versions():
        if candidate not in requested_versions:
            continue
        version_id_records.append(
            (candidate, self._index.get_position(candidate)))

    total = len(version_id_records)
    records = self._data.read_records_iter(version_id_records)
    for version_idx, (version_id, data, sha_value) in enumerate(records):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        # XXX: It might be more efficient to yield (version_id,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in line_iterator:
            yield line, version_id

    pb.update('Walking content.', total, total)

1147

1148

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    Delegates to ``self._index.iter_parents``.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    index = self._index
    return index.iter_parents(version_ids)

1158

1159

def num_versions(self):
    """See VersionedFile.num_versions().

    Delegates to ``self._index.num_versions()``.
    """
    index = self._index
    return index.num_versions()

# ``len(knit)`` reports the number of versions.
__len__ = num_versions

1164

1165

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Delegates to ``self.factory.annotate_iter``, passing this knit along.
    """
    factory = self.factory
    return factory.annotate_iter(self, version_id)

1168

1169

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: If the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    else:
        return parents

1178

1179

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: If the index lookup raises KeyError.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    else:
        return parents

1185

1186

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id string or a list of version ids.
    :param topo_sorted: Passed through to ``self._index.get_ancestry``.
    :return: [] for an empty request, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        # A lone version id is treated as a one-element request.
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1193

1194

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or a list of version ids.
    :return: [] for an empty request, otherwise the index's answer.
    """
    if isinstance(versions, basestring):
        # A lone version id is treated as a one-element request.
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1201

1202

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the unsorted ancestry and the annotated text of both versions
    and hands them to ``merge._plan_annotate_merge``.
    """
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plan = merge._plan_annotate_merge(
        annotated_a, annotated_b, ancestors_a, ancestors_b)
    return plan

1210

1211

1212

class _KnitComponentFile(object):

1213

"""One of the files used to implement a knit database"""

1214

1215

def __init__(self, transport, filename, mode, file_mode=None,

1216

create_parent_dir=False, dir_mode=None):

1217

self._transport = transport

1218

self._filename = filename

1219

self._mode = mode

1220

self._file_mode = file_mode

1221

self._dir_mode = dir_mode

1222

self._create_parent_dir = create_parent_dir

1223

self._need_to_create = False

1224

1225

def _full_path(self):

1226

"""Return the full path to this file."""

1227

return self._transport.base + self._filename

1228

1229

def check_header(self, fp):

1230

line = fp.readline()

1231

if line == '':

1232

# An empty file can actually be treated as though the file doesn't

1233

# exist yet.

1234

raise errors.NoSuchFile(self._full_path())

1235

if line != self.HEADER:

1236

raise KnitHeaderError(badline=line,

1237

filename=self._transport.abspath(self._filename))

1238

1239

def __repr__(self):

1240

return '%s(%s)' % (self.__class__.__name__, self._filename)

1241

1242

1243

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is read on startup and then kept in memory, to enable fast
    lookups of revision information.  New entries are appended to the end
    of the index file.

    _cache maps a version id to its index entry, a tuple of
    (version_id, options, pos, size, parents, index).

    _history maps sequence numbers to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id.
    If this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data
        file that the compressed data starts at.
    LENGTH is the ascii representation of the size of the record in the
        data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a
    line, or at the end of the file, then the record that is missing it
    will be ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write
    was interrupted. As a result it's normal for the last line in the index
    to be missing a trailing newline. One can be added with no harmful
    effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id in self._cache:
            index = self._cache[version_id][5]
        else:
            index = len(self._history)
            self._history.append(version_id)
        entry = (version_id, options, pos, size, parents, index)
        self._cache[version_id] = entry

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index, or arrange for it to be created.

        When the file is missing and ``mode`` is 'w' with ``create`` true,
        the header is either written at once or, with ``delay_create``,
        deferred until the first ``add_versions`` call.  Otherwise the
        NoSuchFile error propagates.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if not (mode == 'w' and create):
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        graph = []
        for version_id, entry in self._cache.iteritems():
            graph.append((version_id, entry[4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Parents that are not in the index (ghosts) are left out.

        :raises RevisionNotPresent: If a requested version is not cached.
        """
        # get a graph of all the mentioned versions:
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                entry = cache[version]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in entry[4] if p in cache]
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                entry = cache[version]
            except KeyError:
                # ghost, fake it
                graph[version] = []
                continue
            parents = entry[4]
            # if not completed
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent references for an index line.

        Cached versions are written as their sequence number, all others
        as the version id prefixed by '.'.
        """
        cache = self._cache
        encoded = []
        for version in versions:
            if version not in cache:
                token = '.' + version
            else:
                # -- inlined lookup() --
                token = str(cache[version][5])
                # -- end lookup () --
            encoded.append(token)
        return ' '.join(encoded)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        new_lines = []
        # Snapshots used to roll back if anything below fails.
        saved_history = self._history[:]
        saved_cache = self._cache.copy()

        try:
            for version_id, options, memo, parents in versions:
                unused_index, pos, size = memo
                line = "\n%s %s %s %s %s :" % (
                    version_id,
                    ','.join(options),
                    pos,
                    size,
                    self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # First write: emit the header together with the records.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(
                    self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(
                    self._filename, ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        cached = self._cache[version_id]
        return None, cached[2], cached[3]

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises RevisionNotPresent: If the version is not cached.
        :raises errors.KnitIndexUnknownMethod: If the options name neither
            'fulltext' nor 'line-delta'.
        """
        try:
            options = self._cache[version_id][1]
        except KeyError:
            raise RevisionNotPresent(version_id, self._filename)
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options cached for version_id.

        The value is whatever was cached for the entry; the index file
        writes it as a comma separated list, e.g. foo,bar
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        present = []
        for parent in cache[version_id][4]:
            if parent in cache:
                present.append(parent)
        return present

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: For the first version found missing.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1527

1528

1529

class KnitGraphIndex(object):

1530

"""A knit index that builds on GraphIndex."""

1531

1532

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):

1533

"""Construct a KnitGraphIndex on a graph_index.

1534

1535

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1536

:param deltas: Allow delta-compressed records.

1537

:param add_callback: If not None, allow additions to the index and call

1538

this callback with a list of added GraphIndex nodes:

1539

[(node, value, node_refs), ...]

1540

:param parents: If True, record knits parents, if not do not record

1541

parents.

1542

"""

1543

self._graph_index = graph_index

1544

self._deltas = deltas

1545

self._add_callback = add_callback

1546

self._parents = parents

1547

if deltas and not parents:

1548

raise KnitCorrupt(self, "Cannot do delta compression without "

1549

"parent tracking.")

1550

1551

def _get_entries(self, keys, check_present=False):

1552

"""Get the entries for keys.

1553

1554

:param keys: An iterable of index keys, - 1-tuples.

1555

"""

1556

keys = set(keys)

1557

found_keys = set()

1558

if self._parents:

1559

for node in self._graph_index.iter_entries(keys):

1560

yield node

1561

found_keys.add(node[1])

1562

else:

1563

# adapt parentless index to the rest of the code.

1564

for node in self._graph_index.iter_entries(keys):

1565

yield node[0], node[1], node[2], ()

1566

found_keys.add(node[1])

1567

if check_present:

1568

missing_keys = keys.difference(found_keys)

1569

if missing_keys:

1570

raise RevisionNotPresent(missing_keys.pop(), self)

1571

1572

def _present_keys(self, version_ids):

1573

return set([

1574

node[1] for node in self._get_entries(version_ids)])

1575

1576

def _parentless_ancestry(self, versions):

1577

"""Honour the get_ancestry API for parentless knit indices."""

1578

wanted_keys = self._version_ids_to_keys(versions)

1579

present_keys = self._present_keys(wanted_keys)

1580

missing = set(wanted_keys).difference(present_keys)

1581

if missing:

1582

raise RevisionNotPresent(missing.pop(), self)

1583

return list(self._keys_to_version_ids(present_keys))

1584

1585

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    Ghost parents are left out of the result.

    :raises RevisionNotPresent: If a requested version is not found.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        found = set()
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            # dont ask for ghosties - otherwise
            # we we can end up looping with pending
            # being entirely ghosted.
            known_parents = [parent for parent in node_refs[0]
                             if parent not in ghosts]
            graph[key] = known_parents
            # queue parents, but dont examine known nodes again
            for parent in known_parents:
                if parent not in graph:
                    pending.add(parent)
            found.add(key)
        # Anything asked for this round but not returned is a ghost.
        ghosts.update(this_iteration.difference(found))
    unknown = versions.difference(graph)
    if unknown:
        raise RevisionNotPresent(unknown.pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        result_keys = graph.iterkeys()
    return [key[0] for key in result_keys]

1623

1624

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Like get_ancestry, but keys that are referenced as parents and are
    not in the index are kept in the result as parentless entries.

    :param versions: An iterable of version ids to start from.
    :return: A list of version ids ordered by topo_sort.
    :raises RevisionNotPresent: If a directly requested version is absent.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    ancestry = {}
    versions = self._version_ids_to_keys(versions)
    frontier = set(versions)
    while frontier:
        asked_for = frontier
        entries = self._get_entries(asked_for)
        frontier = set()
        for _index, key, _value, refs in entries:
            ancestry[key] = refs[0]
            for parent in ancestry[key]:
                # only parents not examined yet are queued
                if parent not in ancestry:
                    frontier.add(parent)
        absent = asked_for.difference(ancestry)
        absent_but_requested = versions.intersection(absent)
        if absent_but_requested:
            raise RevisionNotPresent(absent_but_requested.pop(), self)
        for ghost in absent:
            # record the ghost with no parents and do not ask for it again
            ancestry[ghost] = []
            frontier.discard(ghost)
    ordered = topo_sort(ancestry.items())
    return [key[0] for key in ordered]

1657

1658

def get_graph(self):
    """Return a list of (version_id, parent version_ids) pairs.

    For an index without parents every version from get_versions() is
    paired with an empty tuple.
    """
    if not self._parents:
        return [(key, ()) for key in self.get_versions()]
    return [
        (key[0], tuple([ref[0] for ref in refs[0]]))
        for index, key, value, refs in self._graph_index.iter_all_entries()]

1666

1667

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        Parents that are not present in the index are left out of the
        yielded tuples. The order is undefined.
    """
    wanted = self._version_ids_to_keys(version_ids)
    if not self._parents:
        for entry in self._get_entries(wanted):
            yield entry[1][0], ()
        return
    entries = set(self._get_entries(wanted))
    referenced = set()
    present = set()
    for entry in entries:
        referenced.update(entry[3][0])
        # any node we are querying must be present
        present.add(entry[1])
    # one extra lookup settles which of the remaining parents exist
    present.update(self._present_keys(referenced.difference(present)))
    for entry in entries:
        parents = [parent[0] for parent in entry[3][0]
                   if parent in present]
        yield entry[1][0], tuple(parents)

1695

1696

def num_versions(self):
    """Return how many entries self._graph_index.iter_all_entries() gives."""
    entries = list(self._graph_index.iter_all_entries())
    return len(entries)

__len__ = num_versions

1700

1701

def get_versions(self):
    """Return the version ids of every index entry, in index order.

    The result is not topologically sorted.
    """
    version_ids = []
    for entry in self._graph_index.iter_all_entries():
        version_ids.append(entry[1][0])
    return version_ids

1704

1705

def has_version(self, version_id):
    """Return True if exactly one key for version_id is in the index."""
    wanted = self._version_ids_to_keys([version_id])
    found = self._present_keys(wanted)
    return len(found) == 1

1708

1709

def _keys_to_version_ids(self, keys):

1710

return tuple(key[0] for key in keys)

1711

1712

def get_position(self, version_id):
    """Return details needed to access the version.

    The node value is one flag character followed by two space separated
    integers; the flag is skipped here.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    node = self._get_node(version_id)
    value = node[2]
    fields = value[1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

1721

1722

def get_method(self, version_id):
    """Return compression method of specified version.

    An index without deltas always answers 'fulltext' without looking the
    version up; otherwise the node's second reference list decides.
    """
    if self._deltas:
        node = self._get_node(version_id)
        return self._parent_compression(node[3][1])
    return 'fulltext'

1727

1728

def _parent_compression(self, reference_list):

1729

# use the second reference list to decide if this is delta'd or not.

1730

if len(reference_list):

1731

return 'line-delta'

1732

else:

1733

return 'fulltext'

1734

1735

def _get_node(self, version_id):

1736

try:

1737

return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]

1738

except IndexError:

1739

raise RevisionNotPresent(version_id, self)

1740

1741

def get_options(self, version_id):
    """Return the options of a version as a list of strings.

    The first item is the compression method ('fulltext' or 'line-delta');
    'no-eol' is appended when the node value starts with 'N'.
    """
    node = self._get_node(version_id)
    if self._deltas:
        method = self._parent_compression(node[3][1])
    else:
        method = 'fulltext'
    options = [method]
    if node[2][0] == 'N':
        options.append('no-eol')
    return options

1754

1755

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :raises errors.RevisionNotPresent: If iter_parents yields nothing for
        version_id.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        return found[0][1]
    # missing key
    raise errors.RevisionNotPresent(version_id, self)

1762

1763

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    The entry is always looked up with check_present=True, even for an
    index without parents, which then answers with an empty tuple.
    """
    wanted = self._version_ids_to_keys([version_id])
    nodes = list(self._get_entries(wanted, check_present=True))
    if self._parents:
        parent_keys = nodes[0][3][0]
        return self._keys_to_version_ids(parent_keys)
    return ()

1770

1771

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: For one of the absent keys, if any.
    """
    wanted = self._version_ids_to_keys(version_ids)
    absent = wanted.difference(self._present_keys(wanted))
    if absent:
        raise RevisionNotPresent(absent.pop(), self)

1778

1779

def add_version(self, version_id, options, access_memo, parents):
    """Add a single version record to the index via add_versions."""
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))

1782

1783

def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param versions: a list of tuples:
        (version_id, options, access_memo, parents), where access_memo
        unpacks to (index, pos, size).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises errors.ReadOnlyError: If the index has no add callback.
    :raises KnitCorrupt: For a line-delta in a non-delta index, parents in
        a parentless index, or details that disagree with an entry that is
        already present.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.
    pending = {}
    for (version_id, options, access_memo, parents) in versions:
        _index, pos, size = access_memo
        parent_keys = tuple((parent, ) for parent in parents)
        # the value is an eol flag character followed by "pos size"
        if 'no-eol' in options:
            flag = 'N'
        else:
            flag = ' '
        value = flag + "%d %d" % (pos, size)
        if not self._deltas and 'line-delta' in options:
            raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if not self._parents:
            if parent_keys:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        elif not self._deltas:
            node_refs = (parent_keys, )
        elif 'line-delta' in options:
            # the first parent is recorded as the compression parent
            node_refs = (parent_keys, (parent_keys[0],))
        else:
            node_refs = (parent_keys, ())
        pending[(version_id, )] = (value, node_refs)
    if not random_id:
        # check for dups: identical existing entries are dropped from the
        # insertion, differing ones are an error.
        for (_index, key, value, node_refs) in self._get_entries(pending):
            if (value, node_refs) != pending[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), pending[key]))
            del pending[key]
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in pending.items()]
    else:
        result = [(key, value)
                  for key, (value, node_refs) in pending.items()]
    self._add_callback(result)

1844

1845

def _version_ids_to_keys(self, version_ids):

1846

return set((version_id, ) for version_id in version_ids)

1847

1848

1849

class _KnitAccess(object):

1850

"""Access to knit records in a .knit file."""

1851

1852

def __init__(self, transport, filename, _file_mode, _dir_mode,

1853

_need_to_create, _create_parent_dir):

1854

"""Create a _KnitAccess for accessing and inserting data.

1855

1856

:param transport: The transport the .knit is located on.

1857

:param filename: The filename of the .knit.

1858

"""

1859

self._transport = transport

1860

self._filename = filename

1861

self._file_mode = _file_mode

1862

self._dir_mode = _dir_mode

1863

self._need_to_create = _need_to_create

1864

self._create_parent_dir = _create_parent_dir

1865

1866

def add_raw_records(self, sizes, raw_data):

1867

"""Add raw knit bytes to a storage area.

1868

1869

The data is spooled to whereever the access method is storing data.

1870

1871

:param sizes: An iterable containing the size of each raw data segment.

1872

:param raw_data: A bytestring containing the data.

1873

:return: A list of memos to retrieve the record later. Each memo is a

1874

tuple - (index, pos, length), where the index field is always None

1875

for the .knit access method.

1876

"""

1877

assert type(raw_data) == str, \

1878

'data must be plain bytes was %s' % type(raw_data)

1879

if not self._need_to_create:

1880

base = self._transport.append_bytes(self._filename, raw_data)

1881

else:

1882

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1883

create_parent_dir=self._create_parent_dir,

1884

mode=self._file_mode,

1885

dir_mode=self._dir_mode)

1886

self._need_to_create = False

1887

base = 0

1888

result = []

1889

for size in sizes:

1890

result.append((None, base, size))

1891

base += size

1892

return result

1893

1894

def create(self):

1895

"""IFF this data access has its own storage area, initialise it.

1896

1897

:return: None.

1898

"""

1899

self._transport.put_bytes_non_atomic(self._filename, '',

1900

mode=self._file_mode)

1901

1902

def open_file(self):

1903

"""IFF this data access can be represented as a single file, open it.

1904

1905

For knits that are not mapped to a single file on disk this will

1906

always return None.

1907

1908

:return: None or a file handle.

1909

"""

1910

try:

1911

return self._transport.get(self._filename)

1912

except NoSuchFile:

1913

pass

1914

return None

1915

1916

def get_raw_records(self, memos_for_retrieval):

1917

"""Get the raw bytes for a records.

1918

1919

:param memos_for_retrieval: An iterable containing the (index, pos,

1920

length) memo for retrieving the bytes. The .knit method ignores

1921

the index as there is always only a single file.

1922

:return: An iterator over the bytes of the records.

1923

"""

1924

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1925

for pos, data in self._transport.readv(self._filename, read_vector):

1926

yield data

1927

1928

1929

class _PackAccess(object):

1930

"""Access to knit records via a collection of packs."""

1931

1932

def __init__(self, index_to_packs, writer=None):

1933

"""Create a _PackAccess object.

1934

1935

:param index_to_packs: A dict mapping index objects to the transport

1936

and file names for obtaining data.

1937

:param writer: A tuple (pack.ContainerWriter, write_index) which

1938

contains the pack to write, and the index that reads from it will

1939

be associated with.

1940

"""

1941

if writer:

1942

self.container_writer = writer[0]

1943

self.write_index = writer[1]

1944

else:

1945

self.container_writer = None

1946

self.write_index = None

1947

self.indices = index_to_packs

1948

1949

def add_raw_records(self, sizes, raw_data):

1950

"""Add raw knit bytes to a storage area.

1951

1952

The data is spooled to the container writer in one bytes-record per

1953

raw data item.

1954

1955

:param sizes: An iterable containing the size of each raw data segment.

1956

:param raw_data: A bytestring containing the data.

1957

:return: A list of memos to retrieve the record later. Each memo is a

1958

tuple - (index, pos, length), where the index field is the

1959

write_index object supplied to the PackAccess object.

1960

"""

1961

assert type(raw_data) == str, \

1962

'data must be plain bytes was %s' % type(raw_data)

1963

result = []

1964

offset = 0

1965

for size in sizes:

1966

p_offset, p_length = self.container_writer.add_bytes_record(

1967

raw_data[offset:offset+size], [])

1968

offset += size

1969

result.append((self.write_index, p_offset, p_length))

1970

return result

1971

1972

def create(self):

1973

"""Pack based knits do not get individually created."""

1974

1975

def get_raw_records(self, memos_for_retrieval):

1976

"""Get the raw bytes for a records.

1977

1978

:param memos_for_retrieval: An iterable containing the (index, pos,

1979

length) memo for retrieving the bytes. The Pack access method

1980

looks up the pack to use for a given record in its index_to_pack

1981

map.

1982

:return: An iterator over the bytes of the records.

1983

"""

1984

# first pass, group into same-index requests

1985

request_lists = []

1986

current_index = None

1987

for (index, offset, length) in memos_for_retrieval:

1988

if current_index == index:

1989

current_list.append((offset, length))

1990

else:

1991

if current_index is not None:

1992

request_lists.append((current_index, current_list))

1993

current_index = index

1994

current_list = [(offset, length)]

1995

# handle the last entry

1996

if current_index is not None:

1997

request_lists.append((current_index, current_list))

1998

for index, offsets in request_lists:

1999

transport, path = self.indices[index]

2000

reader = pack.make_readv_reader(transport, path, offsets)

2001

for names, read_func in reader.iter_records():

2002

yield read_func(None)

2003

2004

def open_file(self):

2005

"""Pack based knits have no single file."""

2006

return None

2007

2008

def set_writer(self, writer, index, (transport, packname)):

2009

"""Set a writer to use for adding data."""

2010

if index is not None:

2011

self.indices[index] = (transport, packname)

2012

self.container_writer = writer

2013

self.write_index = index

2014

2015

2016

class _KnitData(object):

2017

"""Manage extraction of data from a KnitAccess, caching and decompressing.

2018

2019

The KnitData class provides the logic for parsing and using knit records,

2020

making use of an access method for the low level read and write operations.

2021

"""

2022

2023

def __init__(self, access):

2024

"""Create a KnitData object.

2025

2026

:param access: The access method to use. Access methods such as

2027

_KnitAccess manage the insertion of raw records and the subsequent

2028

retrieval of the same.

2029

"""

2030

self._access = access

2031

self._checked = False

2032

# TODO: jam 20060713 conceptually, this could spill to disk

2033

# if the cached size gets larger than a certain amount

2034

# but it complicates the model a bit, so for now just use

2035

# a simple dictionary

2036

self._cache = {}

2037

self._do_cache = False

2038

2039

def enable_cache(self):

2040

"""Enable caching of reads."""

2041

self._do_cache = True

2042

2043

def clear_cache(self):

2044

"""Clear the record cache."""

2045

self._do_cache = False

2046

self._cache = {}

2047

2048

def _open_file(self):

2049

return self._access.open_file()

2050

2051

def _record_to_data(self, version_id, digest, lines, dense_lines=None):

2052

"""Convert version_id, digest, lines into a raw data block.

2053

2054

:param dense_lines: The bytes of lines but in a denser form. For

2055

instance, if lines is a list of 1000 bytestrings each ending in \n,

2056

dense_lines may be a list with one line in it, containing all the

2057

1000's lines and their \n's. Using dense_lines if it is already

2058

known is a win because the string join to create bytes in this

2059

function spends less time resizing the final string.

2060

:return: (len, a StringIO instance with the raw data ready to read.)

2061

"""

2062

# Note: using a string copy here increases memory pressure with e.g.

2063

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

2064

# when doing the initial commit of a mozilla tree. RBC 20070921

2065

bytes = ''.join(chain(

2066

["version %s %d %s\n" % (version_id,

2067

len(lines),

2068

digest)],

2069

dense_lines or lines,

2070

["end %s\n" % version_id]))

2071

assert bytes.__class__ == str

2072

compressed_bytes = bytes_to_gzip(bytes)

2073

return len(compressed_bytes), compressed_bytes

2074

2075

def add_raw_records(self, sizes, raw_data):

2076

"""Append a prepared record to the data file.

2077

2078

:param sizes: An iterable containing the size of each raw data segment.

2079

:param raw_data: A bytestring containing the data.

2080

:return: a list of index data for the way the data was stored.

2081

See the access method add_raw_records documentation for more

2082

details.

2083

"""

2084

return self._access.add_raw_records(sizes, raw_data)

2085

2086

def _parse_record_header(self, version_id, raw_data):

2087

"""Parse a record header for consistency.

2088

2089

:return: the header and the decompressor stream.

2090

as (stream, header_record)

2091

"""

2092

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2093

try:

2094

rec = self._check_header(version_id, df.readline())

2095

except Exception, e:

2096

raise KnitCorrupt(self._access,

2097

"While reading {%s} got %s(%s)"

2098

% (version_id, e.__class__.__name__, str(e)))

2099

return df, rec

2100

2101

def _check_header(self, version_id, line):

2102

rec = line.split()

2103

if len(rec) != 4:

2104

raise KnitCorrupt(self._access,

2105

'unexpected number of elements in record header')

2106

if rec[1] != version_id:

2107

raise KnitCorrupt(self._access,

2108

'unexpected version, wanted %r, got %r'

2109

% (version_id, rec[1]))

2110

return rec

2111

2112

def _parse_record(self, version_id, data):

2113

# profiling notes:

2114

# 4168 calls in 2880 217 internal

2115

# 4168 calls to _parse_record_header in 2121

2116

# 4168 calls to readlines in 330

2117

df = GzipFile(mode='rb', fileobj=StringIO(data))

2118

2119

try:

2120

record_contents = df.readlines()

2121

except Exception, e:

2122

raise KnitCorrupt(self._access,

2123

"While reading {%s} got %s(%s)"

2124

% (version_id, e.__class__.__name__, str(e)))

2125

header = record_contents.pop(0)

2126

rec = self._check_header(version_id, header)

2127

2128

last_line = record_contents.pop()

2129

if len(record_contents) != int(rec[2]):

2130

raise KnitCorrupt(self._access,

2131

'incorrect number of lines %s != %s'

2132

' for version {%s}'

2133

% (len(record_contents), int(rec[2]),

2134

version_id))

2135

if last_line != 'end %s\n' % rec[1]:

2136

raise KnitCorrupt(self._access,

2137

'unexpected version end line %r, wanted %r'

2138

% (last_line, version_id))

2139

df.close()

2140

return record_contents, rec[3]

2141

2142

def read_records_iter_raw(self, records):

2143

"""Read text records from data file and yield raw data.

2144

2145

This unpacks enough of the text record to validate the id is

2146

as expected but thats all.

2147

"""

2148

# setup an iterator of the external records:

2149

# uses readv so nice and fast we hope.

2150

if len(records):

2151

# grab the disk data needed.

2152

if self._cache:

2153

# Don't check _cache if it is empty

2154

needed_offsets = [index_memo for version_id, index_memo

2155

in records

2156

if version_id not in self._cache]

2157

else:

2158

needed_offsets = [index_memo for version_id, index_memo

2159

in records]

2160

2161

raw_records = self._access.get_raw_records(needed_offsets)

2162

2163

for version_id, index_memo in records:

2164

if version_id in self._cache:

2165

# This data has already been validated

2166

data = self._cache[version_id]

2167

else:

2168

data = raw_records.next()

2169

if self._do_cache:

2170

self._cache[version_id] = data

2171

2172

# validate the header

2173

df, rec = self._parse_record_header(version_id, data)

2174

df.close()

2175

yield version_id, data

2176

2177

def read_records_iter(self, records):

2178

"""Read text records from data file and yield result.

2179

2180

The result will be returned in whatever is the fastest to read.

2181

Not by the order requested. Also, multiple requests for the same

2182

record will only yield 1 response.

2183

:param records: A list of (version_id, pos, len) entries

2184

:return: Yields (version_id, contents, digest) in the order

2185

read, not the order requested

2186

"""

2187

if not records:

2188

return

2189

2190

if self._cache:

2191

# Skip records we have alread seen

2192

yielded_records = set()

2193

needed_records = set()

2194

for record in records:

2195

if record[0] in self._cache:

2196

if record[0] in yielded_records:

2197

continue

2198

yielded_records.add(record[0])

2199

data = self._cache[record[0]]

2200

content, digest = self._parse_record(record[0], data)

2201

yield (record[0], content, digest)

2202

else:

2203

needed_records.add(record)

2204

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2205

else:

2206

needed_records = sorted(set(records), key=operator.itemgetter(1))

2207

2208

if not needed_records:

2209

return

2210

2211

# The transport optimizes the fetching as well

2212

# (ie, reads continuous ranges.)

2213

raw_data = self._access.get_raw_records(

2214

[index_memo for version_id, index_memo in needed_records])

2215

2216

for (version_id, index_memo), data in \

2217

izip(iter(needed_records), raw_data):

2218

content, digest = self._parse_record(version_id, data)

2219

if self._do_cache:

2220

self._cache[version_id] = data

2221

yield version_id, content, digest

2222

2223

def read_records(self, records):

2224

"""Read records into a dictionary."""

2225

components = {}

2226

for record_id, content, digest in \

2227

self.read_records_iter(records):

2228

components[record_id] = (content, digest)

2229

return components

2230

2231

2232

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def _copy_texts(self, pb, msg, version_ids, ignore_missing=False):
        """Copy texts to the target by extracting and adding them one by one.

        see join() for the parameter definitions. The pb argument is
        replaced by a nested progress bar and msg is not used here.

        :return: The number of versions added to the target.
        """
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        source_graph = self.source.get_graph(version_ids)
        ordered_versions = topo_sort(source_graph.items())

        def size_of_content(content):
            total_bytes = 0
            for line in content.text():
                total_bytes += len(line)
            return total_bytes
        # Cache at most 10MB of parent texts
        parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
                                              compute_size=size_of_content)
        # TODO: jam 20071116 It would be nice to have a streaming interface to
        #       get multiple texts from a source. The source could be smarter
        #       about how it handled intermediate stages.
        #       get_line_list() or make_mpdiffs() seem like a possibility, but
        #       at the moment they extract all full texts into memory, which
        #       causes us to store more than our 3x fulltext goal.
        #       Repository.iter_files_bytes() may be another possibility
        to_process = []
        for version in ordered_versions:
            if version not in self.target:
                to_process.append(version)
        total = len(to_process)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for position, version in enumerate(to_process):
                pb.update('Converting versioned data', position, total)
                parents = self.source.get_parents(version)
                lines = self.source.get_lines(version)
                _sha1, _num_bytes, parent_text = self.target.add_lines(
                    version, parents, lines, parent_texts=parent_cache)
                # keep the new text around as a delta basis for children
                parent_cache[version] = parent_text
        finally:
            pb.finished()
        return total

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Raw records are copied across as they are when source and target
        agree on annotation, converted when the source is annotated and
        the target is not, and re-added as full texts otherwise. The pb
        argument is replaced by a nested progress bar.

        :return: The number of versions copied.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly
        source_annotated = self.source.factory.annotated
        if source_annotated == self.target.factory.annotated:
            converter = None
        elif source_annotated:
            converter = self._anno_to_plain_converter
        else:
            # We're converting from a plain to an annotated knit. Copy them
            # across by full texts.
            return self._copy_texts(pb, msg, version_ids, ignore_missing)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - target_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(raw_stream, copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                        parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: What the target's _record_to_data returns for the
            converted lines.
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        if 'fulltext' not in options:
            delta = self.source.factory.parse_line_delta(data, version_id,
                                                         plain=True)
            lines = self.target.factory.lower_line_delta(delta)
        else:
            content = self.source.factory.parse_fulltext(data, version_id)
            lines = self.target.factory.lower_fulltext(content)
        return self.target._data._record_to_data(version_id, digest, lines)


InterVersionedFile.register_optimiser(InterKnit)

2385

2386

2387

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True for a Weave source and a KnitVersionedFile target."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Every missing version is re-added to the target with add_lines,
        in topological order. The pb argument is replaced by a nested
        progress bar.

        :return: The number of versions added.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions

            if not needed_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, lines)
                count = count + 1
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2446

2447

2448

# Deprecated alias kept for existing callers; use
# patiencediff.PatienceSequenceMatcher instead.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2450

2451

2452

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    Every version in the ancestry of revision_id is annotated in the order
    get_ancestry returns them, each one from its parents' annotations.

    :return: An iterator over the annotation of revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    texts = knit.get_line_list(ancestry)
    fulltext = dict(zip(ancestry, texts))
    annotations = {}
    for candidate in ancestry:
        if candidate in annotations:
            continue
        parents = knit.get_parents(candidate)
        # matching blocks are only available for a version stored as a
        # line-delta
        blocks = None
        if (len(parents) != 0
            and knit._index.get_method(candidate) == 'line-delta'):
            parent, sha1, noeol, delta = knit.get_delta(candidate)
            blocks = KnitContent.get_line_delta_blocks(delta,
                fulltext[parents[0]], fulltext[candidate])
        parent_annotations = [annotations[p] for p in parents]
        annotations[candidate] = list(annotate.reannotate(
            parent_annotations, fulltext[candidate], candidate, blocks))
    return iter(annotations[revision_id])

2477

2478

2479

try:

2480

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2481

except ImportError:

2482

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »