/brz/remove-bazaar : revision 3468.2.3

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2008-06-04 06:08:38 UTC
mto: This revision was merged to the branch mainline in revision 3475.
Revision ID: mbp@sourcefrog.net-20080604060838-yvgss0nv30glu8q8

doc

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bzr_access

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/revnos.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). The delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

graph as _mod_graph,

lru_cache,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.graph import Graph

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_string,

108

sha_strings,

109

split_lines,

110

)

111

from bzrlib.tsort import topo_sort

112

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

113

import bzrlib.ui

114

from bzrlib.versionedfile import (

115

AbsentContentFactory,

116

adapter_registry,

117

ContentFactory,

118

InterVersionedFile,

119

VersionedFile,

120

)

121

import bzrlib.weave

122

123

124

# TODO: Split out code specific to this format into an associated object.

125

126

# TODO: Can we put in some kind of value to check that the index and data

127

# files belong together?

128

129

# TODO: accommodate binaries, perhaps by storing a byte count

130

131

# TODO: function to check whole file

132

133

# TODO: atomically append data, then measure backwards from the cursor

134

# position after writing to work out where it was located. we may need to

135

# bypass python file buffering.

136

137

DATA_SUFFIX = '.knit'

138

INDEX_SUFFIX = '.kndx'

139

140

141

class KnitAdapter(object):
    """Common base for adapters that convert knit records between forms."""

    def __init__(self, basis_vf):
        """Set up the helpers shared by every knit adapter.

        :param basis_vf: A versioned file from which basis texts of deltas
            can be read.  Adapters that never need a basis text may be
            given None.
        """
        self._basis_vf = basis_vf
        # A _KnitData built with no access object; the adapters in this
        # module only call its record parsing/serialising helpers.
        self._data = _KnitData(None)
        self._annotate_factory = KnitAnnotateFactory()
        self._plain_factory = KnitPlainFactory()

154

155

156

class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapt an annotated fulltext knit record into an unannotated one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the record re-serialised without annotations.

        :param factory: The content factory the bytes belong to (not
            consulted by this adapter).
        :param annotated_compressed_bytes: The raw annotated fulltext record.
        :return: The bytes produced by _KnitData._record_to_data for the
            plain text of the record.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = header[1]
        content = self._annotate_factory.parse_fulltext(annotated_lines,
            version_id)
        # header[3] is passed through unchanged as the second argument of
        # _record_to_data; only the serialised bytes are of interest here.
        _size, plain_bytes = self._data._record_to_data(
            version_id, header[3], content.text())
        return plain_bytes

165

166

167

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapt an annotated line-delta knit record into an unannotated one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the delta record re-serialised without annotations.

        :param factory: The content factory the bytes belong to (not
            consulted by this adapter).
        :param annotated_compressed_bytes: The raw annotated delta record.
        :return: The bytes produced by _KnitData._record_to_data for the
            plain form of the delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = header[1]
        # plain=True drops the per-line origins while parsing.
        plain_delta = self._annotate_factory.parse_line_delta(
            annotated_lines, version_id, plain=True)
        plain_lines = self._plain_factory.lower_line_delta(plain_delta)
        _size, plain_bytes = self._data._record_to_data(
            version_id, header[3], plain_lines)
        return plain_bytes

178

179

180

class FTAnnotatedToFullText(KnitAdapter):
    """Adapt an annotated fulltext knit record into a single fulltext string."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the text held in an annotated fulltext record.

        :param factory: The content factory for the record; its key and
            _build_details are used when parsing.
        :param annotated_compressed_bytes: The raw annotated fulltext record.
        :return: The text lines of the record joined into one string.
        """
        _header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        # No base content is supplied (None); the class handles fulltext
        # records.
        parsed = self._annotate_factory.parse_record(
            factory.key[0], annotated_lines, factory._build_details, None)
        content, _delta = parsed
        return ''.join(content.text())

189

190

191

class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapt an annotated line-delta knit record into a fulltext string."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the fulltext obtained by applying the delta to its basis.

        :param factory: The content factory for the record; the first
            parent is used as the compression parent and _build_details[1]
            as the strip-eol flag.
        :param annotated_compressed_bytes: The raw annotated delta record.
        :return: The reconstructed text joined into one string.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        new_version = header[1]
        plain_delta = self._annotate_factory.parse_line_delta(
            annotated_lines, new_version, plain=True)
        basis_version = factory.parents[0][0]
        basis_lines = self._basis_vf.get_lines(basis_version)
        # The delta came from an annotated record but the basis is plain,
        # so it is applied by hand to a PlainKnitContent.
        text_content = PlainKnitContent(basis_lines, basis_version)
        text_content.apply_delta(plain_delta, new_version)
        text_content._should_strip_eol = factory._build_details[1]
        return ''.join(text_content.text())

207

208

209

class FTPlainToFullText(KnitAdapter):
    """Adapt a plain fulltext knit record into a single fulltext string."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the text held in a plain fulltext record.

        :param factory: The content factory for the record; its key and
            _build_details are used when parsing.
        :param compressed_bytes: The raw plain fulltext record.
        :return: The text lines of the record joined into one string.
        """
        _header, record_lines = self._data._parse_record_unchecked(
            compressed_bytes)
        parsed = self._plain_factory.parse_record(
            factory.key[0], record_lines, factory._build_details, None)
        content, _delta = parsed
        return ''.join(content.text())

218

219

220

class DeltaPlainToFullText(KnitAdapter):
    """Adapt a plain line-delta knit record into a fulltext string."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the fulltext obtained by applying the delta to its basis.

        :param factory: The content factory for the record; the first
            parent is used as the compression parent and _build_details
            is handed to parse_record.
        :param compressed_bytes: The raw plain delta record.
        :return: The reconstructed text joined into one string.
        """
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        # parse_record parses the delta itself, so it is not parsed
        # separately here (doing so only produced an unused local).
        compression_parent = factory.parents[0][0]
        basis_lines = self._basis_vf.get_lines(compression_parent)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # Both the basis and the delta are plain, so the factory can apply
        # the delta to the basis content directly.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())

235

236

237

class KnitContentFactory(ContentFactory):
    """Content factory used when streaming records out of a knit.

    :seealso ContentFactory:
    """

    def __init__(self, version, parents, build_details, sha1, raw_record,
        annotated, knit=None):
        """Describe one knit record for streaming.

        :param version: The version.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details; element 0 selects delta vs fulltext.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional knit; when given, get_bytes_as can serve
            'fulltext' requests from it.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = (version,)
        self.parents = tuple([(parent,) for parent in parents])
        compression = 'ft'
        if build_details[0] == 'line-delta':
            compression = 'delta'
        annotation_prefix = ''
        if annotated:
            annotation_prefix = 'annotated-'
        self.storage_kind = 'knit-%s%s-gz' % (annotation_prefix, compression)
        self._raw_record = raw_record
        self._build_details = build_details
        self._knit = knit

    def get_bytes_as(self, storage_kind):
        """Return this record's bytes in the requested representation.

        :param storage_kind: Either this factory's own storage_kind (the
            raw record is returned) or 'fulltext' (served from the knit
            supplied at construction, if any).
        :raises errors.UnavailableRepresentation: for any other request.
        """
        if storage_kind == self.storage_kind:
            return self._raw_record
        if storage_kind != 'fulltext' or self._knit is None:
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
        return self._knit.get_text(self.key[0])

280

281

282

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    Subclasses keep their lines in self._lines and provide text(),
    apply_delta() and strip_last_line_newline().
    """

    def __init__(self):
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        raise NotImplementedError(self.apply_delta)

    def cleanup_eol(self, copy_on_mutate=True):
        """Strip the final newline if this content is flagged for it.

        :param copy_on_mutate: When true, self._lines is replaced by a
            shallow copy before being modified.
        """
        if not self._should_strip_eol:
            return
        if copy_on_mutate:
            self._lines = self._lines[:]
        self.strip_last_line_newline()

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: Another content object (its text() and _lines
            are used).
        :return: An iterator of (start, end, count, lines) tuples, one per
            non-equal opcode.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(None, source_texts,
            target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta

        :param knit_delta: Iterable of (s_begin, s_end, t_len, new_text).
        :param source: The lines the delta applies to.
        :param target: The lines produced by applying the delta.
        :return: An iterator of (source_pos, target_pos, length) tuples,
            ending with a zero-length block.
        """
        target_len = len(target)
        src_at = 0
        tgt_at = 0
        for src_start, src_stop, inserted, _new_text in knit_delta:
            gap = src_start - src_at
            matched = gap
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling.
            if (matched > 0 and
                source[src_at + matched - 1] != target[tgt_at + matched - 1]):
                matched -= 1
            if matched > 0:
                yield src_at, tgt_at, matched
            tgt_at += inserted + gap
            src_at = src_stop
        tail = target_len - tgt_at
        if (tail > 0 and
            source[src_at + tail - 1] != target[tgt_at + tail - 1]):
            tail -= 1
        if tail > 0:
            yield src_at, tgt_at, tail
        yield src_at + (target_len - tgt_at), target_len, 0

337

338

339

class AnnotatedKnitContent(KnitContent):
    """Knit content whose lines are stored as (origin, text) pairs."""

    def __init__(self, lines):
        KnitContent.__init__(self)
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        return list(self._lines)

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        The stored line list is modified in place.  new_version_id is not
        recorded, since every line carries its own origin.
        """
        shift = 0
        target = self._lines
        for start, end, count, delta_lines in delta:
            target[shift + start:shift + end] = delta_lines
            # Later hunks are addressed in original coordinates, so track
            # how far this hunk moved everything after it.
            shift += count - (end - start)

    def strip_last_line_newline(self):
        """Remove trailing newlines from the last line and clear the flag."""
        last = self._lines[-1]
        self._lines[-1] = (last[0], last[1].rstrip('\n'))
        self._should_strip_eol = False

    def text(self):
        """Return the text lines without their origins.

        :raises KnitCorrupt: if a stored line cannot be unpacked as an
            (origin, text) pair.
        """
        try:
            plain = [body for _origin, body in self._lines]
        except ValueError:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            problem = sys.exc_info()[1]
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (problem,))
        if self._should_strip_eol:
            plain[-1] = plain[-1].rstrip('\n')
        return plain

    def copy(self):
        """Return a new AnnotatedKnitContent over a copy of the line list."""
        duplicate_lines = self._lines[:]
        return AnnotatedKnitContent(duplicate_lines)

380

381

382

class PlainKnitContent(KnitContent):
    """Knit content stored as bare text lines, without per-line origins.

    annotate() reports the single version id held by this object for every
    line, so annotation is generally not useful on PlainKnitContent
    objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        annotated = []
        for line in self._lines:
            annotated.append((origin, line))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        The stored line list is modified in place and the version id is
        updated afterwards.
        """
        shift = 0
        target = self._lines
        for start, end, count, delta_lines in delta:
            target[shift + start:shift + end] = delta_lines
            shift += count - (end - start)
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        duplicate_lines = self._lines[:]
        return PlainKnitContent(duplicate_lines, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newlines from the last line and clear the flag."""
        last = self._lines[-1]
        self._lines[-1] = last.rstrip('\n')
        self._should_strip_eol = False

    def text(self):
        """Return the text lines.

        When the strip-eol flag is set a copy is returned with the final
        newline removed; otherwise the stored list itself is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

421

422

423

class _KnitFactory(object):
    """Behaviour shared by the annotated and plain knit factories."""

    def parse_record(self, version_id, record, record_details,
        base_content, copy_base_content=True):
        """Parse a record into a full content object.

        :param version_id: The official version id for this content
        :param record: The data returned by read_records_iter()
        :param record_details: A (method, noeol) pair, as returned by
            get_build_details
        :param base_content: The content the delta applies to when the
            method is 'line-delta'; otherwise unused (pass None).
        :param copy_base_content: For deltas, whether to apply the delta
            to a copy of base_content (True) or to base_content itself.
        :return: (content, delta) A Content object and possibly a
            line-delta, delta may be None
        """
        method, noeol = record_details
        if method != 'line-delta':
            content = self.parse_fulltext(record, version_id)
            delta = None
        else:
            content = base_content
            if copy_base_content:
                content = base_content.copy()
            delta = self.parse_line_delta(record, version_id)
            content.apply_delta(delta, version_id)
        content._should_strip_eol = noeol
        return (content, delta)

455

456

457

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: Iterable of serialised annotated lines.
        :param version_id: Unused here; each line carries its own origin.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over parse_line_delta(lines, version_id).

        :param version_id: Passed through to parse_line_delta.  Optional so
            that the previous one-argument call form keeps working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Unused here; each line carries its own origin.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        # Bound once; the header loop and the content reads below share
        # the same iterator.
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, version_id):
        """Return the annotation of version_id as stored in knit."""
        content = knit._get_content(version_id)
        return content.annotate()

567

568

569

class KnitPlainFactory(_KnitFactory):
    """Factory producing PlainKnitContent objects from unannotated records."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent labelled with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        The lines need no conversion; they are wrapped, together with
        version_id, via make().
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, lines) hunks from a serialised delta.

        :param lines: An indexable sequence of delta lines: a
            'start,end,count' header followed by count content lines,
            repeated.
        :param version_id: Unused.
        """
        position = 0
        total = len(lines)
        while position < total:
            start, end, count = [int(n) for n in lines[position].split(',')]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return parse_line_delta_iter(lines, version_id) as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        Header lines are consumed and dropped; only the content lines that
        follow each header are yielded.
        """
        stream = iter(lines)
        pull = stream.next
        for header in stream:
            count = int(header.split(',')[2])
            for _unused in xrange(count):
                yield pull()

    def lower_fulltext(self, content):
        """Return the text lines of content, the serialised fulltext form."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta: a header line per hunk, then its lines."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            header = '%d,%d,%d\n' % (start, end, count)
            serialised.append(header)
            serialised.extend(hunk_lines)
        return serialised

    def annotate(self, knit, version_id):
        """Annotate version_id of knit using a _KnitAnnotator."""
        return _KnitAnnotator(knit).annotate(version_id)

629

630

631

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport to create the knit on.
    :param relpath: The name of the knit, relative to transport.
    :return: The newly created knit, using a KnitPlainFactory.
    """
    # make_file_knit takes (name, transport, ...) and expects a factory
    # instance; create=True is needed because a knit that is only opened
    # must already exist.
    return make_file_knit(relpath, transport, access_mode='w',
        factory=KnitPlainFactory(), create=True)

634

635

636

def make_file_knit(name, transport, file_mode=None, access_mode='w',
    factory=None, delta=True, create=False, create_parent_dir=False,
    delay_create=False, dir_mode=None, get_scope=None):
    """Factory to create a KnitVersionedFile for a .knit/.kndx file pair.

    :param name: Base name of the knit; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it for the index and data files.
    :param transport: The transport holding both files.
    :param file_mode: Passed to the index and the data access object.
    :param access_mode: Passed to the index.
    :param factory: Content factory; defaults to a KnitAnnotateFactory.
    :param delta: Passed to KnitVersionedFile as its delta setting.
    :param create: Passed to the index and to KnitVersionedFile.
    :param create_parent_dir: Passed to the index and the data access object.
    :param delay_create: Passed to the index and to KnitVersionedFile.
    :param dir_mode: Passed to the index and the data access object.
    :param get_scope: Passed to the index; defaults to a callable
        returning None.
    """
    if factory is None:
        factory = KnitAnnotateFactory()
    if get_scope is None:
        get_scope = lambda:None
    index = _KnitIndex(transport, name + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode, get_scope=get_scope)
    access = _KnitAccess(transport, name + DATA_SUFFIX, file_mode,
        dir_mode, ((create and not len(index)) and delay_create),
        create_parent_dir)
    # delta is forwarded explicitly; it used to be dropped here, so a
    # caller's delta=False never reached the knit.
    return KnitVersionedFile(name, transport, factory=factory,
        delta=delta, create=create, delay_create=delay_create, index=index,
        access_method=access)

654

655

656

def get_suffixes():
    """Return the file suffixes used by file based knits.

    :return: A new list: the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

659

make_file_knit.get_suffixes = get_suffixes

660

661

662

class KnitVersionedFile(VersionedFile):

663

"""Weave-like structure with faster random access.

664

665

A knit stores a number of texts and a summary of the relationships

666

between them. Texts are identified by a string version-id. Texts

667

are normally stored and retrieved as a series of lines, but can

668

also be passed as single strings.

669

670

Lines are stored with the trailing newline (if any) included, to

671

avoid special cases for files with no final newline. Lines are

672

composed of 8-bit characters, not unicode. The combination of

673

these approaches should mean any 'binary' file can be safely

674

stored and retrieved.

675

"""

676

677

def __init__(self, relpath, transport, file_mode=None,
    factory=None, delta=True, create=False, create_parent_dir=False,
    delay_create=False, dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Stored as self.filename.
    :param transport: Stored as self.transport.
    :param factory: Content factory; a KnitAnnotateFactory is used when
        a false value is given.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.  Required.
    :param access_method: The data access object.  Required.
    :raises ValueError: if index or access_method is None.

    file_mode, create_parent_dir and dir_mode are accepted but not
    referenced by this constructor.
    """
    super(KnitVersionedFile, self).__init__()
    self.transport = transport
    self.filename = relpath
    if not factory:
        factory = KnitAnnotateFactory()
    self.factory = factory
    self.delta = delta

    self._max_delta_chain = 200

    if None in (access_method, index):
        raise ValueError("No default access_method or index any more")
    self._index = index
    # Only create the data file straight away for a new, still-empty knit
    # whose creation has not been deferred.
    create_now = create and not len(self) and not delay_create
    if create_now:
        access_method.create()
    self._data = _KnitData(access_method)

705

706

def __repr__(self):

707

return '%s(%s)' % (self.__class__.__name__,

708

self.transport.abspath(self.filename))

709

710

def _check_should_delta(self, first_parents):

711

"""Iterate back through the parent listing, looking for a fulltext.

712

713

This is used when we want to decide whether to add a delta or a new

714

fulltext. It searches for _max_delta_chain parents. When it finds a

715

fulltext parent, it sees if the total size of the deltas leading up to

716

it is large enough to indicate that we want a new full text anyway.

717

718

Return True if we should create a new delta, False if we should use a

719

full text.

720

"""

721

delta_size = 0

722

fulltext_size = None

723

delta_parents = first_parents

724

for count in xrange(self._max_delta_chain):

725

parent = delta_parents[0]

726

method = self._index.get_method(parent)

727

index, pos, size = self._index.get_position(parent)

728

if method == 'fulltext':

729

fulltext_size = size

730

break

731

delta_size += size

732

delta_parents = self._index.get_parent_map([parent])[parent]

733

else:

734

# We couldn't find a fulltext, so we must create a new one

735

return False

736

737

return fulltext_size > delta_size

738

739

def _check_write_ok(self):

740

return self._index._check_write_ok()

741

742

def _add_raw_records(self, records, data):

743

"""Add all the records 'records' with data pre-joined in 'data'.

744

745

:param records: A list of tuples(version_id, options, parents, size).

746

:param data: The data for the records. When it is written, the records

747

are adjusted to have pos pointing into data by the sum of

748

the preceding records sizes.

749

"""

750

# write all the data

751

raw_record_sizes = [record[3] for record in records]

752

positions = self._data.add_raw_records(raw_record_sizes, data)

753

index_entries = []

754

for (version_id, options, parents, _), access_memo in zip(

755

records, positions):

756

index_entries.append((version_id, options, access_memo, parents))

757

self._index.add_versions(index_entries)

758

759

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; the index and data suffixes are
        appended to it.
    :param transport: The transport to copy onto.
    """
    index_target = name + INDEX_SUFFIX
    staged_index = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(staged_index,
        self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staged_index, index_target)

773

774

def get_data_stream(self, required_versions):

775

"""Get a data stream for the specified versions.

776

777

Versions may be returned in any order, not necessarily the order

778

specified. They are returned in a partial order by compression

779

parent, so that the deltas can be applied as the data stream is

780

inserted; however note that compression parents will not be sent

781

unless they were specifically requested, as the client may already

782

have them.

783

784

:param required_versions: The exact set of versions to be extracted.

785

Unlike some other knit methods, this is not used to generate a

786

transitive closure, rather it is used precisely as given.

787

788

:returns: format_signature, list of (version, options, length, parents),

789

reader_callable.

790

"""

791

required_version_set = frozenset(required_versions)

792

version_index = {}

793

# list of revisions that can just be sent without waiting for their

794

# compression parent

795

ready_to_send = []

796

# map from revision to the children based on it

797

deferred = {}

798

# first, read all relevant index data, enough to sort into the right

799

# order to return

800

for version_id in required_versions:

801

options = self._index.get_options(version_id)

802

parents = self._index.get_parents_with_ghosts(version_id)

803

index_memo = self._index.get_position(version_id)

804

version_index[version_id] = (index_memo, options, parents)

805

if ('line-delta' in options

806

and parents[0] in required_version_set):

807

# must wait until the parent has been sent

808

deferred.setdefault(parents[0], []). \

809

append(version_id)

810

else:

811

# either a fulltext, or a delta whose parent the client did

812

# not ask for and presumably already has

813

ready_to_send.append(version_id)

814

# build a list of results to return, plus instructions for data to

815

# read from the file

816

copy_queue_records = []

817

temp_version_list = []

818

while ready_to_send:

819

# XXX: pushing and popping lists may be a bit inefficient

820

version_id = ready_to_send.pop(0)

821

(index_memo, options, parents) = version_index[version_id]

822

copy_queue_records.append((version_id, index_memo))

823

none, data_pos, data_size = index_memo

824

temp_version_list.append((version_id, options, data_size,

825

parents))

826

if version_id in deferred:

827

# now we can send all the children of this revision - we could

828

# put them in anywhere, but we hope that sending them soon

829

# after the fulltext will give good locality in the receiver

830

ready_to_send[:0] = deferred.pop(version_id)

831

if not (len(deferred) == 0):

832

raise AssertionError("Still have compressed child versions waiting to be sent")

833

# XXX: The stream format is such that we cannot stream it - we have to

834

# know the length of all the data a-priori.

835

raw_datum = []

836

result_version_list = []

837

for (version_id, raw_data, _), \

838

(version_id2, options, _, parents) in \

839

izip(self._data.read_records_iter_raw(copy_queue_records),

840

temp_version_list):

841

if not (version_id == version_id2):

842

raise AssertionError('logic error, inconsistent results')

843

raw_datum.append(raw_data)

844

result_version_list.append(

845

(version_id, options, len(raw_data), parents))

846

# provide a callback to get data incrementally.

847

pseudo_file = StringIO(''.join(raw_datum))

848

def read(length):

849

if length is None:

850

return pseudo_file.read()

851

else:

852

return pseudo_file.read(length)

853

return (self.get_format_signature(), result_version_list, read)

854

855

def get_record_stream(self, versions, ordering, include_delta_closure):
    """Get a stream of records for versions.

    :param versions: The versions to include.
        NOTE(review): the previous docstring described each entry as a
        tuple (version,), but the code looks entries up in the parent map
        as given and wraps them in a tuple itself - confirm with callers.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Nb: what we should do is plan the data to stream to allow
    # reconstruction of all the texts without excessive buffering,
    # including re-sending common bases as needed. This makes the most
    # sense when we start serialising these streams though, so for now
    # we just fallback to individual text construction behind the
    # abstraction barrier.
    knit = None
    if include_delta_closure:
        knit = self
    # We end up doing multiple index lookups here for parents details and
    # disk layout details - we need a unified api ?
    parent_map = self.get_parent_map(versions)
    missing = set(versions) - set(parent_map)
    if ordering != 'topological':
        # Keep the requested order (as that seems marginally useful, at
        # least until we start doing IO optimising here).
        present = [version for version in versions if version in parent_map]
    else:
        present = topo_sort(parent_map)
    position_map = self._get_components_positions(present)
    to_read = []
    for version in present:
        to_read.append((version, position_map[version][1]))
    for version in missing:
        yield AbsentContentFactory((version,))
    for version, raw_data, sha1 in self._data.read_records_iter_raw(to_read):
        record_details, _index_memo, _unused = position_map[version]
        yield KnitContentFactory(version, parent_map[version],
            record_details, sha1, raw_data, self.factory.annotated, knit)

901

902

def _extract_blocks(self, version_id, source, target):

903

if self._index.get_method(version_id) != 'line-delta':

904

return None

905

parent, sha1, noeol, delta = self.get_delta(version_id)

906

return KnitContent.get_line_delta_blocks(delta, source, target)

907

908

def get_delta(self, version_id):

909

"""Get a delta for constructing version from some other version."""

910

self.check_not_reserved_id(version_id)

911

parents = self.get_parent_map([version_id])[version_id]

912

if len(parents):

913

parent = parents[0]

914

else:

915

parent = None

916

index_memo = self._index.get_position(version_id)

917

data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]

918

noeol = 'no-eol' in self._index.get_options(version_id)

919

if 'fulltext' == self._index.get_method(version_id):

920

new_content = self.factory.parse_fulltext(data, version_id)

921

if parent is not None:

922

reference_content = self._get_content(parent)

923

old_texts = reference_content.text()

924

else:

925

old_texts = []

926

new_texts = new_content.text()

927

delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,

928

new_texts)

929

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

930

else:

931

delta = self.factory.parse_line_delta(data, version_id)

932

return parent, sha1, noeol, delta

933

934

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" when this knit's factory is annotated,
        otherwise "knit-plain".
    """
    if not self.factory.annotated:
        return "knit-%s" % ("plain",)
    return "knit-%s" % ("annotated",)

941

942

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1s().

    :param version_ids: The versions whose digests are wanted.
    :return: A list of digests, in the same order as version_ids.
    """
    records = self._get_record_map(version_ids)
    sha1s = []
    for version_id in version_ids:
        # Entry 2 of each record tuple is the 'digest'.
        sha1s.append(records[version_id][2])
    return sha1s

947

948

def insert_data_stream(self, (format, data_list, reader_callable)):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    The single argument is a 3-tuple:

    format
        the format signature of the stream; compared against
        self.get_format_signature().
    data_list
        an iterable of (version_id, options, length, parents) entries.
    reader_callable
        called with each entry's length to obtain that entry's raw record
        bytes.  NOTE(review): it is called exactly once per entry, in
        data_list order, which suggests it reads sequentially - confirm
        against get_data_stream.

    :raises KnitCorrupt: if an already present version disagrees with the
        stream about its parents or sha-1, or if a line-delta record
        references a first parent that is not in this knit's index.
    :seealso: get_data_stream
    """
    if format != self.get_format_signature():
        # The stream does not match our on-disk format: wrap it in a
        # temporary knit and go through the record stream interface
        # instead of copying raw records.
        if 'knit' in debug.debug_flags:
            trace.mutter(
                'incompatible format signature inserting to %r', self)
        source = self._knit_from_datastream(
            (format, data_list, reader_callable))
        stream = source.get_record_stream(source.versions(), 'unordered', False)
        self.insert_record_stream(stream)
        return

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if tuple(my_parents) != tuple(parents):
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.  The record must be read from the stream even
            # though it will not be stored.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1s([version_id])[0]
            df, rec = self._data._parse_record_header(version_id, raw_data)
            # Field 3 of the parsed header is used as the stream's sha-1.
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            if 'line-delta' in options:
                # Make sure that this knit record is actually useful: a
                # line-delta is no use unless we have its parent.
                # Fetching from a broken repository with this problem
                # shouldn't break the target repository.
                #
                # See https://bugs.launchpad.net/bzr/+bug/164443
                if not self._index.has_version(parents[0]):
                    raise KnitCorrupt(
                        self.filename,
                        'line-delta from stream '
                        'for version %s '
                        'references '
                        'missing parent %s\n'
                        'Try running "bzr check" '
                        'on the source repository, and "bzr reconcile" '
                        'if necessary.' %
                        (version_id, parents[0]))
                if not self.delta:
                    # We received a line-delta record for a non-delta knit.
                    # Convert it to a fulltext.
                    gzip_bytes = reader_callable(length)
                    self._convert_line_delta_to_fulltext(
                        gzip_bytes, version_id, parents)
                    continue

            # New version that can be stored as-is: copy the raw record.
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

1021

1022

def _convert_line_delta_to_fulltext(self, gzip_bytes, version_id, parents):

1023

lines, sha1 = self._data._parse_record(version_id, gzip_bytes)

1024

delta = self.factory.parse_line_delta(lines, version_id)

1025

content = self.factory.make(self.get_lines(parents[0]), parents[0])

1026

content.apply_delta(delta, version_id)

1027

digest, len, content = self.add_lines(

1028

version_id, parents, content.text())

1029

if digest != sha1:

1030

raise errors.VersionedFileInvalidChecksum(version_id)

1031

1032

def _knit_from_datastream(self, (format, data_list, reader_callable)):

1033

"""Create a knit object from a data stream.

1034

1035

This method exists to allow conversion of data streams that do not

1036

match the signature of this knit. Generally it will be slower and use

1037

more memory to use this method to insert data, but it will work.

1038

1039

:seealso: get_data_stream for details on datastreams.

1040

:return: A knit versioned file which can be used to join the datastream

1041

into self.

1042

"""

1043

if format == "knit-plain":

1044

factory = KnitPlainFactory()

1045

elif format == "knit-annotated":

1046

factory = KnitAnnotateFactory()

1047

else:

1048

raise errors.KnitDataStreamUnknown(format)

1049

index = _StreamIndex(data_list, self._index)

1050

access = _StreamAccess(reader_callable, index, self, factory)

1051

return KnitVersionedFile(self.filename, self.transport,

1052

factory=factory, index=index, access_method=access)

1053

1054

def insert_record_stream(self, stream):
    """Insert a record stream into this versioned file.

    Records whose storage kind this knit can store directly (or after a
    cheap annotation-stripping adaption) are copied as raw knit records;
    'fulltext' records and anything else are inserted through add_lines.
    Delta records whose basis parent is not yet present have their index
    entry held back until that parent has been added.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if the stream contains an 'absent'
        record, or if the stream ends while some delta still has no
        basis parent in this knit.
    :seealso VersionedFile.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created lazily and cached for the duration of this
        # call.  A KeyError from adapter_registry.get propagates to the
        # caller, which relies on it below.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    if self.factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-delta-gz",
                            "knit-annotated-ft-gz"])
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    native_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    # key = basis_parent, value = list of index entries to add
    buffered_index_entries = {}
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key[0]], self)
        # adapt to non-tuple interface
        parents = [parent[0] for parent in record.parents]
        if record.storage_kind in knit_types:
            if record.storage_kind not in native_types:
                # Try the delta adapter first and fall back to the
                # fulltext adapter when no delta adapter is registered.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind))
            else:
                bytes = record.get_bytes_as(record.storage_kind)
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by making the kndx index support raising on a
            # duplicate add with identical parents and options.
            access_memo = self._data.add_raw_records([len(bytes)], bytes)[0]
            index_entry = (record.key[0], options, access_memo, parents)
            buffered = False
            if 'fulltext' not in options:
                # A delta: its index entry can only be added once the
                # basis (first) parent is present.
                basis_parent = parents[0]
                if not self.has_version(basis_parent):
                    pending = buffered_index_entries.setdefault(
                        basis_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_versions([index_entry])
        elif record.storage_kind == 'fulltext':
            self.add_lines(record.key[0], parents,
                split_lines(record.get_bytes_as('fulltext')))
        else:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            lines = split_lines(adapter.get_bytes(
                record, record.get_bytes_as(record.storage_kind)))
            try:
                self.add_lines(record.key[0], parents, lines)
            except errors.RevisionAlreadyPresent:
                # Already have this text; nothing to do.
                pass
        # Add any records whose basis parent is now available.
        added_keys = [record.key[0]]
        while added_keys:
            key = added_keys.pop(0)
            if key in buffered_index_entries:
                index_entries = buffered_index_entries[key]
                self._index.add_versions(index_entries)
                # Entries just added may in turn unblock further entries.
                added_keys.extend(
                    [index_entry[0] for index_entry in index_entries])
                del buffered_index_entries[key]
    # If there were any deltas which had a missing basis parent, error.
    if buffered_index_entries:
        raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
            self)

1159

1160

def versions(self):
    """See VersionedFile.versions.

    :return: Whatever the index reports from get_versions().
    """
    # With the 'evil' debug flag set, log where this call came from.
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    index = self._index
    return index.get_versions()

1165

1166

def has_version(self, version_id):
    """See VersionedFile.has_version.

    :param version_id: The version to look for.
    :return: The index's answer from has_version(version_id).
    """
    # With the 'evil' debug flag set, log where this call came from.
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    index = self._index
    return index.has_version(version_id)

# Support "version_id in knit".
__contains__ = has_version

1173

1174

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content and optionally build a line delta.

    This is done by comparing the annotations based on changes to the
    text.

    :param content: The new content; when annotated is true its _lines
        are updated in place.
    :param parents: The parents to merge annotations from.  parents[0]
        is the delta basis.
    :param parent_texts: A dict of already available parent contents,
        passed to _get_content.  The default dict is only read here.
    :param delta: If true, a line delta against parents[0] is returned.
    :param annotated: If true, annotations are copied from the parents.
    :param left_matching_blocks: Optional precomputed matching blocks
        between parents[0] and content, used instead of running a diff.
    :return: The line delta when delta is true, otherwise None.
    """
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                # Reuse the caller supplied matching blocks for the
                # left-hand parent.
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            # No precomputed blocks: diff against the basis parent now.
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

1207

1208

def _make_line_delta(self, delta_seq, new_content):

1209

"""Generate a line delta from delta_seq and new_content."""

1210

diff_hunks = []

1211

for op in delta_seq.get_opcodes():

1212

if op[0] == 'equal':

1213

continue

1214

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1215

return diff_hunks

1216

1217

def _get_components_positions(self, version_ids):

1218

"""Produce a map of position data for the components of versions.

1219

1220

This data is intended to be used for retrieving the knit records.

1221

1222

A dict of version_id to (record_details, index_memo, next, parents) is

1223

returned.

1224

method is the way referenced data should be applied.

1225

index_memo is the handle to pass to the data access to actually get the

1226

data

1227

next is the build-parent of the version, or None for fulltexts.

1228

parents is the version_ids of the parents of this version

1229

"""

1230

component_data = {}

1231

pending_components = version_ids

1232

while pending_components:

1233

build_details = self._index.get_build_details(pending_components)

1234

current_components = set(pending_components)

1235

pending_components = set()

1236

for version_id, details in build_details.iteritems():

1237

(index_memo, compression_parent, parents,

1238

record_details) = details

1239

method = record_details[0]

1240

if compression_parent is not None:

1241

pending_components.add(compression_parent)

1242

component_data[version_id] = (record_details, index_memo,

1243

compression_parent)

1244

missing = current_components.difference(build_details)

1245

if missing:

1246

raise errors.RevisionNotPresent(missing.pop(), self.filename)

1247

return component_data

1248

1249

def _get_content(self, version_id, parent_texts={}):

1250

"""Returns a content object that makes up the specified

1251

version."""

1252

cached_version = parent_texts.get(version_id, None)

1253

if cached_version is not None:

1254

if not self.has_version(version_id):

1255

raise RevisionNotPresent(version_id, self.filename)

1256

return cached_version

1257

1258

text_map, contents_map = self._get_content_maps([version_id])

1259

return contents_map[version_id]

1260

1261

def _check_versions_present(self, version_ids):

1262

"""Check that all specified versions are present."""

1263

self._index.check_versions_present(version_ids)

1264

1265

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

1266

nostore_sha, random_id, check_content, left_matching_blocks):

1267

"""See VersionedFile.add_lines_with_ghosts()."""

1268

self._check_add(version_id, lines, random_id, check_content)

1269

return self._add(version_id, lines, parents, self.delta,

1270

parent_texts, left_matching_blocks, nostore_sha, random_id)

1271

1272

def _add_lines(self, version_id, parents, lines, parent_texts,

1273

left_matching_blocks, nostore_sha, random_id, check_content):

1274

"""See VersionedFile.add_lines."""

1275

self._check_add(version_id, lines, random_id, check_content)

1276

self._check_versions_present(parents)

1277

return self._add(version_id, lines[:], parents, self.delta,

1278

parent_texts, left_matching_blocks, nostore_sha, random_id)

1279

1280

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param version_id: The id about to be added.
    :param lines: The lines about to be added.
    :param random_id: If true, skip the check for an existing version
        with the same id.
    :param check_content: If true, also validate lines.
    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if random_id is false and the
        version is already in this knit.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id:
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1294

1295

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: The id of the version being added.
    :param lines: The lines of the new text.  The list is copied before
        being modified when the final line lacks a trailing newline.
    :param parents: The parents to record for the version.
    :param delta: Whether a line-delta may be stored.  It is turned off
        when the first parent is not present, and may be turned off by
        _check_should_delta.
    :param parent_texts: Optional dict of parent contents, passed to
        _merge_annotations; None is treated as an empty dict.
    :param left_matching_blocks: Optional precomputed matching blocks,
        passed to _merge_annotations.
    :param nostore_sha: If equal to the digest of the new text, nothing
        is stored and errors.ExistingContent is raised.
    :param random_id: Passed through to the index's add_versions.
    :return: (digest, text_length, content).  text_length is the length
        of the joined lines as supplied, i.e. before any trailing newline
        is appended.
    :raises errors.ExistingContent: see nostore_sha.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    for parent in parents:
        if self.has_version(parent):
            present_parents.append(parent)

    # can only compress against the left most present parent.
    if (delta and
        (len(present_parents) == 0 or
         present_parents[0] != parents[0])):
        delta = False

    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            # The text is stored with a trailing newline; the 'no-eol'
            # option records that the original text did not have one.
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents)

    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
        size, bytes = self._data._record_to_data(version_id, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self.factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = self._data._record_to_data(version_id, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self.factory.lower_fulltext(content)
            size, bytes = self._data._record_to_data(version_id, digest,
                store_lines)

    # Write the record first, then make it visible through the index.
    access_memo = self._data.add_raw_records([size], bytes)[0]
    self._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

1372

1373

def check(self, progress_bar=None):
    """See VersionedFile.check().

    :param progress_bar: Accepted for interface compatibility; not used
        here.
    :raises errors.KnitCorrupt: if a version that is not stored as a
        fulltext has a first parent that is not in this knit.
    """
    # This doesn't actually test extraction of everything, but that will
    # impact 'bzr check' substantially, and needs to be integrated with
    # care. However, it does check for the obvious problem of a delta with
    # no basis.
    all_versions = self.versions()
    parent_map = self.get_parent_map(all_versions)
    for version in all_versions:
        if self._index.get_method(version) == 'fulltext':
            continue
        basis = parent_map[version][0]
        if basis in parent_map:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            basis, version))

1388

1389

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    :return: The first (only) entry of get_line_list([version_id]).
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

1392

1393

def _get_record_map(self, version_ids):

1394

"""Produce a dictionary of knit records.

1395

1396

:return: {version_id:(record, record_details, digest, next)}

1397

record

1398

data returned from read_records

1399

record_details

1400

opaque information to pass to parse_record

1401

digest

1402

SHA1 digest of the full text after all steps are done

1403

1404

build-parent of the version, i.e. the leftmost ancestor.

1405

Will be None if the record is not a delta.

1406

"""

1407

position_map = self._get_components_positions(version_ids)

1408

# c = component_id, r = record_details, i_m = index_memo, n = next

1409

records = [(c, i_m) for c, (r, i_m, n)

1410

in position_map.iteritems()]

1411

record_map = {}

1412

for component_id, record, digest in \

1413

self._data.read_records_iter(records):

1414

(record_details, index_memo, next) = position_map[component_id]

1415

record_map[component_id] = record, record_details, digest, next

1416

1417

return record_map

1418

1419

def get_text(self, version_id):
    """See VersionedFile.get_text

    :return: The first (only) entry of get_texts([version_id]).
    """
    texts = self.get_texts([version_id])
    return texts[0]

1422

1423

def get_texts(self, version_ids):
    """Return the texts of the listed versions, each as a single string.

    :param version_ids: The versions wanted.
    :return: A list with one joined string per entry returned by
        get_line_list(version_ids).
    """
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

1425

1426

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Every id is first passed to check_not_reserved_id.

    :param version_ids: The versions wanted.
    :return: A list with the text-map entry for each requested version,
        in the order requested.
    """
    for requested in version_ids:
        self.check_not_reserved_id(requested)
    texts, _contents = self._get_content_maps(version_ids)
    line_lists = []
    for requested in version_ids:
        line_lists.append(texts[requested])
    return line_lists

_get_lf_split_line_list = get_line_list

1434

1435

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, final_content) where text_map contains the texts
        for the requested versions and final_content contains the
        content objects (after cleanup_eol has been applied).
        Both dicts take the requested version_ids as their keys.
    :raises KnitCorrupt: if the sha-1 of a reconstructed text does not
        match the digest recorded for the version.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children that need to apply deltas to it is > 1 or it is part of the
    # final output.
    version_ids = list(version_ids)
    multiple_versions = len(version_ids) != 1
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Intermediate contents, only populated when several versions are
    # being extracted so shared components are parsed once.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Walk from the requested version back along its build parents,
        # stopping at a fulltext (next is None) or at a component that
        # has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            record, record_details, digest, next = record_map[cursor]
            components.append((cursor, record, record_details, digest))
            if cursor in content_map:
                break
            cursor = next

        # Replay the chain from the oldest component to the requested
        # version.
        content = None
        for (component_id, record, record_details,
             digest) in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                content, delta = self.factory.parse_record(version_id,
                    record, record_details, content,
                    copy_base_content=multiple_versions)
                if multiple_versions:
                    content_map[component_id] = content

        content.cleanup_eol(copy_on_mutate=multiple_versions)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            raise KnitCorrupt(self.filename,
                '\n sha-1 %s'
                '\n of reconstructed text does not match'
                '\n expected %s'
                '\n for version %s' %
                (actual_sha, digest, version_id))
        text_map[version_id] = text
    return text_map, final_content

1490

1491

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    This is a generator, so nothing (including the presence check) runs
    until iteration starts.

    :param version_ids: The versions to walk; None means all versions in
        this knit.
    :param pb: An optional progress bar; a progress.DummyProgress is used
        when None.
    :return: An iterator of (line, version_id) pairs, taken from the
        stored record of each requested version.
    :raises RevisionNotPresent: if a requested version is not in this
        knit.
    :raises ValueError: if the index reports a storage method other than
        'fulltext' or 'line-delta'.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need version_id, position, length
    version_id_records = []
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue: records are visited in the order
    # self.versions() reports them, not in the order requested.
    for version_id in self.versions():
        if version_id in requested_versions:
            index_memo = self._index.get_position(version_id)
            version_id_records.append((version_id, index_memo))

    total = len(version_id_records)
    for version_idx, (version_id, data, sha_value) in \
        enumerate(self._data.read_records_iter(version_id_records)):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        elif method == 'line-delta':
            line_iterator = self.factory.get_linedelta_content(data)
        else:
            raise ValueError('invalid method %r' % (method,))
        # XXX: It might be more efficient to yield (version_id,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in line_iterator:
            yield line, version_id

    pb.update('Walking content.', total, total)

1531

1532

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: The count reported by the index's num_versions().
    """
    index = self._index
    return index.num_versions()

# Support len(knit).
__len__ = num_versions

1537

1538

def annotate(self, version_id):
    """See VersionedFile.annotate.

    The work is delegated to this knit's factory, which is handed the
    knit itself and the version to annotate.
    """
    factory = self.factory
    return factory.annotate(self, version_id)

1541

1542

def get_parent_map(self, version_ids):
    """See VersionedFile.get_parent_map.

    :param version_ids: The versions to look up.
    :return: The result of the index's get_parent_map(version_ids).
    """
    index = self._index
    return index.get_parent_map(version_ids)

1545

1546

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id (a string) or a list of
        version ids.
    :param topo_sorted: Passed through to the index.
    :return: [] when no versions are given, otherwise the result of the
        index's get_ancestry.
    """
    if isinstance(versions, basestring):
        # Allow a bare version id as shorthand for a one-element list.
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1553

1554

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id (a string) or a list of
        version ids.
    :return: [] when no versions are given, otherwise the result of the
        index's get_ancestry_with_ghosts.
    """
    if isinstance(versions, basestring):
        # Allow a bare version id as shorthand for a one-element list.
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1561

1562

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    The (unsorted) ancestries and the annotations of both versions are
    collected and handed to merge._plan_annotate_merge.

    :param ver_a: One version to merge.
    :param ver_b: The other version to merge.
    :return: The result of merge._plan_annotate_merge.
    """
    b_ancestry = self.get_ancestry(ver_b, topo_sorted=False)
    ancestors_b = set(b_ancestry)
    a_ancestry = self.get_ancestry(ver_a, topo_sorted=False)
    ancestors_a = set(a_ancestry)
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    return merge._plan_annotate_merge(
        annotated_a, annotated_b, ancestors_a, ancestors_b)

1570

1571

1572

class _KnitComponentFile(object):

1573

"""One of the files used to implement a knit database"""

1574

1575

def __init__(self, transport, filename, mode, file_mode=None,

1576

create_parent_dir=False, dir_mode=None):

1577

self._transport = transport

1578

self._filename = filename

1579

self._mode = mode

1580

self._file_mode = file_mode

1581

self._dir_mode = dir_mode

1582

self._create_parent_dir = create_parent_dir

1583

self._need_to_create = False

1584

1585

def _full_path(self):

1586

"""Return the full path to this file."""

1587

return self._transport.base + self._filename

1588

1589

def check_header(self, fp):

1590

line = fp.readline()

1591

if line == '':

1592

# An empty file can actually be treated as though the file doesn't

1593

# exist yet.

1594

raise errors.NoSuchFile(self._full_path())

1595

if line != self.HEADER:

1596

raise KnitHeaderError(badline=line,

1597

filename=self._transport.abspath(self._filename))

1598

1599

def __repr__(self):

1600

return '%s(%s)' % (self.__class__.__name__, self._filename)

1601

1602

1603

class _KnitIndex(_KnitComponentFile):

1604

"""Manages knit index file.

1605

1606

The index is already kept in memory and read on startup, to enable

1607

fast lookups of revision information. The cursor of the index

1608

file is always pointing to the end, making it easy to append

1609

entries.

1610

1611

_cache is a cache for fast mapping from version id to a Index

1612

object.

1613

1614

_history is a cache for fast mapping from indexes to version ids.

1615

1616

The index data format is dictionary compressed when it comes to

1617

parent references; an index entry may only have parents with a

1618

lower index number. As a result, the index is topologically sorted.

1619

1620

Duplicate entries may be written to the index for a single version id

1621

if this is done then the latter one completely replaces the former:

1622

this allows updates to correct version and parent information.

1623

Note that the two entries may share the delta, and that successive

1624

annotations and references MUST point to the first entry.

1625

1626

The index file on disc contains a header, followed by one line per knit

1627

record. The same revision can be present in an index file more than once.

1628

The first occurrence gets assigned a sequence number starting from 0.

1629

1630

The format of a single line is

1631

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1632

REVISION_ID is a utf8-encoded revision id

1633

FLAGS is a comma separated list of flags about the record. Values include

1634

no-eol, line-delta, fulltext.

1635

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1636

that the compressed data starts at.

1637

LENGTH is the ascii representation of the length of the data file.

1638

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1639

REVISION_ID.

1640

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1641

revision id already in the knit that is a parent of REVISION_ID.

1642

The ' :' marker is the end of record marker.

1643

1644

partial writes:

1645

when a write is interrupted to the index file, it will result in a line

1646

that does not end in ' :'. If the ' :' is not present at the end of a line,

1647

or at the end of the file, then the record that is missing it will be

1648

ignored by the parser.

1649

1650

When writing new records to the index file, the data is preceded by '\n'

1651

to ensure that records always start on new lines even if the last write was

1652

interrupted. As a result it's normal for the last line in the index to be

1653

missing a trailing newline. One can be added with no harmful effects.

1654

"""

1655

1656

HEADER = "# bzr knit index 8\n"

1657

1658

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1659

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1660

1661

def _cache_version(self, version_id, options, pos, size, parents):

1662

"""Cache a version record in the history array and index cache.

1663

1664

This is inlined into _load_data for performance. KEEP IN SYNC.

1665

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1666

indexes).

1667

"""

1668

# only want the _history index to reference the 1st index entry

1669

# for version_id

1670

if version_id not in self._cache:

1671

index = len(self._history)

1672

self._history.append(version_id)

1673

else:

1674

index = self._cache[version_id][5]

1675

self._cache[version_id] = (version_id,

1676

options,

1677

pos,

1678

size,

1679

parents,

1680

index)

1681

1682

def _check_write_ok(self):

1683

if self._get_scope() != self._scope:

1684

raise errors.OutSideTransaction()

1685

if self._mode != 'w':

1686

raise errors.ReadOnlyObjectDirtiedError(self)

1687

1688

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None,
             get_scope=None):
    """Load the index named filename from transport, creating it if allowed.

    :param transport: Passed to _KnitComponentFile.__init__.
    :param filename: Passed to _KnitComponentFile.__init__.
    :param mode: Access mode; creation is only considered when it is 'w'.
    :param create: If true (and mode is 'w') a missing index is created
        instead of NoSuchFile being re-raised.
    :param file_mode: Passed to _KnitComponentFile.__init__.
    :param create_parent_dir: Passed to _KnitComponentFile.__init__.
    :param delay_create: If true, a missing index is not written now;
        self._need_to_create is set so that it is written later.
    :param dir_mode: Passed to _KnitComponentFile.__init__.
    :param get_scope: A callable; its current result is remembered in
        self._scope and the callable itself in self._get_scope.
        NOTE(review): it is called unconditionally, so the None default
        cannot actually be used - confirm all callers pass it.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    try:
        fp = self._transport.get(self._filename)
        try:
            # _load_data may raise NoSuchFile if the target knit is
            # completely empty.
            _load_data(self, fp)
        finally:
            fp.close()
    except NoSuchFile:
        creation_allowed = (mode == 'w' and create)
        if not creation_allowed:
            raise
        if delay_create:
            self._need_to_create = True
        else:
            self._transport.put_bytes_non_atomic(
                self._filename, self.HEADER, mode=self._file_mode)
    self._scope = get_scope()
    self._get_scope = get_scope

1719

1720

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: The version ids whose ancestry is wanted.
    :param topo_sorted: If true the result is produced by topo_sort;
        otherwise the keys of the collected graph are returned as they are.
    :raises RevisionNotPresent: if a requested version is not in the index.
    """
    index = self._cache
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        try:
            recorded_parents = index[version][4]
        except KeyError:
            raise RevisionNotPresent(version, self._filename)
        # trim ghosts: only parents that are themselves indexed are kept
        parents = [p for p in recorded_parents if p in index]
        # queue the parents that have not been completed yet
        todo.update([p for p in parents if p not in graph])
        graph[version] = parents
    if topo_sorted:
        return topo_sort(graph.items())
    return graph.keys()

1739

1740

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: The version ids whose ancestry is wanted; each must be
        present (checked with check_versions_present).
    :return: The result of topo_sort over the collected graph, in which
        parents missing from the index appear as nodes without parents.
    """
    self.check_versions_present(versions)
    index = self._cache
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        record = index.get(version)
        if record is None:
            # ghost, fake it
            graph[version] = []
            continue
        parents = record[4]
        # queue the parents that have not been completed yet
        todo.update([p for p in parents if p not in graph])
        graph[version] = parents
    return topo_sort(graph.items())

1759

1760

def get_build_details(self, version_ids):
    """Get the method, index_memo and compression parent for version_ids.

    Ghosts are omitted from the result.

    :param version_ids: An iterable of version_ids.
    :return: A dict of version_id:(index_memo, compression_parent,
                                   parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    details = {}
    for version_id in version_ids:
        if version_id in self._cache:
            method = self.get_method(version_id)
            parents = self.get_parents_with_ghosts(version_id)
            # anything that is not a fulltext is built on its first parent
            compression_parent = None
            if method != 'fulltext':
                compression_parent = parents[0]
            noeol = 'no-eol' in self.get_options(version_id)
            index_memo = self.get_position(version_id)
            record_details = (method, noeol)
            details[version_id] = (index_memo, compression_parent,
                                   parents, record_details)
        # else: ghosts are omitted
    return details

1795

1796

def num_versions(self):
    """Return the number of entries in self._history."""
    history = self._history
    return len(history)

# len(index) reports the same count
__len__ = num_versions

1800

1801

def get_versions(self):
    """Get all the versions in the file. not topologically sorted.

    :return: self._history itself (not a copy).
    """
    versions = self._history
    return versions

1804

1805

def _version_list_to_index(self, versions):

1806

result_list = []

1807

cache = self._cache

1808

for version in versions:

1809

if version in cache:

1810

# -- inlined lookup() --

1811

result_list.append(str(cache[version][5]))

1812

# -- end lookup () --

1813

else:

1814

result_list.append('.' + version)

1815

return ' '.join(result_list)

1816

1817

def add_version(self, version_id, options, index_memo, parents):
    """Add a single version record to the index.

    The record is handed to add_versions as a one element tuple.
    """
    record = (version_id, options, index_memo, parents)
    self.add_versions((record,))

1820

1821

def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    :param versions: a list of tuples:
                     (version_id, options, (index, pos, size), parents).
        The index member of the memo is not written to the file.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.  (Not consulted by
        this implementation.)
    """
    # Snapshots, so that a failure part way through leaves the in-memory
    # state as it was before the call.
    saved_history = self._history[:]
    saved_cache = self._cache.copy()
    lines = []

    try:
        for version_id, options, (index, pos, size), parents in versions:
            fields = (version_id,
                      ','.join(options),
                      pos,
                      size,
                      self._version_list_to_index(parents))
            # leading '\n' so the record starts on a fresh line even if
            # the previous write was cut short
            lines.append("\n%s %s %s %s %s :" % fields)
            self._cache_version(version_id, options, pos, size,
                                tuple(parents))
        if self._need_to_create:
            # first write to a delayed-create index: header plus records
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(lines))
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = saved_history
        self._cache = saved_cache
        raise

1859

1860

def has_version(self, version_id):
    """True if the version is in the index (i.e. is a key of self._cache)."""
    cache = self._cache
    return version_id in cache

1863

1864

def get_position(self, version_id):
    """Return details needed to access the version.

    .kndx indices do not support split-out data, so return None for the
    index field.

    :return: a tuple (None, data position, size) to hand to the access
        logic to get the record.
    """
    record = self._cache[version_id]
    pos = record[2]
    size = record[3]
    return (None, pos, size)

1875

1876

def get_method(self, version_id):
    """Return compression method of specified version.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are recorded.
    :raises RevisionNotPresent: if version_id is not in the index.
    :raises errors.KnitIndexUnknownMethod: if neither option is recorded.
    """
    try:
        options = self._cache[version_id][1]
    except KeyError:
        raise RevisionNotPresent(version_id, self._filename)
    # checked in priority order
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1888

1889

def get_options(self, version_id):
    """Return the options stored for version_id.

    e.g. ['foo', 'bar']

    The stored object itself is returned, not a copy.
    """
    record = self._cache[version_id]
    return record[1]

1895

1896

def get_parent_map(self, version_ids):
    """Passed through to by KnitVersionedFile.get_parent_map.

    :param version_ids: The version ids to look up.
    :return: A dict of version_id -> tuple of recorded parents; ids that
        are not in the index are left out.
    """
    cache = self._cache
    parent_map = {}
    for version_id in version_ids:
        if version_id in cache:
            parent_map[version_id] = tuple(cache[version_id][4])
    return parent_map

1905

1906

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    :param version_id: The version to look up.
    :return: The parents recorded for version_id, as given by
        get_parent_map.
    :raises RevisionNotPresent: if version_id is not in the index.
    """
    try:
        return self.get_parent_map([version_id])[version_id]
    except KeyError:
        # Name the index file in the error, as get_ancestry, get_method and
        # check_versions_present on this index do, rather than the object.
        raise RevisionNotPresent(version_id, self._filename)

1912

1913

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: for the first version id found that is not
        in the index.
    """
    cache = self._cache
    for version_id in version_ids:
        if version_id in cache:
            continue
        raise RevisionNotPresent(version_id, self._filename)

1919

1920

1921

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: if deltas is requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _check_write_ok(self):
        """Do nothing: no write check is performed by this index."""
        pass

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator.  For an index without parents each node is
        padded with an empty reference tuple so that callers always see
        four fields.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent after the
            found entries have been yielded when any key was not found.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that are found in the index.

        Despite the parameter name the values are index keys (1-tuples);
        they are passed unchanged to _get_entries.
        """
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices.

        :return: A list of the requested version ids.
        :raises RevisionNotPresent: if any requested version is missing.
        """
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        :raises RevisionNotPresent: if a requested version is missing.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # don't ask for known ghosts again - otherwise we can end
                # up looping with pending being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            # whatever was asked for but not returned is a ghost
            ghosts.update(this_iteration.difference(found))
        absent = versions.difference(graph)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.keys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        :raises RevisionNotPresent: if a requested version is missing.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            # a missing key is only an error if the caller asked for it
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        # check_present is False: a version that is not in the index is a
        # ghost and is left out of the result instead of raising.
        entries = self._get_entries(self._version_ids_to_keys(version_ids),
                                    False)
        for entry in entries:
            version_id = self._keys_to_version_ids((entry[1],))[0]
            if not self._parents:
                parents = ()
            else:
                parents = self._keys_to_version_ids(entry[3][0])
            if not self._deltas:
                compression_parent = None
            else:
                compression_parent_key = self._compression_parent(entry)
                if compression_parent_key:
                    compression_parent = self._keys_to_version_ids(
                        (compression_parent_key,))[0]
                else:
                    compression_parent = None
            # the first character of the value is 'N' for no-eol records
            noeol = (entry[2][0] == 'N')
            if compression_parent:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[version_id] = (self._node_to_position(entry),
                                  compression_parent, parents,
                                  (method, noeol))
        return result

    def _compression_parent(self, an_entry):
        """Return the key that an_entry is compressed against, or None."""
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        return compression_parents[0]

    def _get_method(self, node):
        """Return 'line-delta' if node has a compression parent, else 'fulltext'."""
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Return a tuple holding the first element of each key."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        :raises RevisionNotPresent: if version_id is not in the index.
        """
        node = self._get_node(version_id)
        return self._node_to_position(node)

    def _node_to_position(self, node):
        """Convert an index value to position details.

        The value is one flag character followed by 'pos size'.
        """
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        return self._get_method(self._get_node(version_id))

    def _get_node(self, version_id):
        """Return the index entry for version_id.

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        try:
            return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(version_id)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, version_ids):
        """Passed through to by KnitVersionedFile.get_parent_map.

        :return: A dict of version_id -> tuple of parent version ids for
            the versions found; always () for an index without parents.
        """
        nodes = self._get_entries(self._version_ids_to_keys(version_ids))
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1][0]] = self._keys_to_version_ids(node[3][0])
        else:
            for node in nodes:
                result[node[1][0]] = ()
        return result

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts.

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        try:
            return self.get_parent_map([version_id])[version_id]
        except KeyError:
            raise RevisionNotPresent(version_id, self)

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for one of the missing versions, if any.
        """
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, access_memo, parents)
            where access_memo is (index, pos, size).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: if a record cannot be stored in this kind of
            index, or differs from the entry already present for its key.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            # value layout: flag character ('N' for no-eol) then 'pos size'
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # a delta is recorded against its first parent
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                # identical entry already present: nothing to add for it
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.items():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.items():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Return the set of 1-tuple index keys for version_ids."""
        return set((version_id, ) for version_id in version_ids)

2261

2262

2263

class _KnitAccess(object):

2264

"""Access to knit records in a .knit file."""

2265

2266

def __init__(self, transport, filename, _file_mode, _dir_mode,

2267

_need_to_create, _create_parent_dir):

2268

"""Create a _KnitAccess for accessing and inserting data.

2269

2270

:param transport: The transport the .knit is located on.

2271

:param filename: The filename of the .knit.

2272

"""

2273

self._transport = transport

2274

self._filename = filename

2275

self._file_mode = _file_mode

2276

self._dir_mode = _dir_mode

2277

self._need_to_create = _need_to_create

2278

self._create_parent_dir = _create_parent_dir

2279

2280

def __repr__(self):

2281

try:

2282

return "%s(%r)" % (self.__class__.__name__,

2283

self._transport.abspath(self._filename))

2284

except:

2285

return "_KnitAccess(**unprintable**)"

2286

2287

def add_raw_records(self, sizes, raw_data):

2288

"""Add raw knit bytes to a storage area.

2289

2290

The data is spooled to whereever the access method is storing data.

2291

2292

:param sizes: An iterable containing the size of each raw data segment.

2293

:param raw_data: A bytestring containing the data.

2294

:return: A list of memos to retrieve the record later. Each memo is a

2295

tuple - (index, pos, length), where the index field is always None

2296

for the .knit access method.

2297

"""

2298

if not self._need_to_create:

2299

base = self._transport.append_bytes(self._filename, raw_data)

2300

else:

2301

self._transport.put_bytes_non_atomic(self._filename, raw_data,

2302

create_parent_dir=self._create_parent_dir,

2303

mode=self._file_mode,

2304

dir_mode=self._dir_mode)

2305

self._need_to_create = False

2306

base = 0

2307

result = []

2308

for size in sizes:

2309

result.append((None, base, size))

2310

base += size

2311

return result

2312

2313

def create(self):

2314

"""IFF this data access has its own storage area, initialise it.

2315

2316

:return: None.

2317

"""

2318

self._transport.put_bytes_non_atomic(self._filename, '',

2319

mode=self._file_mode)

2320

2321

def open_file(self):

2322

"""IFF this data access can be represented as a single file, open it.

2323

2324

For knits that are not mapped to a single file on disk this will

2325

always return None.

2326

2327

:return: None or a file handle.

2328

"""

2329

try:

2330

return self._transport.get(self._filename)

2331

except NoSuchFile:

2332

pass

2333

return None

2334

2335

def get_raw_records(self, memos_for_retrieval):

2336

"""Get the raw bytes for a records.

2337

2338

:param memos_for_retrieval: An iterable containing the (index, pos,

2339

length) memo for retrieving the bytes. The .knit method ignores

2340

the index as there is always only a single file.

2341

:return: An iterator over the bytes of the records.

2342

"""

2343

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

2344

for pos, data in self._transport.readv(self._filename, read_vector):

2345

yield data

2346

2347

2348

class _PackAccess(object):

2349

"""Access to knit records via a collection of packs."""

2350

2351

def __init__(self, index_to_packs, writer=None):

2352

"""Create a _PackAccess object.

2353

2354

:param index_to_packs: A dict mapping index objects to the transport

2355

and file names for obtaining data.

2356

:param writer: A tuple (pack.ContainerWriter, write_index) which

2357

contains the pack to write, and the index that reads from it will

2358

be associated with.

2359

"""

2360

if writer:

2361

self.container_writer = writer[0]

2362

self.write_index = writer[1]

2363

else:

2364

self.container_writer = None

2365

self.write_index = None

2366

self.indices = index_to_packs

2367

2368

def add_raw_records(self, sizes, raw_data):

2369

"""Add raw knit bytes to a storage area.

2370

2371

The data is spooled to the container writer in one bytes-record per

2372

raw data item.

2373

2374

:param sizes: An iterable containing the size of each raw data segment.

2375

:param raw_data: A bytestring containing the data.

2376

:return: A list of memos to retrieve the record later. Each memo is a

2377

tuple - (index, pos, length), where the index field is the

2378

write_index object supplied to the PackAccess object.

2379

"""

2380

result = []

2381

offset = 0

2382

for size in sizes:

2383

p_offset, p_length = self.container_writer.add_bytes_record(

2384

raw_data[offset:offset+size], [])

2385

offset += size

2386

result.append((self.write_index, p_offset, p_length))

2387

return result

2388

2389

def create(self):

2390

"""Pack based knits do not get individually created."""

2391

2392

def get_raw_records(self, memos_for_retrieval):

2393

"""Get the raw bytes for a records.

2394

2395

:param memos_for_retrieval: An iterable containing the (index, pos,

2396

length) memo for retrieving the bytes. The Pack access method

2397

looks up the pack to use for a given record in its index_to_pack

2398

map.

2399

:return: An iterator over the bytes of the records.

2400

"""

2401

# first pass, group into same-index requests

2402

request_lists = []

2403

current_index = None

2404

for (index, offset, length) in memos_for_retrieval:

2405

if current_index == index:

2406

current_list.append((offset, length))

2407

else:

2408

if current_index is not None:

2409

request_lists.append((current_index, current_list))

2410

current_index = index

2411

current_list = [(offset, length)]

2412

# handle the last entry

2413

if current_index is not None:

2414

request_lists.append((current_index, current_list))

2415

for index, offsets in request_lists:

2416

transport, path = self.indices[index]

2417

reader = pack.make_readv_reader(transport, path, offsets)

2418

for names, read_func in reader.iter_records():

2419

yield read_func(None)

2420

2421

def open_file(self):

2422

"""Pack based knits have no single file."""

2423

return None

2424

2425

def set_writer(self, writer, index, (transport, packname)):

2426

"""Set a writer to use for adding data."""

2427

if index is not None:

2428

self.indices[index] = (transport, packname)

2429

self.container_writer = writer

2430

self.write_index = index

2431

2432

2433

class _StreamAccess(object):

2434

"""A Knit Access object that provides data from a datastream.

2435

2436

It also provides a fallback to present as unannotated data, annotated data

2437

from a *backing* access object.

2438

2439

This is triggered by a index_memo which is pointing to a different index

2440

than this was constructed with, and is used to allow extracting full

2441

unannotated texts for insertion into annotated knits.

2442

"""

2443

2444

def __init__(self, reader_callable, stream_index, backing_knit,

2445

orig_factory):

2446

"""Create a _StreamAccess object.

2447

2448

:param reader_callable: The reader_callable from the datastream.

2449

This is called to buffer all the data immediately, for

2450

random access.

2451

:param stream_index: The index the data stream this provides access to

2452

which will be present in native index_memo's.

2453

:param backing_knit: The knit object that will provide access to

2454

annotated texts which are not available in the stream, so as to

2455

create unannotated texts.

2456

:param orig_factory: The original content factory used to generate the

2457

stream. This is used for checking whether the thunk code for

2458

supporting _copy_texts will generate the correct form of data.

2459

"""

2460

self.data = reader_callable(None)

2461

self.stream_index = stream_index

2462

self.backing_knit = backing_knit

2463

self.orig_factory = orig_factory

2464

2465

def get_raw_records(self, memos_for_retrieval):

2466

"""Get the raw bytes for a records.

2467

2468

:param memos_for_retrieval: An iterable of memos from the

2469

_StreamIndex object identifying bytes to read; for these classes

2470

they are (from_backing_knit, index, start, end) and can point to

2471

either the backing knit or streamed data.

2472

:return: An iterator yielding a byte string for each record in

2473

memos_for_retrieval.

2474

"""

2475

# use a generator for memory friendliness

2476

for from_backing_knit, version_id, start, end in memos_for_retrieval:

2477

if not from_backing_knit:

2478

if version_id is not self.stream_index:

2479

raise AssertionError()

2480

yield self.data[start:end]

2481

continue

2482

# we have been asked to thunk. This thunking only occurs when

2483

# we are obtaining plain texts from an annotated backing knit

2484

# so that _copy_texts will work.

2485

# We could improve performance here by scanning for where we need

2486

# to do this and using get_line_list, then interleaving the output

2487

# as desired. However, for now, this is sufficient.

2488

if self.orig_factory.__class__ != KnitPlainFactory:

2489

raise errors.KnitCorrupt(

2490

self, 'Bad thunk request %r cannot be backed by %r' %

2491

(version_id, self.orig_factory))

2492

lines = self.backing_knit.get_lines(version_id)

2493

line_bytes = ''.join(lines)

2494

digest = sha_string(line_bytes)

2495

# the packed form of the fulltext always has a trailing newline,

2496

# even if the actual text does not, unless the file is empty. the

2497

# record options including the noeol flag are passed through by

2498

# _StreamIndex, so this is safe.

2499

if lines:

2500

if lines[-1][-1] != '\n':

2501

lines[-1] = lines[-1] + '\n'

2502

line_bytes += '\n'

2503

# We want plain data, because we expect to thunk only to allow text

2504

# extraction.

2505

size, bytes = self.backing_knit._data._record_to_data(version_id,

2506

digest, lines, line_bytes)

2507

yield bytes

2508

2509

2510

class _StreamIndex(object):

2511

"""A Knit Index object that uses the data map from a datastream."""

2512

2513

def __init__(self, data_list, backing_index):

2514

"""Create a _StreamIndex object.

2515

2516

:param data_list: The data_list from the datastream.

2517

:param backing_index: The index which will supply values for nodes

2518

referenced outside of this stream.

2519

"""

2520

self.data_list = data_list

2521

self.backing_index = backing_index

2522

self._by_version = {}

2523

pos = 0

2524

for key, options, length, parents in data_list:

2525

self._by_version[key] = options, (pos, pos + length), parents

2526

pos += length

2527

2528

def get_ancestry(self, versions, topo_sorted):
    """Get an ancestry list for versions.

    Only versions and parents that are part of the stream are considered.

    :param versions: The version ids whose ancestry is wanted.
    :param topo_sorted: Must be false; sorted output is not implemented.
    :return: A list of version ids in no particular order.
    :raises NotImplementedError: if topo_sorted is true.
    :raises RevisionNotPresent: if a requested version is not in the stream.
    """
    if topo_sorted:
        # Not needed for basic joins
        raise NotImplementedError(self.get_ancestry)
    # Little ugly - basically copied from KnitIndex, but don't want to
    # accidentally incorporate too much of that index's code.
    by_version = self._by_version
    seen = set()
    todo = set(versions)
    while todo:
        version = todo.pop()
        try:
            recorded_parents = by_version[version][2]
        except KeyError:
            raise RevisionNotPresent(version, self)
        # trim ghosts
        parents = [p for p in recorded_parents if p in by_version]
        # queue the parents that have not been completed yet
        todo.update([p for p in parents if p not in seen])
        seen.add(version)
    return list(seen)

2550

2551

def get_build_details(self, version_ids):

2552

"""Get the method, index_memo and compression parent for version_ids.

2553

2554

Ghosts are omitted from the result.

2555

2556

:param version_ids: An iterable of version_ids.

2557

:return: A dict of version_id:(index_memo, compression_parent,

2558

parents, record_details).

2559

index_memo

2560

opaque memo that can be passed to _StreamAccess.read_records

2561

to extract the raw data; for these classes it is

2562

(from_backing_knit, index, start, end)

2563

compression_parent

2564

Content that this record is built upon, may be None

2565

parents

2566

Logical parents of this node

2567

record_details

2568

extra information about the content which needs to be passed to

2569

Factory.parse_record

2570

"""

2571

result = {}

2572

for version_id in version_ids:

2573

try:

2574

method = self.get_method(version_id)

2575

except errors.RevisionNotPresent:

2576

# ghosts are omitted

2577

continue

2578

parent_ids = self.get_parents_with_ghosts(version_id)

2579

noeol = ('no-eol' in self.get_options(version_id))

2580

index_memo = self.get_position(version_id)

2581

from_backing_knit = index_memo[0]

2582

if from_backing_knit:

2583

# texts retrieved from the backing knit are always full texts

2584

method = 'fulltext'

2585

if method == 'fulltext':

2586

compression_parent = None

2587

else:

2588

compression_parent = parent_ids[0]

2589

result[version_id] = (index_memo, compression_parent,

2590

parent_ids, (method, noeol))

2591

return result

2592

2593

def get_method(self, version_id):

2594

"""Return compression method of specified version."""

2595

options = self.get_options(version_id)

2596

if 'fulltext' in options:

2597

return 'fulltext'

2598

elif 'line-delta' in options:

2599

return 'line-delta'

2600

else:

2601

raise errors.KnitIndexUnknownMethod(self, options)

2602

2603

def get_options(self, version_id):

2604

"""Return a list representing options.

2605

2606

e.g. ['foo', 'bar']

2607

"""

2608

try:

2609

return self._by_version[version_id][0]

2610

except KeyError:

2611

options = list(self.backing_index.get_options(version_id))

2612

if 'fulltext' in options:

2613

pass

2614

elif 'line-delta' in options:

2615

# Texts from the backing knit are always returned from the stream

2616

# as full texts

2617

options.remove('line-delta')

2618

options.append('fulltext')

2619

else:

2620

raise errors.KnitIndexUnknownMethod(self, options)

2621

return tuple(options)

2622

2623

def get_parent_map(self, version_ids):

2624

"""Passed through to by KnitVersionedFile.get_parent_map."""

2625

result = {}

2626

pending_ids = set()

2627

for version_id in version_ids:

2628

try:

2629

result[version_id] = self._by_version[version_id][2]

2630

except KeyError:

2631

pending_ids.add(version_id)

2632

result.update(self.backing_index.get_parent_map(pending_ids))

2633

return result

2634

2635

def get_parents_with_ghosts(self, version_id):

2636

"""Return parents of specified version with ghosts."""

2637

try:

2638

return self.get_parent_map([version_id])[version_id]

2639

except KeyError:

2640

raise RevisionNotPresent(version_id, self)

2641

2642

def get_position(self, version_id):

2643

"""Return details needed to access the version.

2644

2645

_StreamAccess has the data as a big array, so we return slice

2646

coordinates into that (as index_memo's are opaque outside the

2647

index and matching access class).

2648

2649

:return: a tuple (from_backing_knit, index, start, end) that can

2650

be passed e.g. to get_raw_records.

2651

If from_backing_knit is False, index will be self, otherwise it

2652

will be a version id.

2653

"""

2654

try:

2655

start, end = self._by_version[version_id][1]

2656

return False, self, start, end

2657

except KeyError:

2658

# Signal to the access object to handle this from the backing knit.

2659

return (True, version_id, None, None)

2660

2661

def get_versions(self):

2662

"""Get all the versions in the stream."""

2663

return self._by_version.keys()

2664

2665

2666

class _KnitData(object):

2667

"""Manage extraction of data from a KnitAccess, caching and decompressing.

2668

2669

The KnitData class provides the logic for parsing and using knit records,

2670

making use of an access method for the low level read and write operations.

2671

"""

2672

2673

def __init__(self, access):

2674

"""Create a KnitData object.

2675

2676

:param access: The access method to use. Access methods such as

2677

_KnitAccess manage the insertion of raw records and the subsequent

2678

retrieval of the same.

2679

"""

2680

self._access = access

2681

self._checked = False

2682

2683

def _open_file(self):

2684

return self._access.open_file()

2685

2686

def _record_to_data(self, version_id, digest, lines, dense_lines=None):

2687

"""Convert version_id, digest, lines into a raw data block.

2688

2689

:param dense_lines: The bytes of lines but in a denser form. For

2690

instance, if lines is a list of 1000 bytestrings each ending in \n,

2691

dense_lines may be a list with one line in it, containing all the

2692

1000's lines and their \n's. Using dense_lines if it is already

2693

known is a win because the string join to create bytes in this

2694

function spends less time resizing the final string.

2695

:return: (len, a StringIO instance with the raw data ready to read.)

2696

"""

2697

# Note: using a string copy here increases memory pressure with e.g.

2698

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

2699

# when doing the initial commit of a mozilla tree. RBC 20070921

2700

bytes = ''.join(chain(

2701

["version %s %d %s\n" % (version_id,

2702

len(lines),

2703

digest)],

2704

dense_lines or lines,

2705

["end %s\n" % version_id]))

2706

compressed_bytes = bytes_to_gzip(bytes)

2707

return len(compressed_bytes), compressed_bytes

2708

2709

def add_raw_records(self, sizes, raw_data):

2710

"""Append a prepared record to the data file.

2711

2712

:param sizes: An iterable containing the size of each raw data segment.

2713

:param raw_data: A bytestring containing the data.

2714

:return: a list of index data for the way the data was stored.

2715

See the access method add_raw_records documentation for more

2716

details.

2717

"""

2718

return self._access.add_raw_records(sizes, raw_data)

2719

2720

def _parse_record_header(self, version_id, raw_data):

2721

"""Parse a record header for consistency.

2722

2723

:return: the header and the decompressor stream.

2724

as (stream, header_record)

2725

"""

2726

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2727

try:

2728

rec = self._check_header(version_id, df.readline())

2729

except Exception, e:

2730

raise KnitCorrupt(self._access,

2731

"While reading {%s} got %s(%s)"

2732

% (version_id, e.__class__.__name__, str(e)))

2733

return df, rec

2734

2735

def _split_header(self, line):

2736

rec = line.split()

2737

if len(rec) != 4:

2738

raise KnitCorrupt(self._access,

2739

'unexpected number of elements in record header')

2740

return rec

2741

2742

def _check_header_version(self, rec, version_id):

2743

if rec[1] != version_id:

2744

raise KnitCorrupt(self._access,

2745

'unexpected version, wanted %r, got %r'

2746

% (version_id, rec[1]))

2747

2748

def _check_header(self, version_id, line):

2749

rec = self._split_header(line)

2750

self._check_header_version(rec, version_id)

2751

return rec

2752

2753

def _parse_record_unchecked(self, data):

2754

# profiling notes:

2755

# 4168 calls in 2880 217 internal

2756

# 4168 calls to _parse_record_header in 2121

2757

# 4168 calls to readlines in 330

2758

df = GzipFile(mode='rb', fileobj=StringIO(data))

2759

try:

2760

record_contents = df.readlines()

2761

except Exception, e:

2762

raise KnitCorrupt(self._access, "Corrupt compressed record %r, got %s(%s)" %

2763

(data, e.__class__.__name__, str(e)))

2764

header = record_contents.pop(0)

2765

rec = self._split_header(header)

2766

last_line = record_contents.pop()

2767

if len(record_contents) != int(rec[2]):

2768

raise KnitCorrupt(self._access,

2769

'incorrect number of lines %s != %s'

2770

' for version {%s}'

2771

% (len(record_contents), int(rec[2]),

2772

rec[1]))

2773

if last_line != 'end %s\n' % rec[1]:

2774

raise KnitCorrupt(self._access,

2775

'unexpected version end line %r, wanted %r'

2776

% (last_line, rec[1]))

2777

df.close()

2778

return rec, record_contents

2779

2780

def _parse_record(self, version_id, data):

2781

rec, record_contents = self._parse_record_unchecked(data)

2782

self._check_header_version(rec, version_id)

2783

return record_contents, rec[3]

2784

2785

def read_records_iter_raw(self, records):

2786

"""Read text records from data file and yield raw data.

2787

2788

This unpacks enough of the text record to validate the id is

2789

as expected but thats all.

2790

2791

Each item the iterator yields is (version_id, bytes,

2792

sha1_of_full_text).

2793

"""

2794

# setup an iterator of the external records:

2795

# uses readv so nice and fast we hope.

2796

if len(records):

2797

# grab the disk data needed.

2798

needed_offsets = [index_memo for version_id, index_memo

2799

in records]

2800

raw_records = self._access.get_raw_records(needed_offsets)

2801

2802

for version_id, index_memo in records:

2803

data = raw_records.next()

2804

# validate the header

2805

df, rec = self._parse_record_header(version_id, data)

2806

df.close()

2807

yield version_id, data, rec[3]

2808

2809

def read_records_iter(self, records):

2810

"""Read text records from data file and yield result.

2811

2812

The result will be returned in whatever is the fastest to read.

2813

Not by the order requested. Also, multiple requests for the same

2814

record will only yield 1 response.

2815

:param records: A list of (version_id, pos, len) entries

2816

:return: Yields (version_id, contents, digest) in the order

2817

read, not the order requested

2818

"""

2819

if not records:

2820

return

2821

2822

needed_records = sorted(set(records), key=operator.itemgetter(1))

2823

if not needed_records:

2824

return

2825

2826

# The transport optimizes the fetching as well

2827

# (ie, reads continuous ranges.)

2828

raw_data = self._access.get_raw_records(

2829

[index_memo for version_id, index_memo in needed_records])

2830

2831

for (version_id, index_memo), data in \

2832

izip(iter(needed_records), raw_data):

2833

content, digest = self._parse_record(version_id, data)

2834

yield version_id, content, digest

2835

2836

def read_records(self, records):

2837

"""Read records into a dictionary."""

2838

components = {}

2839

for record_id, content, digest in \

2840

self.read_records_iter(records):

2841

components[record_id] = (content, digest)

2842

return components

2843

2844

2845

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = staticmethod(make_file_knit)
    _matching_file_to_factory = staticmethod(make_file_knit)

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are knits."""
        try:
            source_is_knit = isinstance(source, KnitVersionedFile)
            return source_is_knit and isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def _copy_texts(self, pb, msg, version_ids, ignore_missing=False):
        """Copy texts to the target by extracting and adding them one by one.

        see join() for the parameter definitions.

        :return: The number of versions that were added to the target.
        """
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        # --- the below is factorable out with VersionedFile.join, but wait for
        # VersionedFiles, it may all be simpler then.
        searcher = Graph(self.source)._make_breadth_first_searcher(version_ids)
        transitive_ids = set()
        for found_ids in searcher:
            transitive_ids.update(found_ids)
        parent_map = self.source.get_parent_map(transitive_ids)
        order = topo_sort(parent_map.items())

        def size_of_content(content):
            return sum(map(len, content.text()))

        # Cache at most 10MB of parent texts
        parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
                                              compute_size=size_of_content)
        # TODO: jam 20071116 It would be nice to have a streaming interface to
        #       get multiple texts from a source. The source could be smarter
        #       about how it handled intermediate stages.
        #       get_line_list() or make_mpdiffs() seem like a possibility, but
        #       at the moment they extract all full texts into memory, which
        #       causes us to store more than our 3x fulltext goal.
        #       Repository.iter_files_bytes() may be another possibility
        to_process = []
        for version in order:
            if version not in self.target:
                to_process.append(version)
        total = len(to_process)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for index, version in enumerate(to_process):
                pb.update('Converting versioned data', index, total)
                source_parents = self.source.get_parents_with_ghosts(version)
                source_lines = self.source.get_lines(version)
                sha1, num_bytes, parent_text = self.target.add_lines(
                    version, source_parents, source_lines,
                    parent_texts=parent_cache)
                parent_cache[version] = parent_text
        finally:
            pb.finished()
        return total

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly
        if self.source.factory.annotated == self.target.factory.annotated:
            converter = None
        elif self.source.factory.annotated:
            converter = self._anno_to_plain_converter
        else:
            # We're converting from a plain to an annotated knit. Copy them
            # across by full texts.
            return self._copy_texts(pb, msg, version_ids, ignore_missing)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(
                version_ids, topo_sorted=False))
            this_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - this_versions
            if not needed_versions:
                return 0

            full_list = topo_sort(
                self.source.get_parent_map(self.source.versions()))
            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            source_index = self.source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    if (not self.target.has_version(parent)
                        and parent not in copy_set
                        and self.source.has_version(parent)):
                        raise AssertionError("problem joining parent %r "
                            "from %r to %r"
                            % (parent, self.source, self.target))
                index_memo = source_index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_iter = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for read_entry, queued_entry in izip(raw_iter, copy_queue):
                version_id, raw_data, _ = read_entry
                version_id2, options, parents = queued_entry
                if version_id != version_id2:
                    raise AssertionError('logic error, inconsistent results')
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                                               parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: Whatever the target's _record_to_data returns for the
            lowered lines.
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        source_factory = self.source.factory
        if 'fulltext' in options:
            content = source_factory.parse_fulltext(data, version_id)
            lines = self.target.factory.lower_fulltext(content)
        else:
            delta = source_factory.parse_line_delta(data, version_id,
                                                    plain=True)
            lines = self.target.factory.lower_line_delta(delta)
        return self.target._data._record_to_data(version_id, digest, lines)


InterVersionedFile.register_optimiser(InterKnit)

3007

3008

3009

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = staticmethod(make_file_knit)

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a weave and target is a knit."""
        try:
            source_is_weave = isinstance(source, bzrlib.weave.Weave)
            return source_is_weave and isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            if not needed_versions:
                return 0

            full_list = topo_sort(
                self.source.get_parent_map(self.source.versions()))
            version_list = []
            for candidate in full_list:
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions):
                    version_list.append(candidate)

            # do the join:
            total = len(version_list)
            parent_map = self.source.get_parent_map(version_list)
            for done, version_id in enumerate(version_list):
                pb.update("Converting to knit", done, total)
                parents = parent_map[version_id]
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    if not self.target.has_version(parent):
                        raise AssertionError("%r does not have parent %r"
                            % (self.target, parent))
                source_lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, source_lines)
            # Every entry of version_list was added, one per iteration.
            return total
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

3069

3070

3071

# Deprecated alias kept so existing references to KnitSequenceMatcher keep
# working; use patiencediff.PatienceSequenceMatcher instead.

3072

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

3073

3074

3075

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :return: An iterator over the result of _KnitAnnotator.annotate for
        revision_id.
    """
    annotated = _KnitAnnotator(knit).annotate(revision_id)
    return iter(annotated)

3084

3085

3086

class _KnitAnnotator(object):

3087

"""Build up the annotations for a text."""

3088

3089

def __init__(self, knit):

3090

self._knit = knit

3091

3092

# Content objects, differs from fulltexts because of how final newlines

3093

# are treated by knits. the content objects here will always have a

3094

# final newline

3095

self._fulltext_contents = {}

3096

3097

# Annotated lines of specific revisions

3098

self._annotated_lines = {}

3099

3100

# Track the raw data for nodes that we could not process yet.

3101

# This maps the revision_id of the base to a list of children that will

3102

# annotated from it.

3103

self._pending_children = {}

3104

3105

# Nodes which cannot be extracted

3106

self._ghosts = set()

3107

3108

# Track how many children this node has, so we know if we need to keep

3109

# it

3110

self._annotate_children = {}

3111

self._compression_children = {}

3112

3113

self._all_build_details = {}

3114

# The children => parent revision_id graph

3115

self._revision_id_graph = {}

3116

3117

self._heads_provider = None

3118

3119

self._nodes_to_keep_annotations = set()

3120

self._generations_until_keep = 100

3121

3122

def set_generations_until_keep(self, value):

3123

"""Set the number of generations before caching a node.

3124

3125

Setting this to -1 will cache every merge node, setting this higher

3126

will cache fewer nodes.

3127

"""

3128

self._generations_until_keep = value

3129

3130

def _add_fulltext_content(self, revision_id, content_obj):

3131

self._fulltext_contents[revision_id] = content_obj

3132

# TODO: jam 20080305 It might be good to check the sha1digest here

3133

return content_obj.text()

3134

3135

def _check_parents(self, child, nodes_to_annotate):

3136

"""Check if all parents have been processed.

3137

3138

:param child: A tuple of (rev_id, parents, raw_content)

3139

:param nodes_to_annotate: If child is ready, add it to

3140

nodes_to_annotate, otherwise put it back in self._pending_children

3141

"""

3142

for parent_id in child[1]:

3143

if (parent_id not in self._annotated_lines):

3144

# This parent is present, but another parent is missing

3145

self._pending_children.setdefault(parent_id,

3146

[]).append(child)

3147

break

3148

else:

3149

# This one is ready to be processed

3150

nodes_to_annotate.append(child)

3151

3152

def _add_annotation(self, revision_id, fulltext, parent_ids,

3153

left_matching_blocks=None):

3154

"""Add an annotation entry.

3155

3156

All parents should already have been annotated.

3157

:return: A list of children that now have their parents satisfied.

3158

"""

3159

a = self._annotated_lines

3160

annotated_parent_lines = [a[p] for p in parent_ids]

3161

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

3162

fulltext, revision_id, left_matching_blocks,

3163

heads_provider=self._get_heads_provider()))

3164

self._annotated_lines[revision_id] = annotated_lines

3165

for p in parent_ids:

3166

ann_children = self._annotate_children[p]

3167

ann_children.remove(revision_id)

3168

if (not ann_children

3169

and p not in self._nodes_to_keep_annotations):

3170

del self._annotated_lines[p]

3171

del self._all_build_details[p]

3172

if p in self._fulltext_contents:

3173

del self._fulltext_contents[p]

3174

# Now that we've added this one, see if there are any pending

3175

# deltas to be done, certainly this parent is finished

3176

nodes_to_annotate = []

3177

for child in self._pending_children.pop(revision_id, []):

3178

self._check_parents(child, nodes_to_annotate)

3179

return nodes_to_annotate

3180

3181

def _get_build_graph(self, revision_id):

3182

"""Get the graphs for building texts and annotations.

3183

3184

The data you need for creating a full text may be different than the

3185

data you need to annotate that text. (At a minimum, you need both

3186

parents to create an annotation, but only need 1 parent to generate the

3187

fulltext.)

3188

3189

:return: A list of (revision_id, index_memo) records, suitable for

3190

passing to read_records_iter to start reading in the raw data fro/

3191

the pack file.

3192

"""

3193

if revision_id in self._annotated_lines:

3194

# Nothing to do

3195

return []

3196

pending = set([revision_id])

3197

records = []

3198

generation = 0

3199

kept_generation = 0

3200

while pending:

3201

# get all pending nodes

3202

generation += 1

3203

this_iteration = pending

3204

build_details = self._knit._index.get_build_details(this_iteration)

3205

self._all_build_details.update(build_details)

3206

# new_nodes = self._knit._index._get_entries(this_iteration)

3207

pending = set()

3208

for rev_id, details in build_details.iteritems():

3209

(index_memo, compression_parent, parents,

3210

record_details) = details

3211

self._revision_id_graph[rev_id] = parents

3212

records.append((rev_id, index_memo))

3213

# Do we actually need to check _annotated_lines?

3214

pending.update(p for p in parents

3215

if p not in self._all_build_details)

3216

if compression_parent:

3217

self._compression_children.setdefault(compression_parent,

3218

[]).append(rev_id)

3219

if parents:

3220

for parent in parents:

3221

self._annotate_children.setdefault(parent,

3222

[]).append(rev_id)

3223

num_gens = generation - kept_generation

3224

if ((num_gens >= self._generations_until_keep)

3225

and len(parents) > 1):

3226

kept_generation = generation

3227

self._nodes_to_keep_annotations.add(rev_id)

3228

3229

missing_versions = this_iteration.difference(build_details.keys())

3230

self._ghosts.update(missing_versions)

3231

for missing_version in missing_versions:

3232

# add a key, no parents

3233

self._revision_id_graph[missing_version] = ()

3234

pending.discard(missing_version) # don't look for it

3235

if self._ghosts.intersection(self._compression_children):

3236

raise KnitCorrupt(

3237

"We cannot have nodes which have a ghost compression parent:\n"

3238

"ghosts: %r\n"

3239

"compression children: %r"

3240

% (self._ghosts, self._compression_children))

3241

# Cleanout anything that depends on a ghost so that we don't wait for

3242

# the ghost to show up

3243

for node in self._ghosts:

3244

if node in self._annotate_children:

3245

# We won't be building this node

3246

del self._annotate_children[node]

3247

# Generally we will want to read the records in reverse order, because

3248

# we find the parent nodes after the children

3249

records.reverse()

3250

return records

3251

3252

def _annotate_records(self, records):

3253

"""Build the annotations for the listed records."""

3254

# We iterate in the order read, rather than a strict order requested

3255

# However, process what we can, and put off to the side things that

3256

# still need parents, cleaning them up when those parents are

3257

# processed.

3258

for (rev_id, record,

3259

digest) in self._knit._data.read_records_iter(records):

3260

if rev_id in self._annotated_lines:

3261

continue

3262

parent_ids = self._revision_id_graph[rev_id]

3263

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3264

details = self._all_build_details[rev_id]

3265

(index_memo, compression_parent, parents,

3266

record_details) = details

3267

nodes_to_annotate = []

3268

# TODO: Remove the punning between compression parents, and

3269

# parent_ids, we should be able to do this without assuming

3270

# the build order

3271

if len(parent_ids) == 0:

3272

# There are no parents for this node, so just add it

3273

# TODO: This probably needs to be decoupled

3274

fulltext_content, delta = self._knit.factory.parse_record(

3275

rev_id, record, record_details, None)

3276

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3277

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3278

parent_ids, left_matching_blocks=None))

3279

else:

3280

child = (rev_id, parent_ids, record)

3281

# Check if all the parents are present

3282

self._check_parents(child, nodes_to_annotate)

3283

while nodes_to_annotate:

3284

# Should we use a queue here instead of a stack?

3285

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3286

(index_memo, compression_parent, parents,

3287

record_details) = self._all_build_details[rev_id]

3288

if compression_parent is not None:

3289

comp_children = self._compression_children[compression_parent]

3290

if rev_id not in comp_children:

3291

raise AssertionError("%r not in compression children %r"

3292

% (rev_id, comp_children))

3293

# If there is only 1 child, it is safe to reuse this

3294

# content

3295

reuse_content = (len(comp_children) == 1

3296

and compression_parent not in

3297

self._nodes_to_keep_annotations)

3298

if reuse_content:

3299

# Remove it from the cache since it will be changing

3300

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3301

# Make sure to copy the fulltext since it might be

3302

# modified

3303

parent_fulltext = list(parent_fulltext_content.text())

3304

else:

3305

parent_fulltext_content = self._fulltext_contents[compression_parent]

3306

parent_fulltext = parent_fulltext_content.text()

3307

comp_children.remove(rev_id)

3308

fulltext_content, delta = self._knit.factory.parse_record(

3309

rev_id, record, record_details,

3310

parent_fulltext_content,

3311

copy_base_content=(not reuse_content))

3312

fulltext = self._add_fulltext_content(rev_id,

3313

fulltext_content)

3314

blocks = KnitContent.get_line_delta_blocks(delta,

3315

parent_fulltext, fulltext)

3316

else:

3317

fulltext_content = self._knit.factory.parse_fulltext(

3318

record, rev_id)

3319

fulltext = self._add_fulltext_content(rev_id,

3320

fulltext_content)

3321

blocks = None

3322

nodes_to_annotate.extend(

3323

self._add_annotation(rev_id, fulltext, parent_ids,

3324

left_matching_blocks=blocks))

3325

3326

def _get_heads_provider(self):

3327

"""Create a heads provider for resolving ancestry issues."""

3328

if self._heads_provider is not None:

3329

return self._heads_provider

3330

parent_provider = _mod_graph.DictParentsProvider(

3331

self._revision_id_graph)

3332

graph_obj = _mod_graph.Graph(parent_provider)

3333

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

3334

self._heads_provider = head_cache

3335

return head_cache

3336

3337

def annotate(self, revision_id):

3338

"""Return the annotated fulltext at the given revision.

3339

3340

:param revision_id: The revision id for this file

3341

"""

3342

records = self._get_build_graph(revision_id)

3343

if revision_id in self._ghosts:

3344

raise errors.RevisionNotPresent(revision_id, self._knit)

3345

self._annotate_records(records)

3346

return self._annotated_lines[revision_id]

3347

3348

3349

try:

3350

from bzrlib._knit_load_data_c import _load_data_c as _load_data

3351

except ImportError:

3352

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »