/brz/remove-bazaar : revision 3224.1.13

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2008-02-26 22:26:00 UTC
mto: This revision was merged to the branch mainline in revision 3280.
Revision ID: john@arbash-meinel.com-20080226222600-6wsja3pv4kelorug

Revert the _get_component_positions api

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bzr_access

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/best_practice_intro.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/revnos.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/index.txt

generate_docs.py

index.txt

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

graph as _mod_graph,

lru_cache,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

100

KnitHeaderError,

101

RevisionNotPresent,

102

RevisionAlreadyPresent,

103

)

104

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

105

from bzrlib.osutils import (

106

contains_whitespace,

107

contains_linebreaks,

108

sha_string,

109

sha_strings,

110

)

111

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

112

from bzrlib.tsort import topo_sort

113

import bzrlib.ui

114

import bzrlib.weave

115

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

116

117

118

# TODO: Split out code specific to this format into an associated object.

119

120

# TODO: Can we put in some kind of value to check that the index and data

121

# files belong together?

122

123

# TODO: accommodate binaries, perhaps by storing a byte count

124

125

# TODO: function to check whole file

126

127

# TODO: atomically append data, then measure backwards from the cursor

128

# position after writing to work out where it was located. we may need to

129

# bypass python file buffering.

130

131

# Suffix appended to a knit's relpath to name its data file.
DATA_SUFFIX = '.knit'

132

# Suffix appended to a knit's relpath to name its index file.
INDEX_SUFFIX = '.kndx'

133

134

135

class KnitContent(object):
    """Base class for the content of one knit version.

    The helpers here rely on ``annotate_iter()``, ``text()`` and ``_lines``
    being supplied by the concrete subclass, which also implements
    ``apply_delta()``.
    """

    def annotate(self):
        """Return the (origin, text) pairs of annotate_iter() as a list."""
        annotated_lines = self.annotate_iter()
        return list(annotated_lines)

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Not implemented at this level; always raises NotImplementedError.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta turning this content into new_lines.

        :param new_lines: The target content object; its text() and _lines
            are read.
        :return: An iterator of (old_start, old_end, new_length, lines)
            hunks, one for every opcode that is not 'equal'.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        :param knit_delta: Iterable of (source_start, source_end,
            target_length, new_text) hunks.
        :param source: The lines the delta applies to.
        :param target: The lines the delta produces.
        :return: An iterator of (source_pos, target_pos, length) matching
            blocks, finished by a zero-length block.
        """
        def reliable_length(source_pos, target_pos, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the last
            # line of every candidate run is compared explicitly.
            if length > 0:
                last = length - 1
                if source[source_pos + last] != target[target_pos + last]:
                    return last
            return length

        target_len = len(target)
        source_pos = 0
        target_pos = 0
        for hunk_start, hunk_end, hunk_len, new_text in knit_delta:
            # lines between the previous hunk and this one are unchanged
            unchanged = hunk_start - source_pos
            matched = reliable_length(source_pos, target_pos, unchanged)
            if matched > 0:
                yield source_pos, target_pos, matched
            target_pos += hunk_len + unchanged
            source_pos = hunk_end
        # whatever follows the last hunk is unchanged as well
        tail = target_len - target_pos
        matched = reliable_length(source_pos, target_pos, tail)
        if matched > 0:
            yield source_pos, target_pos, matched
        yield source_pos + tail, target_len, 0

185

186

187

class AnnotatedKnitContent(KnitContent):

188

"""Annotated content."""

189

190

def __init__(self, lines):

191

self._lines = lines

192

193

def annotate_iter(self):

194

"""Yield tuples of (origin, text) for each content line."""

195

return iter(self._lines)

196

197

def apply_delta(self, delta, new_version_id):

198

"""Apply delta to this object to become new_version_id."""

199

offset = 0

200

lines = self._lines

201

for start, end, count, delta_lines in delta:

202

lines[offset+start:offset+end] = delta_lines

203

offset = offset + (start - end) + count

204

205

def strip_last_line_newline(self):

206

line = self._lines[-1][1].rstrip('\n')

207

self._lines[-1] = (self._lines[-1][0], line)

208

209

def text(self):

210

try:

211

return [text for origin, text in self._lines]

212

except ValueError, e:

213

# most commonly (only?) caused by the internal form of the knit

214

# missing annotation information because of a bug - see thread

215

# around 20071015

216

raise KnitCorrupt(self,

217

"line in annotated knit missing annotation information: %s"

218

% (e,))

219

220

def copy(self):

221

return AnnotatedKnitContent(self._lines[:])

222

223

224

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is
    reported for all lines. Generally, annotate[_iter] is not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        """Wrap lines (a list of text lines) attributed to version_id."""
        self._version_id = version_id
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        for text in self._lines:
            yield self._version_id, text

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        :param delta: Iterable of (start, end, count, delta_lines) hunks,
            with start/end expressed against the pre-delta line numbering.
        :param new_version_id: The version id recorded once the hunks have
            been applied.
        """
        content = self._lines
        # how far earlier hunks have shifted the line numbering so far
        shift = 0
        for start, end, count, replacement in delta:
            content[shift + start:shift + end] = replacement
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        duplicate = self._lines[:]
        return PlainKnitContent(duplicate, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing newlines from the last line."""
        last_line = self._lines[-1]
        self._lines[-1] = last_line.rstrip('\n')

    def text(self):
        """Return the stored line list itself (not a copy)."""
        return self._lines

258

259

260

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: Iterable of serialised fulltext lines.
        :param version_id: Not used by this implementation.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param lines: The serialised line delta.
        :param version_id: Passed through to parse_line_delta(). It is
            optional here so that the historical one-argument call keeps
            working, while matching the two-argument form used by
            KnitPlainFactory.parse_line_delta_iter.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: Iterable of serialised delta lines.
        :param version_id: Not used by this implementation.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        :return: A list of hunks.
        """
        result = []
        line_iter = iter(lines)
        # Content lines are pulled from the same iterator the header loop
        # walks, so every header is followed by exactly `count` reads.
        next_line = line_iter.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in line_iter:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in line_iter:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        next_line = line_iter.next
        for header in line_iter:
            header = header.split(',')
            # the third header field is the number of content lines
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotate_iter() of knit's content for version_id."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

370

371

372

class KnitPlainFactory(object):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return plain content for lines, attributed to version_id."""
        content = PlainKnitContent(lines, version_id)
        return content

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the result is a content object built
        by make(), carrying version_id as a constant origin for every line.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) hunks from a line delta.

        :param lines: A sequence (it is indexed, sliced and measured) of
            serialised delta lines: a 'start,end,count' header followed by
            count content lines, repeated.
        :param version_id: Not used by this implementation.
        """
        total = len(lines)
        position = 0
        while position < total:
            header_fields = lines[position].split(',')
            start, end, count = [int(field) for field in header_fields]
            body_start = position + 1
            position = body_start + count
            yield start, end, count, lines[body_start:position]

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        line_iter = iter(lines)
        take_line = line_iter.next
        for header in line_iter:
            # the third header field is the number of content lines
            remaining = int(header.split(',')[2])
            for unused in xrange(remaining):
                yield take_line()

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text()."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta as a flat list of header and content lines."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(hunk_lines)
        return serialised

    def annotate_iter(self, knit, version_id):
        """Iterate the annotated lines a _KnitAnnotator produces."""
        annotated = _KnitAnnotator(knit).get_annotated_lines(version_id)
        return iter(annotated)

432

433

434

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit files live on.
    :param relpath: Path of the knit, without suffix, relative to transport.
    :return: The newly constructed KnitVersionedFile, opened for writing
        with a plain (unannotated) factory.
    """
    # KnitVersionedFile takes (relpath, transport, ...); the remaining
    # arguments are passed by keyword so each lands in the intended slot.
    # NOTE(review): create=True is assumed to be what "construct an empty
    # knit" requires - confirm against _KnitIndex.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

437

438

439

class KnitVersionedFile(VersionedFile):

440

"""Weave-like structure with faster random access.

441

442

A knit stores a number of texts and a summary of the relationships

443

between them. Texts are identified by a string version-id. Texts

444

are normally stored and retrieved as a series of lines, but can

445

also be passed as single strings.

446

447

Lines are stored with the trailing newline (if any) included, to

448

avoid special cases for files with no final newline. Lines are

449

composed of 8-bit characters, not unicode. The combination of

450

these approaches should mean any 'binary' file can be safely

451

stored and retrieved.

452

"""

453

454

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, delta=True, create=False, create_parent_dir=False,
             delay_create=False, dir_mode=None, index=None,
             access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Path of the knit without suffix; INDEX_SUFFIX and
        DATA_SUFFIX are appended for the index and data files.
    :param transport: Transport handed to the index and data access.
    :param file_mode: Passed through to the index and data access.
    :param access_mode: 'r' or 'w'; defaults to 'w'.
    :param factory: Content factory; a KnitAnnotateFactory is used when
        this is not supplied.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data access.
    :param index: An index to use for the knit.
    :param access_method: A data access object to use in place of a
        freshly built _KnitAccess.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta
    self._max_delta_chain = 200

    if index is None:
        index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    # the index has to be in place before len(self) is evaluated below
    self._index = index
    if access_method is None:
        access_method = _KnitAccess(transport, relpath + DATA_SUFFIX,
            file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    if create and not len(self) and not delay_create:
        access_method.create()
    self._data = _KnitData(access_method)

494

495

def __repr__(self):

496

return '%s(%s)' % (self.__class__.__name__,

497

self.transport.abspath(self.filename))

498

499

def _check_should_delta(self, first_parents):

500

"""Iterate back through the parent listing, looking for a fulltext.

501

502

This is used when we want to decide whether to add a delta or a new

503

fulltext. It searches for _max_delta_chain parents. When it finds a

504

fulltext parent, it sees if the total size of the deltas leading up to

505

it is large enough to indicate that we want a new full text anyway.

506

507

Return True if we should create a new delta, False if we should use a

508

full text.

509

"""

510

delta_size = 0

511

fulltext_size = None

512

delta_parents = first_parents

513

for count in xrange(self._max_delta_chain):

514

parent = delta_parents[0]

515

method = self._index.get_method(parent)

516

index, pos, size = self._index.get_position(parent)

517

if method == 'fulltext':

518

fulltext_size = size

519

break

520

delta_size += size

521

delta_parents = self._index.get_parents(parent)

522

else:

523

# We couldn't find a fulltext, so we must create a new one

524

return False

525

526

return fulltext_size > delta_size

527

528

def _add_raw_records(self, records, data):

529

"""Add all the records 'records' with data pre-joined in 'data'.

530

531

:param records: A list of tuples(version_id, options, parents, size).

532

:param data: The data for the records. When it is written, the records

533

are adjusted to have pos pointing into data by the sum of

534

the preceding records sizes.

535

"""

536

# write all the data

537

raw_record_sizes = [record[3] for record in records]

538

positions = self._data.add_raw_records(raw_record_sizes, data)

539

offset = 0

540

index_entries = []

541

for (version_id, options, parents, size), access_memo in zip(

542

records, positions):

543

index_entries.append((version_id, options, access_memo, parents))

544

if self._data._do_cache:

545

self._data._cache[version_id] = data[offset:offset+size]

546

offset += size

547

self._index.add_versions(index_entries)

548

549

def enable_cache(self):

550

"""Start caching data for this knit"""

551

self._data.enable_cache()

552

553

def clear_cache(self):

554

"""Clear the data cache only."""

555

self._data.clear_cache()

556

557

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Path, without suffix, of the copy on the target transport.
    :param transport: The transport to copy the index and data files to.
    """
    temp_index_name = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(temp_index_name, index_file)
    finally:
        # the source handle used to be left open; release it explicitly
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(temp_index_name, name + INDEX_SUFFIX)

571

572

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport.

    The new knit reuses this knit's factory and delta setting. The mode
    argument is accepted but not used.
    """
    empty_knit = KnitVersionedFile(
        name, transport, factory=self.factory, delta=self.delta, create=True)
    return empty_knit

575

576

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.  They are returned in a partial order by compression
    parent, so that the deltas can be applied as the data stream is
    inserted; however note that compression parents will not be sent
    unless they were specifically requested, as the client may already
    have them.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    """
    wanted = frozenset(required_versions)
    details_by_version = {}
    # revisions that can just be sent without waiting for their
    # compression parent
    sendable = []
    # compression parent -> the requested children built on top of it
    held_back = {}
    index = self._index
    # first, read all relevant index data, enough to sort into the right
    # order to return
    for version_id in required_versions:
        options = index.get_options(version_id)
        parents = index.get_parents_with_ghosts(version_id)
        index_memo = index.get_position(version_id)
        details_by_version[version_id] = (index_memo, options, parents)
        if 'line-delta' in options and parents[0] in wanted:
            # must wait until the parent has been sent
            held_back.setdefault(parents[0], []).append(version_id)
        else:
            # either a fulltext, or a delta whose parent the client did
            # not ask for and presumably already has
            sendable.append(version_id)
    # build a list of results to return, plus instructions for data to
    # read from the file
    read_requests = []
    planned = []
    while sendable:
        # XXX: pushing and popping lists may be a bit inefficient
        version_id = sendable.pop(0)
        index_memo, options, parents = details_by_version[version_id]
        read_requests.append((version_id, index_memo))
        unused, unused_pos, data_size = index_memo
        planned.append((version_id, options, data_size, parents))
        if version_id in held_back:
            # now we can send all the children of this revision - we could
            # put them in anywhere, but we hope that sending them soon
            # after the fulltext will give good locality in the receiver
            sendable[:0] = held_back.pop(version_id)
    assert len(held_back) == 0, \
        "Still have compressed child versions waiting to be sent"
    # XXX: The stream format is such that we cannot stream it - we have to
    # know the length of all the data a-priori.
    chunks = []
    result_version_list = []
    raw_records = self._data.read_records_iter_raw(read_requests)
    for (version_id, raw_data), planned_entry in izip(raw_records, planned):
        planned_id, options, unused_size, parents = planned_entry
        assert version_id == planned_id, \
            'logic error, inconsistent results'
        chunks.append(raw_data)
        result_version_list.append(
            (version_id, options, len(raw_data), parents))
    # provide a callback to get data incrementally.
    pseudo_file = StringIO(''.join(chunks))
    def read(length):
        if length is None:
            return pseudo_file.read()
        return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

656

657

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored delta.

    :return: The result of KnitContent.get_line_delta_blocks() when
        version_id is stored as a 'line-delta', otherwise None.
    """
    if self._index.get_method(version_id) == 'line-delta':
        unused_parent, unused_sha1, unused_noeol, delta = \
            self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

662

663

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: A tuple (parent, sha1, noeol, delta). parent is the first
        parent of version_id, or None if it has none. noeol is True when
        the 'no-eol' option is recorded. For a version stored as a
        fulltext the delta is computed against the parent's text (or
        against an empty text); otherwise the stored line delta is parsed.
    """
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if self._index.get_method(version_id) != 'fulltext':
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta
    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                     new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

688

689

def get_format_signature(self):

690

"""See VersionedFile.get_format_signature()."""

691

if self.factory.annotated:

692

annotated_part = "annotated"

693

else:

694

annotated_part = "plain"

695

return "knit-%s" % (annotated_part,)

696

697

def get_graph_with_ghosts(self):

698

"""See VersionedFile.get_graph_with_ghosts()."""

699

graph_items = self._index.get_graph()

700

return dict(graph_items)

701

702

def get_sha1(self, version_id):

703

return self.get_sha1s([version_id])[0]

704

705

def get_sha1s(self, version_ids):

706

"""See VersionedFile.get_sha1()."""

707

record_map = self._get_record_map(version_ids)

708

# record entry 2 is the 'digest'.

709

return [record_map[v][2] for v in version_ids]

710

711

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list holding the data suffix followed by the index
        suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

715

716

def has_ghost(self, version_id):

717

"""True if there is a ghost reference in the file to version_id."""

718

# maybe we have it

719

if self.has_version(version_id):

720

return False

721

# optimisable if needed by memoising the _ghosts set.

722

items = self._index.get_graph()

723

for node, parents in items:

724

for parent in parents:

725

if parent not in self._index._cache:

726

if parent == version_id:

727

return True

728

return False

729

730

def insert_data_stream(self, (format, data_list, reader_callable)):

731

"""Insert knit records from a data stream into this knit.

732

733

If a version in the stream is already present in this knit, it will not

734

be inserted a second time. It will be checked for consistency with the

735

stored version however, and may cause a KnitCorrupt error to be raised

736

if the data in the stream disagrees with the already stored data.

737

738

:seealso: get_data_stream

739

"""

740

if format != self.get_format_signature():

741

if 'knit' in debug.debug_flags:

742

trace.mutter(

743

'incompatible format signature inserting to %r', self)

744

source = self._knit_from_datastream(

745

(format, data_list, reader_callable))

746

self.join(source)

747

return

748

749

for version_id, options, length, parents in data_list:

750

if self.has_version(version_id):

751

# First check: the list of parents.

752

my_parents = self.get_parents_with_ghosts(version_id)

753

if tuple(my_parents) != tuple(parents):

754

# XXX: KnitCorrupt is not quite the right exception here.

755

raise KnitCorrupt(

756

self.filename,

757

'parents list %r from data stream does not match '

758

'already recorded parents %r for %s'

759

% (parents, my_parents, version_id))

760

761

# Also check the SHA-1 of the fulltext this content will

762

# produce.

763

raw_data = reader_callable(length)

764

my_fulltext_sha1 = self.get_sha1(version_id)

765

df, rec = self._data._parse_record_header(version_id, raw_data)

766

stream_fulltext_sha1 = rec[3]

767

if my_fulltext_sha1 != stream_fulltext_sha1:

768

# Actually, we don't know if it's this knit that's corrupt,

769

# or the data stream we're trying to insert.

770

raise KnitCorrupt(

771

self.filename, 'sha-1 does not match %s' % version_id)

772

else:

773

if 'line-delta' in options:

774

# Make sure that this knit record is actually useful: a

775

# line-delta is no use unless we have its parent.

776

# Fetching from a broken repository with this problem

777

# shouldn't break the target repository.

778

779

# See https://bugs.launchpad.net/bzr/+bug/164443

780

if not self._index.has_version(parents[0]):

781

raise KnitCorrupt(

782

self.filename,

783

'line-delta from stream '

784

'for version %s '

785

'references '

786

'missing parent %s\n'

787

'Try running "bzr check" '

788

'on the source repository, and "bzr reconcile" '

789

'if necessary.' %

790

(version_id, parents[0]))

791

self._add_raw_records(

792

[(version_id, options, parents, length)],

793

reader_callable(length))

794

795

def _knit_from_datastream(self, (format, data_list, reader_callable)):

796

"""Create a knit object from a data stream.

797

798

This method exists to allow conversion of data streams that do not

799

match the signature of this knit. Generally it will be slower and use

800

more memory to use this method to insert data, but it will work.

801

802

:seealso: get_data_stream for details on datastreams.

803

:return: A knit versioned file which can be used to join the datastream

804

into self.

805

"""

806

if format == "knit-plain":

807

factory = KnitPlainFactory()

808

elif format == "knit-annotated":

809

factory = KnitAnnotateFactory()

810

else:

811

raise errors.KnitDataStreamUnknown(format)

812

index = _StreamIndex(data_list, self._index)

813

access = _StreamAccess(reader_callable, index, self, factory)

814

return KnitVersionedFile(self.filename, self.transport,

815

factory=factory, index=index, access_method=access)

816

817

def versions(self):
    """See VersionedFile.versions."""
    flags = debug.debug_flags
    if 'evil' in flags:
        # logged from inside this method so the recorded call site is kept
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

822

823

def has_version(self, version_id):
    """See VersionedFile.has_version."""
    flags = debug.debug_flags
    if 'evil' in flags:
        # logged from inside this method so the recorded call site is kept
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    return self._index.has_version(version_id)

# 'version_id in knit' is answered by has_version.
__contains__ = has_version

830

831

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changes to the text.

    :param content: The new content; its _lines are updated in place when
        annotated is true.
    :param parents: The parent version ids of the new content.
    :param parent_texts: Optional dict of version id to content object,
        consulted before reading a parent from the knit. None (the
        default) is treated as an empty dict.
    :param delta: If true, a line delta against parents[0] is returned.
    :param annotated: If true, (origin, text) pairs are copied from each
        parent for the lines that match it.
    :param left_matching_blocks: Optional pre-computed matching blocks
        between parents[0] and content, used instead of running a diff.
    :return: The line delta when delta is true, otherwise None.
    """
    # A shared mutable default is avoided; a fresh dict is made per call.
    if parent_texts is None:
        parent_texts = {}
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

864

865

def _make_line_delta(self, delta_seq, new_content):

866

"""Generate a line delta from delta_seq and new_content."""

867

diff_hunks = []

868

for op in delta_seq.get_opcodes():

869

if op[0] == 'equal':

870

continue

871

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

872

return diff_hunks

873

874

def _get_components_positions(self, version_ids):

875

"""Produce a map of position data for the components of versions.

876

877

This data is intended to be used for retrieving the knit records.

878

879

A dict of version_id to (method, index_memo, next, parents, noeol) is

880

returned.

881

method is the way referenced data should be applied.

882

index_memo is the handle to pass to the data access to actually get the

883

data

884

next is the build-parent of the version, or None for fulltexts.

885

parents is the version_ids of the parents of this version

886

noeol is a flag indicating if there is a final newline character

887

"""

888

component_data = {}

889

pending_components = version_ids

890

while pending_components:

891

build_details = self._index.get_build_details(pending_components)

892

pending_components = set()

893

for version_id, details in build_details.items():

894

(method, index_memo, compression_parent, parents,

895

noeol) = details

896

if compression_parent is not None:

897

pending_components.add(compression_parent)

898

component_data[version_id] = (method, index_memo,

899

compression_parent)

900

return component_data

901

902

def _get_content(self, version_id, parent_texts={}):

903

"""Returns a content object that makes up the specified

904

version."""

905

cached_version = parent_texts.get(version_id, None)

906

if cached_version is not None:

907

if not self.has_version(version_id):

908

raise RevisionNotPresent(version_id, self.filename)

909

return cached_version

910

911

text_map, contents_map = self._get_content_maps([version_id])

912

return contents_map[version_id]

913

914

def _check_versions_present(self, version_ids):

915

"""Check that all specified versions are present."""

916

self._index.check_versions_present(version_ids)

917

918

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

919

nostore_sha, random_id, check_content):

920

"""See VersionedFile.add_lines_with_ghosts()."""

921

self._check_add(version_id, lines, random_id, check_content)

922

return self._add(version_id, lines, parents, self.delta,

923

parent_texts, None, nostore_sha, random_id)

924

925

def _add_lines(self, version_id, parents, lines, parent_texts,

926

left_matching_blocks, nostore_sha, random_id, check_content):

927

"""See VersionedFile.add_lines."""

928

self._check_add(version_id, lines, random_id, check_content)

929

self._check_versions_present(parents)

930

return self._add(version_id, lines[:], parents, self.delta,

931

parent_texts, left_matching_blocks, nostore_sha, random_id)

932

933

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If contains_whitespace(version_id) is true.
    :raises RevisionAlreadyPresent: If random_id is false and the version
        is already in this knit.

    When check_content is true the lines are additionally passed to
    _check_lines_not_unicode and _check_lines_are_lines.
    """
    whitespace_found = contains_whitespace(version_id)
    if whitespace_found:
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    already_present = (not random_id) and self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

947

948

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of the versions specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: Id of the new version; asserted to be a str.
    :param lines: The lines of the new text. The caller's list is not
        modified; a copy is made before a final newline is appended.
    :param parents: Parent version ids of the new version.
    :param delta: Whether a line-delta is wanted. It is downgraded to a
        fulltext when the first parent is not present, or when
        _check_should_delta says so.
    :param parent_texts: Mapping handed on to _merge_annotations; None is
        treated as an empty mapping.
    :param left_matching_blocks: Hint handed on to _merge_annotations.
    :param nostore_sha: If equal to the sha of the joined lines, nothing
        is stored.
    :param random_id: Passed through to the index's add_versions.
    :return: A tuple (digest, text_length, content), where text_length is
        measured before any final newline is appended.
    :raises errors.ExistingContent: If nostore_sha matches the digest of
        the supplied lines.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    if parent_texts is None:
        parent_texts = {}
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    # can only compress against the left most present parent.
    if delta and (not present_parents or
                  present_parents[0] != parents[0]):
        delta = False

    text_length = len(line_bytes)
    options = []
    # endswith() rather than lines[-1][-1], so that an empty final line
    # does not raise IndexError.
    if lines and not lines[-1].endswith('\n'):
        # copy the contents of lines.
        lines = lines[:]
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'
        line_bytes += '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
        size, record_bytes = self._data._record_to_data(version_id, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self.factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, record_bytes = self._data._record_to_data(version_id,
                digest, lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self.factory.lower_fulltext(content)
            size, record_bytes = self._data._record_to_data(version_id,
                digest, store_lines)

    access_memo = self._data.add_raw_records([size], record_bytes)[0]
    self._index.add_versions(
        ((version_id, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

1026

1027

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no work: progress_bar is ignored and
    None is returned.
    """
    return None

1029

1030

def _clone_text(self, new_version_id, old_version_id, parents):

1031

"""See VersionedFile.clone_text()."""

1032

# FIXME RBC 20060228 make fast by only inserting an index with null

1033

# delta.

1034

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

1035

1036

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Implemented as a single-element get_line_list request.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

1039

1040

def _get_record_map(self, version_ids):

1041

"""Produce a dictionary of knit records.

1042

1043

The keys are version_ids, the values are tuples of (method, content,

1044

digest, next).

1045

method is the way the content should be applied.

1046

content is a KnitContent object.

1047

digest is the SHA1 digest of this version id after all steps are done

1048

next is the build-parent of the version, i.e. the leftmost ancestor.

1049

If the method is fulltext, next will be None.

1050

"""

1051

position_map = self._get_components_positions(version_ids)

1052

# c = component_id, m = method, i_m = index_memo, n = next

1053

# p = parent_ids, e = noeol

1054

records = [(c, i_m) for c, (m, i_m, n)

1055

in position_map.iteritems()]

1056

record_map = {}

1057

for component_id, content, digest in \

1058

self._data.read_records_iter(records):

1059

(method, index_memo, next) = position_map[component_id]

1060

record_map[component_id] = method, content, digest, next

1061

1062

return record_map

1063

1064

def get_text(self, version_id):
    """See VersionedFile.get_text.

    Implemented as a single-element get_texts request.
    """
    texts = self.get_texts([version_id])
    return texts[0]

1067

1068

def get_texts(self, version_ids):
    """Return one joined string per entry returned by get_line_list.

    Each entry from get_line_list(version_ids) is concatenated with
    ''.join, in the order get_line_list returns them.
    """
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

1070

1071

def get_line_list(self, version_ids):
    """Return the texts of the listed versions.

    :param version_ids: An iterable of version ids. It may be a one-shot
        iterator; it is materialised once up front.
    :return: A list with one entry per requested id, in request order,
        taken from the text map produced by _get_content_maps.

    Every id is first passed to check_not_reserved_id.
    """
    # version_ids is walked three times below; without this a generator
    # would be exhausted by the first loop and yield an empty result.
    version_ids = list(version_ids)
    for version_id in version_ids:
        self.check_not_reserved_id(version_id)
    text_map = self._get_content_maps(version_ids)[0]
    return [text_map[v] for v in version_ids]

_get_lf_split_line_list = get_line_list

1079

1080

def _get_content_maps(self, version_ids):

1081

"""Produce maps of text and KnitContents

1082

1083

:return: (text_map, content_map) where text_map contains the texts for

1084

the requested versions and content_map contains the KnitContents.

1085

Both dicts take version_ids as their keys.

1086

"""

1087

# FUTURE: This function could be improved for the 'extract many' case

1088

# by tracking each component and only doing the copy when the number of

1089

# children than need to apply delta's to it is > 1 or it is part of the

1090

# final output.

1091

version_ids = list(version_ids)

1092

multiple_versions = len(version_ids) != 1

1093

record_map = self._get_record_map(version_ids)

1094

1095

text_map = {}

1096

content_map = {}

1097

final_content = {}

1098

for version_id in version_ids:

1099

components = []

1100

cursor = version_id

1101

while cursor is not None:

1102

method, data, digest, next = record_map[cursor]

1103

components.append((cursor, method, data, digest))

1104

if cursor in content_map:

1105

break

1106

cursor = next

1107

1108

content = None

1109

for component_id, method, data, digest in reversed(components):

1110

if component_id in content_map:

1111

content = content_map[component_id]

1112

else:

1113

if method == 'fulltext':

1114

assert content is None

1115

content = self.factory.parse_fulltext(data, version_id)

1116

elif method == 'line-delta':

1117

delta = self.factory.parse_line_delta(data, version_id)

1118

if multiple_versions:

1119

# only doing this when we want multiple versions

1120

# output avoids list copies - which reference and

1121

# dereference many strings.

1122

content = content.copy()

1123

content.apply_delta(delta, version_id)

1124

if multiple_versions:

1125

content_map[component_id] = content

1126

1127

if 'no-eol' in self._index.get_options(version_id):

1128

if multiple_versions:

1129

content = content.copy()

1130

content.strip_last_line_newline()

1131

final_content[version_id] = content

1132

1133

# digest here is the digest from the last applied component.

1134

text = content.text()

1135

actual_sha = sha_strings(text)

1136

if actual_sha != digest:

1137

raise KnitCorrupt(self.filename,

1138

'\n sha-1 %s'

1139

'\n of reconstructed text does not match'

1140

'\n expected %s'

1141

'\n for version %s' %

1142

(actual_sha, digest, version_id))

1143

text_map[version_id] = text

1144

return text_map, final_content

1145

1146

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: The versions to walk; None means self.versions().
    :param pb: Progress object whose update() is called as records are
        walked; None means a progress.DummyProgress is used.
    :return: An iterator of (line, version_id) pairs, one per line
        produced by the factory for each walked record.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue of (version_id, index_memo):
    version_id_records = [
        (candidate, self._index.get_position(candidate))
        for candidate in self.versions()
        if candidate in requested_versions]

    total = len(version_id_records)
    walked = 0
    for version_id, data, sha_value in \
            self._data.read_records_iter(version_id_records):
        pb.update('Walking content.', walked, total)
        walked += 1
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract_lines = self.factory.get_fulltext_content
        else:
            extract_lines = self.factory.get_linedelta_content
        # XXX: It might be more efficient to yield (version_id,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in extract_lines(data):
            yield line, version_id

    pb.update('Walking content.', total, total)

1186

1187

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.

    The request is answered entirely by the index's iter_parents.
    """
    index = self._index
    return index.iter_parents(version_ids)

1197

1198

def num_versions(self):
    """See VersionedFile.num_versions().

    The count is whatever the index's num_versions reports.
    """
    index = self._index
    return index.num_versions()

__len__ = num_versions

1203

1204

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    The factory does the work; it is given this knit and the version id.
    """
    factory = self.factory
    return factory.annotate_iter(self, version_id)

1207

1208

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: If the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1217

1218

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: If the index lookup raises KeyError.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1224

1225

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    A single version id given as a string is treated as a one-element
    list. An empty request yields an empty list without consulting the
    index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1232

1233

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    A single version id given as a string is treated as a one-element
    list. An empty request yields an empty list without consulting the
    index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1240

1241

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Gathers the unsorted ancestry and the annotation of both versions and
    hands them to merge._plan_annotate_merge.
    """
    # The b-side ancestry is fetched before the a-side one.
    b_ancestry = set(self.get_ancestry(ver_b, topo_sorted=False))
    a_ancestry = set(self.get_ancestry(ver_a, topo_sorted=False))
    a_annotated = self.annotate(ver_a)
    b_annotated = self.annotate(ver_b)
    return merge._plan_annotate_merge(a_annotated, b_annotated,
                                      a_ancestry, b_ancestry)

1249

1250

1251

class _KnitComponentFile(object):

1252

"""One of the files used to implement a knit database"""

1253

1254

def __init__(self, transport, filename, mode, file_mode=None,

1255

create_parent_dir=False, dir_mode=None):

1256

self._transport = transport

1257

self._filename = filename

1258

self._mode = mode

1259

self._file_mode = file_mode

1260

self._dir_mode = dir_mode

1261

self._create_parent_dir = create_parent_dir

1262

self._need_to_create = False

1263

1264

def _full_path(self):

1265

"""Return the full path to this file."""

1266

return self._transport.base + self._filename

1267

1268

def check_header(self, fp):

1269

line = fp.readline()

1270

if line == '':

1271

# An empty file can actually be treated as though the file doesn't

1272

# exist yet.

1273

raise errors.NoSuchFile(self._full_path())

1274

if line != self.HEADER:

1275

raise KnitHeaderError(badline=line,

1276

filename=self._transport.abspath(self._filename))

1277

1278

def __repr__(self):

1279

return '%s(%s)' % (self.__class__.__name__, self._filename)

1280

1281

1282

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is kept in memory and read on startup, to enable fast
    lookups of revision information.  The cursor of the index file is
    always pointing to the end, making it easy to append entries.

    _cache maps a version id to the tuple
    (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data
        file that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record in the
        data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write is interrupted to the index file, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a
    line, or at the end of the file, then the record that is missing it will
    be ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write
    was interrupted. As a result it is normal for the last line in the index
    to be missing a trailing newline. One can be added with no harmful
    effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        cache = self._cache
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id in cache:
            sequence = cache[version_id][5]
        else:
            history = self._history
            sequence = len(history)
            history.append(version_id)
        cache[version_id] = (version_id, options, pos, size, parents,
                             sequence)

    def __init__(self, transport, filename, mode, create=False,
                 file_mode=None, create_parent_dir=False, delay_create=False,
                 dir_mode=None):
        """Load the index from transport, optionally creating it.

        If the file is missing, it is only tolerated when mode is 'w' and
        create is true; then the header is either written immediately or,
        with delay_create, on the first add_versions call.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            may_create = (mode == 'w' and create)
            if not may_create:
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        cache = self._cache
        return [(version_id, cache[version_id][4]) for version_id in cache]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                recorded_parents = cache[version][4]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in recorded_parents if p in cache]
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            if version in cache:
                parents = cache[version][4]
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
            else:
                # ghost, fake it
                graph[version] = []
        return topo_sort(graph.items())

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(method, index_memo, compression_parent,
                                       parents, noeol).
        """
        details = {}
        for version_id in version_ids:
            method = self.get_method(version_id)
            parents = self.get_parents_with_ghosts(version_id)
            # A fulltext has no compression parent; anything else is
            # compressed against its first recorded parent.
            compression_parent = None
            if method != 'fulltext':
                compression_parent = parents[0]
            noeol = 'no-eol' in self.get_options(version_id)
            index_memo = self.get_position(version_id)
            details[version_id] = (method, index_memo, compression_parent,
                                   parents, noeol)
        return details

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for wanted in version_ids:
            try:
                yield wanted, tuple(self.get_parents(wanted))
            except KeyError:
                # not in the index: skip it
                pass

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        history = self._history
        return len(history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode versions as the space separated parent field of a line.

        A version already in the index becomes its sequence number; any
        other version becomes its id prefixed with '.'.
        """
        cache = self._cache
        tokens = []
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                token = str(cache[version][5])
                # -- end lookup () --
            else:
                token = '.' + version
            tokens.append(token)
        return ' '.join(tokens)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        record = (version_id, options, index_memo, parents)
        self.add_versions((record,))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        :param versions: an iterable of tuples:
                         (version_id, options, (index, pos, size), parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        new_lines = []
        # Snapshot the in-memory state so that a failure part-way through
        # can be rolled back.
        saved_history = self._history[:]
        saved_cache = self._cache.copy()

        try:
            for version_id, options, access_memo, parents in versions:
                _unused_index, pos, size = access_memo
                flags = ','.join(options)
                parent_refs = self._version_list_to_index(parents)
                line = "\n%s %s %s %s %s :" % (version_id, flags, pos, size,
                                               parent_refs)
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                new_lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # Creation was delayed: write the header and the new
                # records in one go.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(new_lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                    create_parent_dir=self._create_parent_dir,
                    mode=self._file_mode,
                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename,
                                             ''.join(new_lines))
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._history = saved_history
            self._cache = saved_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        cache = self._cache
        return version_id in cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        record = self._cache[version_id]
        pos = record[2]
        size = record[3]
        return None, pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises RevisionNotPresent: If version_id is not in the index.
        :raises errors.KnitIndexUnknownMethod: If the options name neither
            'fulltext' nor 'line-delta'.
        """
        try:
            options = self._cache[version_id][1]
        except KeyError:
            raise RevisionNotPresent(version_id, self._filename)
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        record = self._cache[version_id]
        return record[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        return [parent for parent in cache[version_id][4]
                if parent in cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        record = self._cache[version_id]
        return record[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: For the first version id found missing.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id in cache:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1587

1588

1589

class KnitGraphIndex(object):

1590

"""A knit index that builds on GraphIndex."""

1591

1592

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):

1593

"""Construct a KnitGraphIndex on a graph_index.

1594

1595

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1596

:param deltas: Allow delta-compressed records.

1597

:param add_callback: If not None, allow additions to the index and call

1598

this callback with a list of added GraphIndex nodes:

1599

[(node, value, node_refs), ...]

1600

:param parents: If True, record knits parents, if not do not record

1601

parents.

1602

"""

1603

self._graph_index = graph_index

1604

self._deltas = deltas

1605

self._add_callback = add_callback

1606

self._parents = parents

1607

if deltas and not parents:

1608

raise KnitCorrupt(self, "Cannot do delta compression without "

1609

"parent tracking.")

1610

1611

def _get_entries(self, keys, check_present=False):

1612

"""Get the entries for keys.

1613

1614

:param keys: An iterable of index keys, - 1-tuples.

1615

"""

1616

keys = set(keys)

1617

found_keys = set()

1618

if self._parents:

1619

for node in self._graph_index.iter_entries(keys):

1620

yield node

1621

found_keys.add(node[1])

1622

else:

1623

# adapt parentless index to the rest of the code.

1624

for node in self._graph_index.iter_entries(keys):

1625

yield node[0], node[1], node[2], ()

1626

found_keys.add(node[1])

1627

if check_present:

1628

missing_keys = keys.difference(found_keys)

1629

if missing_keys:

1630

raise RevisionNotPresent(missing_keys.pop(), self)

1631

1632

def _present_keys(self, version_ids):

1633

return set([

1634

node[1] for node in self._get_entries(version_ids)])

1635

1636

def _parentless_ancestry(self, versions):

1637

"""Honour the get_ancestry API for parentless knit indices."""

1638

wanted_keys = self._version_ids_to_keys(versions)

1639

present_keys = self._present_keys(wanted_keys)

1640

missing = set(wanted_keys).difference(present_keys)

1641

if missing:

1642

raise RevisionNotPresent(missing.pop(), self)

1643

return list(self._keys_to_version_ids(present_keys))

1644

1645

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :raises RevisionNotPresent: If a requested version has no entry in
        the index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        found = set()
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            # dont ask for ghosties - otherwise
            # we we can end up looping with pending
            # being entirely ghosted.
            known_parents = [parent for parent in node_refs[0]
                             if parent not in ghosts]
            graph[key] = known_parents
            # queue parents, but dont examine known nodes again
            for parent in known_parents:
                if parent not in graph:
                    pending.add(parent)
            found.add(key)
        ghosts.update(this_iteration.difference(found))
    absent = versions.difference(graph)
    if absent:
        raise RevisionNotPresent(absent.pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        # iterating the dict yields its keys
        result_keys = graph
    return [key[0] for key in result_keys]

1683

1684

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Keys that are referenced as parents but have no entry in the index are
    included in the result with no parents of their own.

    :raises RevisionNotPresent: If a requested version has no entry in
        the index.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            parent_keys = node_refs[0]
            graph[key] = parent_keys
            # queue parents, but dont examine known nodes again
            for parent in parent_keys:
                if parent not in graph:
                    pending.add(parent)
        missing_versions = this_iteration.difference(graph)
        missing_needed = versions.intersection(missing_versions)
        if missing_needed:
            raise RevisionNotPresent(missing_needed.pop(), self)
        for missing_version in missing_versions:
            # add a key, no parents
            graph[missing_version] = []
            pending.discard(missing_version) # don't look for it
    sorted_keys = topo_sort(graph.items())
    return [key[0] for key in sorted_keys]

1717

1718

def get_build_details(self, version_ids):
    """Get the method, index_memo and compression parent for version_ids.

    :param version_ids: An iterable of version_ids.
    :return: A dict of version_id:(method, index_memo, compression_parent,
                                   parents, noeol).
    :raises RevisionNotPresent: Raised by _get_entries when a requested
        version has no entry.
    """
    details = {}
    wanted_keys = self._version_ids_to_keys(version_ids)
    for entry in self._get_entries(wanted_keys, True):
        version_id = self._keys_to_version_ids((entry[1],))[0]
        parents = self._keys_to_version_ids(entry[3][0])
        # Without delta support, or without a recorded compression
        # parent, the record is a fulltext.
        compression_parent = None
        if self._deltas:
            compression_parent_key = self._compression_parent(entry)
            if compression_parent_key:
                compression_parent = self._keys_to_version_ids(
                    (compression_parent_key,))[0]
        # The first character of the node value flags a missing final
        # newline.
        noeol = (entry[2][0] == 'N')
        if compression_parent:
            method = 'line-delta'
        else:
            method = 'fulltext'
        index_memo = self._node_to_position(entry)
        details[version_id] = (method, index_memo, compression_parent,
                               parents, noeol)
    return details

1747

1748

def _compression_parent(self, an_entry):

1749

# return the key that an_entry is compressed against, or None

1750

# Grab the second parent list (as deltas implies parents currently)

1751

compression_parents = an_entry[3][1]

1752

if not compression_parents:

1753

return None

1754

assert len(compression_parents) == 1

1755

return compression_parents[0]

1756

1757

def _get_method(self, node):
    """Return 'line-delta' or 'fulltext' for an index node.

    A node is a line-delta only when this index stores deltas and the node
    records a compression parent.
    """
    if self._deltas and self._compression_parent(node):
        return 'line-delta'
    return 'fulltext'

1764

1765

def get_graph(self):
    """Return a list of (version_id, parents) pairs from this knit index.

    When the index does not track parents every version is reported with
    an empty parents tuple.
    """
    if self._parents:
        return [(key[0], tuple([ref[0] for ref in refs[0]]))
                for index, key, value, refs
                in self._graph_index.iter_all_entries()]
    return [(version_id, ()) for version_id in self.get_versions()]

1773

1774

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        Parents that are not present in the index are left out of the
        yielded tuples. The order is undefined, allowing for different
        optimisations in the underlying implementation.
    """
    if not self._parents:
        # A parentless index: every present version has no parents.
        for node in self._get_entries(self._version_ids_to_keys(version_ids)):
            yield node[1][0], ()
        return
    nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
    referenced = set()
    known = set()
    for node in nodes:
        referenced.update(node[3][0])
        # any node we are querying must be present
        known.add(node[1])
    # Only parents we have not already seen as nodes need a lookup.
    known.update(self._present_keys(referenced.difference(known)))
    for node in nodes:
        yield node[1][0], tuple([parent[0] for parent in node[3][0]
                                 if parent in known])

1802

1803

def num_versions(self):
    """Return the number of entries in the backing graph index."""
    entries = list(self._graph_index.iter_all_entries())
    return len(entries)

# len(index) is the same as the number of versions.
__len__ = num_versions

1807

1808

def get_versions(self):
    """Get all the versions in the file. not topologically sorted."""
    version_ids = []
    for node in self._graph_index.iter_all_entries():
        # node[1] is the key tuple; its first element is the version id.
        version_ids.append(node[1][0])
    return version_ids

1811

1812

def has_version(self, version_id):
    """True if the version is in the index."""
    wanted = self._version_ids_to_keys([version_id])
    found = self._present_keys(wanted)
    return len(found) == 1

1815

1816

def _keys_to_version_ids(self, keys):
    """Return a tuple holding the first element of each key in keys."""
    return tuple([key[0] for key in keys])

1818

1819

def get_position(self, version_id):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(version_id))

1827

1828

def _node_to_position(self, node):
    """Convert an index value to position details.

    The node value is one flag character followed by "<pos> <size>".
    """
    fields = node[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

1832

1833

def get_method(self, version_id):
    """Return compression method of specified version."""
    node = self._get_node(version_id)
    return self._get_method(node)

1836

1837

def _get_node(self, version_id):
    """Return the index node for version_id.

    :raises RevisionNotPresent: if the index has no entry for version_id.
    """
    try:
        matches = list(self._get_entries(
            self._version_ids_to_keys([version_id])))
        return matches[0]
    except IndexError:
        raise RevisionNotPresent(version_id, self)

1842

1843

def get_options(self, version_id):
    """Return a list representing options.

    The list holds the compression method, followed by 'no-eol' when the
    stored index value starts with 'N'.

    e.g. ['foo', 'bar']
    """
    node = self._get_node(version_id)
    method = self._get_method(node)
    if node[2][0] == 'N':
        return [method, 'no-eol']
    return [method]

1853

1854

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :raises errors.RevisionNotPresent: if iter_parents yields nothing for
        version_id.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        return found[0][1]
    # missing key
    raise errors.RevisionNotPresent(version_id, self)

1861

1862

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts."""
    # The lookup is done even for a parentless index, with
    # check_present=True passed through to _get_entries.
    wanted = self._version_ids_to_keys([version_id])
    nodes = list(self._get_entries(wanted, check_present=True))
    if self._parents:
        return self._keys_to_version_ids(nodes[0][3][0])
    return ()

1869

1870

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: naming one of the missing keys, if any
        requested version is not in the index.
    """
    wanted = self._version_ids_to_keys(version_ids)
    absent = wanted.difference(self._present_keys(wanted))
    if not absent:
        return
    raise RevisionNotPresent(absent.pop(), self)

1877

1878

def add_version(self, version_id, options, access_memo, parents):
    """Add a version record to the index.

    This is add_versions() called with a single record.
    """
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))

1881

1882

def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param versions: a list of tuples:
        (version_id, options, access_memo, parents), where access_memo is
        an (index, pos, size) tuple.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises errors.ReadOnlyError: if this index has no add callback.
    :raises KnitCorrupt: if a record does not fit this index (a line-delta
        in a non-delta index, a line-delta without parents, parents in a
        parentless index) or disagrees with an entry already present.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    keys = {}
    for (version_id, options, access_memo, parents) in versions:
        index, pos, size = access_memo
        key = (version_id, )
        parents = tuple([(parent, ) for parent in parents])
        # The value is a one character flag followed by "<pos> <size>".
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        is_delta = 'line-delta' in options
        if is_delta and not self._deltas:
            raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                if is_delta:
                    if not parents:
                        # parents[0] below would otherwise fail with a
                        # bare IndexError.
                        raise KnitCorrupt(self, "attempt to add line-delta "
                            "node with no parents.")
                    # A delta is stored against its first parent.
                    node_refs = (parents, (parents[0],))
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    if not random_id:
        # check for dups: entries already present with identical details
        # are dropped from the insertion set. keys is pruned inside the
        # loop, so the matches are collected into a list first.
        present_nodes = list(self._get_entries(keys))
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in keys.items()]
    else:
        # A parentless index takes plain (key, value) nodes.
        result = [(key, value)
                  for key, (value, node_refs) in keys.items()]
    self._add_callback(result)

1943

1944

def _version_ids_to_keys(self, version_ids):
    """Return the set of 1-tuple keys for the given version ids."""
    return set([(version_id, ) for version_id in version_ids])

1946

1947

1948

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        :param _file_mode: Mode passed to the transport when the file is
            written for the first time.
        :param _dir_mode: Directory mode passed to the transport on the
            first write.
        :param _need_to_create: If True, the first add_raw_records call
            writes the file with put_bytes_non_atomic instead of appending.
        :param _create_parent_dir: Passed to the transport as
            create_parent_dir on the first write.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if self._need_to_create:
            # First write: the data starts at the beginning of a new file.
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            offset = 0
        else:
            offset = self._transport.append_bytes(self._filename, raw_data)
        memos = []
        for size in sizes:
            memos.append((None, offset, size))
            offset += size
        return memos

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        ranges = []
        for index, pos, size in memos_for_retrieval:
            ranges.append((pos, size))
        for pos, data in self._transport.readv(self._filename, ranges):
            yield data

2026

2027

2028

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group consecutive memos into same-index requests.
        # Every run is kept, whatever its index value is, so that a memo
        # can never be dropped without the lookup below noticing it.
        request_lists = []
        for (index, offset, length) in memos_for_retrieval:
            if request_lists and request_lists[-1][0] == index:
                request_lists[-1][1].append((offset, length))
            else:
                request_lists.append((index, [(offset, length)]))
        # second pass, one readv per run, in the order requested.
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that add_raw_records will use.
        :param index: The index to report in memos for newly added records.
            If it is not None it is also registered for reading.
        :param transport_packname: A (transport, packname) tuple naming
            where the data written through writer can be read back from.
        """
        # Unpacked in the body rather than in the signature, which keeps
        # the call signature working for positional callers.
        transport, packname = transport_packname
        if index is not None:
            self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index

2113

2114

2115

class _StreamAccess(object):
    """A Knit Access object that provides data from a datastream.

    It also provides a fallback to present as unannotated data, annotated data
    from a *backing* access object.

    This is triggered by a index_memo which is pointing to a different index
    than this was constructed with, and is used to allow extracting full
    unannotated texts for insertion into annotated knits.
    """

    def __init__(self, reader_callable, stream_index, backing_knit,
        orig_factory):
        """Create a _StreamAccess object.

        :param reader_callable: The reader_callable from the datastream.
            This is called to buffer all the data immediately, for
            random access.
        :param stream_index: The index the data stream this provides access to
            which will be present in native index_memo's.
        :param backing_knit: The knit object that will provide access to
            annotated texts which are not available in the stream, so as to
            create unannotated texts.
        :param orig_factory: The original content factory used to generate the
            stream. This is used for checking whether the thunk code for
            supporting _copy_texts will generate the correct form of data.
        """
        self.data = reader_callable(None)
        self.stream_index = stream_index
        self.backing_knit = backing_knit
        self.orig_factory = orig_factory

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a set of records.

        :param memos_for_retrieval: An iterable containing the (thunk_flag,
            index, start, end) memo for retrieving the bytes.
        :return: An iterator over the bytes of the records.
        :raises errors.KnitCorrupt: if a thunk is requested but the original
            factory is not a KnitPlainFactory, or the backing index reports
            neither 'fulltext' nor 'line-delta' for the version.
        """
        # use a generator for memory friendliness
        for thunk_flag, version_id, start, end in memos_for_retrieval:
            if version_id is self.stream_index:
                # Native memo: the record is a slice of the buffered stream.
                yield self.data[start:end]
                continue
            # we have been asked to thunk. This thunking only occurs when
            # we are obtaining plain texts from an annotated backing knit
            # so that _copy_texts will work.
            # We could improve performance here by scanning for where we need
            # to do this and using get_line_list, then interleaving the output
            # as desired. However, for now, this is sufficient.
            if self.orig_factory.__class__ != KnitPlainFactory:
                raise errors.KnitCorrupt(
                    self, 'Bad thunk request %r' % version_id)
            lines = self.backing_knit.get_lines(version_id)
            line_bytes = ''.join(lines)
            # The digest is taken before any trailing newline is added.
            digest = sha_string(line_bytes)
            if lines:
                if lines[-1][-1] != '\n':
                    lines[-1] = lines[-1] + '\n'
                    line_bytes += '\n'
            # Only validate the recorded compression method; the record
            # built below does not depend on the options themselves.
            orig_options = list(self.backing_knit._index.get_options(version_id))
            if ('fulltext' not in orig_options
                and 'line-delta' not in orig_options):
                raise errors.KnitCorrupt(self,
                    'Unknown compression method %r' % orig_options)
            # We want plain data, because we expect to thunk only to allow text
            # extraction.
            size, bytes = self.backing_knit._data._record_to_data(version_id,
                digest, lines, line_bytes)
            yield bytes

2187

2188

2189

class _StreamIndex(object):
    """A Knit Index object that uses the data map from a datastream."""

    def __init__(self, data_list, backing_index):
        """Create a _StreamIndex object.

        :param data_list: The data_list from the datastream.
        :param backing_index: The index which will supply values for nodes
            referenced outside of this stream.
        """
        self.data_list = data_list
        self.backing_index = backing_index
        # version -> (options, (start, end), parents); the records are laid
        # out back to back in the order of data_list.
        self._by_version = {}
        start = 0
        for key, options, length, parents in data_list:
            end = start + length
            self._by_version[key] = options, (start, end), parents
            start = end

    def get_ancestry(self, versions, topo_sorted):
        """Get an ancestry list for versions."""
        if topo_sorted:
            # Not needed for basic joins
            raise NotImplementedError(self.get_ancestry)
        # get a graph of all the mentioned versions:
        # Little ugly - basically copied from KnitIndex, but don't want to
        # accidentally incorporate too much of that index's code.
        seen = set()
        todo = set(versions)
        by_version = self._by_version
        while todo:
            version = todo.pop()
            try:
                recorded_parents = by_version[version][2]
            except KeyError:
                raise RevisionNotPresent(version, self)
            for parent in recorded_parents:
                # trim ghosts, and skip parents already completed
                if parent in by_version and parent not in seen:
                    todo.add(parent)
            seen.add(version)
        return list(seen)

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(method, index_memo, compression_parent,
            parents, noeol).
        """
        details = {}
        for version_id in version_ids:
            method = self.get_method(version_id)
            parent_ids = self.get_parents_with_ghosts(version_id)
            no_eol = ('no-eol' in self.get_options(version_id))
            # A delta is built against the first recorded parent.
            basis = None
            if method != 'fulltext':
                basis = parent_ids[0]
            memo = self.get_position(version_id)
            details[version_id] = (method, memo, basis, parent_ids, no_eol)
        return details

    def get_method(self, version_id):
        """Return compression method of specified version."""
        try:
            options = self._by_version[version_id][0]
        except KeyError:
            # Strictly speaking this should check in the backing knit, but
            # until we have a test to discriminate, this will do.
            return self.backing_index.get_method(version_id)
        for method in ('fulltext', 'line-delta'):
            if method in options:
                return method
        raise errors.KnitIndexUnknownMethod(self, options)

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        try:
            entry = self._by_version[version_id]
        except KeyError:
            return self.backing_index.get_options(version_id)
        return entry[0]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        try:
            entry = self._by_version[version_id]
        except KeyError:
            return self.backing_index.get_parents_with_ghosts(version_id)
        return entry[2]

    def get_position(self, version_id):
        """Return details needed to access the version.

        _StreamAccess has the data as a big array, so we return slice
        coordinates into that (as index_memo's are opaque outside the
        index and matching access class).

        :return: a tuple (thunk_flag, index, start, end). If thunk_flag is
            False, index will be self, otherwise it will be a version id.
        """
        try:
            start, end = self._by_version[version_id][1]
        except KeyError:
            # Signal to the access object to handle this from the backing knit.
            return (True, version_id, None, None)
        return False, self, start, end

    def get_versions(self):
        """Get all the versions in the stream."""
        return self._by_version.keys()

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: A list of (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        found = []
        for version in version_ids:
            try:
                entry = self._by_version[version]
            except KeyError:
                continue
            found.append((version, entry[2]))
        return found

2320

2321

2322

class _KnitData(object):

2323

"""Manage extraction of data from a KnitAccess, caching and decompressing.

2324

2325

The KnitData class provides the logic for parsing and using knit records,

2326

making use of an access method for the low level read and write operations.

2327

"""

2328

2329

def __init__(self, access):

2330

"""Create a KnitData object.

2331

2332

:param access: The access method to use. Access methods such as

2333

_KnitAccess manage the insertion of raw records and the subsequent

2334

retrieval of the same.

2335

"""

2336

self._access = access

2337

self._checked = False

2338

# TODO: jam 20060713 conceptually, this could spill to disk

2339

# if the cached size gets larger than a certain amount

2340

# but it complicates the model a bit, so for now just use

2341

# a simple dictionary

2342

self._cache = {}

2343

self._do_cache = False

2344

2345

def enable_cache(self):

2346

"""Enable caching of reads."""

2347

self._do_cache = True

2348

2349

def clear_cache(self):

2350

"""Clear the record cache."""

2351

self._do_cache = False

2352

self._cache = {}

2353

2354

def _open_file(self):

2355

return self._access.open_file()

2356

2357

def _record_to_data(self, version_id, digest, lines, dense_lines=None):

2358

"""Convert version_id, digest, lines into a raw data block.

2359

2360

:param dense_lines: The bytes of lines but in a denser form. For

2361

instance, if lines is a list of 1000 bytestrings each ending in \n,

2362

dense_lines may be a list with one line in it, containing all the

2363

1000's lines and their \n's. Using dense_lines if it is already

2364

known is a win because the string join to create bytes in this

2365

function spends less time resizing the final string.

2366

:return: (len, a StringIO instance with the raw data ready to read.)

2367

"""

2368

# Note: using a string copy here increases memory pressure with e.g.

2369

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

2370

# when doing the initial commit of a mozilla tree. RBC 20070921

2371

bytes = ''.join(chain(

2372

["version %s %d %s\n" % (version_id,

2373

len(lines),

2374

digest)],

2375

dense_lines or lines,

2376

["end %s\n" % version_id]))

2377

assert bytes.__class__ == str

2378

compressed_bytes = bytes_to_gzip(bytes)

2379

return len(compressed_bytes), compressed_bytes

2380

2381

def add_raw_records(self, sizes, raw_data):

2382

"""Append a prepared record to the data file.

2383

2384

:param sizes: An iterable containing the size of each raw data segment.

2385

:param raw_data: A bytestring containing the data.

2386

:return: a list of index data for the way the data was stored.

2387

See the access method add_raw_records documentation for more

2388

details.

2389

"""

2390

return self._access.add_raw_records(sizes, raw_data)

2391

2392

def _parse_record_header(self, version_id, raw_data):

2393

"""Parse a record header for consistency.

2394

2395

:return: the header and the decompressor stream.

2396

as (stream, header_record)

2397

"""

2398

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2399

try:

2400

rec = self._check_header(version_id, df.readline())

2401

except Exception, e:

2402

raise KnitCorrupt(self._access,

2403

"While reading {%s} got %s(%s)"

2404

% (version_id, e.__class__.__name__, str(e)))

2405

return df, rec

2406

2407

def _check_header(self, version_id, line):

2408

rec = line.split()

2409

if len(rec) != 4:

2410

raise KnitCorrupt(self._access,

2411

'unexpected number of elements in record header')

2412

if rec[1] != version_id:

2413

raise KnitCorrupt(self._access,

2414

'unexpected version, wanted %r, got %r'

2415

% (version_id, rec[1]))

2416

return rec

2417

2418

def _parse_record(self, version_id, data):

2419

# profiling notes:

2420

# 4168 calls in 2880 217 internal

2421

# 4168 calls to _parse_record_header in 2121

2422

# 4168 calls to readlines in 330

2423

df = GzipFile(mode='rb', fileobj=StringIO(data))

2424

2425

try:

2426

record_contents = df.readlines()

2427

except Exception, e:

2428

raise KnitCorrupt(self._access,

2429

"While reading {%s} got %s(%s)"

2430

% (version_id, e.__class__.__name__, str(e)))

2431

header = record_contents.pop(0)

2432

rec = self._check_header(version_id, header)

2433

2434

last_line = record_contents.pop()

2435

if len(record_contents) != int(rec[2]):

2436

raise KnitCorrupt(self._access,

2437

'incorrect number of lines %s != %s'

2438

' for version {%s}'

2439

% (len(record_contents), int(rec[2]),

2440

version_id))

2441

if last_line != 'end %s\n' % rec[1]:

2442

raise KnitCorrupt(self._access,

2443

'unexpected version end line %r, wanted %r'

2444

% (last_line, version_id))

2445

df.close()

2446

return record_contents, rec[3]

2447

2448

def read_records_iter_raw(self, records):

2449

"""Read text records from data file and yield raw data.

2450

2451

This unpacks enough of the text record to validate the id is

2452

as expected but thats all.

2453

"""

2454

# setup an iterator of the external records:

2455

# uses readv so nice and fast we hope.

2456

if len(records):

2457

# grab the disk data needed.

2458

if self._cache:

2459

# Don't check _cache if it is empty

2460

needed_offsets = [index_memo for version_id, index_memo

2461

in records

2462

if version_id not in self._cache]

2463

else:

2464

needed_offsets = [index_memo for version_id, index_memo

2465

in records]

2466

2467

raw_records = self._access.get_raw_records(needed_offsets)

2468

2469

for version_id, index_memo in records:

2470

if version_id in self._cache:

2471

# This data has already been validated

2472

data = self._cache[version_id]

2473

else:

2474

data = raw_records.next()

2475

if self._do_cache:

2476

self._cache[version_id] = data

2477

2478

# validate the header

2479

df, rec = self._parse_record_header(version_id, data)

2480

df.close()

2481

yield version_id, data

2482

2483

def read_records_iter(self, records):

2484

"""Read text records from data file and yield result.

2485

2486

The result will be returned in whatever is the fastest to read.

2487

Not by the order requested. Also, multiple requests for the same

2488

record will only yield 1 response.

2489

:param records: A list of (version_id, pos, len) entries

2490

:return: Yields (version_id, contents, digest) in the order

2491

read, not the order requested

2492

"""

2493

if not records:

2494

return

2495

2496

if self._cache:

2497

# Skip records we have alread seen

2498

yielded_records = set()

2499

needed_records = set()

2500

for record in records:

2501

if record[0] in self._cache:

2502

if record[0] in yielded_records:

2503

continue

2504

yielded_records.add(record[0])

2505

data = self._cache[record[0]]

2506

content, digest = self._parse_record(record[0], data)

2507

yield (record[0], content, digest)

2508

else:

2509

needed_records.add(record)

2510

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2511

else:

2512

needed_records = sorted(set(records), key=operator.itemgetter(1))

2513

2514

if not needed_records:

2515

return

2516

2517

# The transport optimizes the fetching as well

2518

# (ie, reads continuous ranges.)

2519

raw_data = self._access.get_raw_records(

2520

[index_memo for version_id, index_memo in needed_records])

2521

2522

for (version_id, index_memo), data in \

2523

izip(iter(needed_records), raw_data):

2524

content, digest = self._parse_record(version_id, data)

2525

if self._do_cache:

2526

self._cache[version_id] = data

2527

yield version_id, content, digest

2528

2529

def read_records(self, records):

2530

"""Read records into a dictionary."""

2531

components = {}

2532

for record_id, content, digest in \

2533

self.read_records_iter(records):

2534

components[record_id] = (content, digest)

2535

return components

2536

2537

2538

class InterKnit(InterVersionedFile):

2539

"""Optimised code paths for knit to knit operations."""

2540

2541

_matching_file_from_factory = KnitVersionedFile

2542

_matching_file_to_factory = KnitVersionedFile

2543

2544

@staticmethod
def is_compatible(source, target):
    """Be compatible with knits.

    :return: True only when both source and target are KnitVersionedFile
        instances.
    """
    try:
        if not isinstance(source, KnitVersionedFile):
            return False
        return isinstance(target, KnitVersionedFile)
    except AttributeError:
        return False

2552

2553

def _copy_texts(self, pb, msg, version_ids, ignore_missing=False):
    """Copy texts to the target by extracting and adding them one by one.

    see join() for the parameter definitions.

    :return: The number of versions that were added to the target.
    """
    version_ids = self._get_source_version_ids(version_ids, ignore_missing)
    graph = self.source.get_graph(version_ids)
    order = topo_sort(graph.items())

    def size_of_content(content):
        return sum(len(line) for line in content.text())
    # Cache at most 10MB of parent texts
    max_cache_bytes = 10*1024*1024
    parent_cache = lru_cache.LRUSizeCache(max_size=max_cache_bytes,
                                          compute_size=size_of_content)
    # TODO: jam 20071116 It would be nice to have a streaming interface to
    #       get multiple texts from a source. The source could be smarter
    #       about how it handled intermediate stages.
    #       get_line_list() or make_mpdiffs() seem like a possibility, but
    #       at the moment they extract all full texts into memory, which
    #       causes us to store more than our 3x fulltext goal.
    #       Repository.iter_files_bytes() may be another possibility
    to_process = []
    for version in order:
        if version not in self.target:
            to_process.append(version)
    total = len(to_process)
    # NOTE: the pb argument is replaced by a fresh nested progress bar.
    pb = ui.ui_factory.nested_progress_bar()
    try:
        for position, version in enumerate(to_process):
            pb.update('Converting versioned data', position, total)
            parents = self.source.get_parents_with_ghosts(version)
            lines = self.source.get_lines(version)
            sha1, num_bytes, parent_text = self.target.add_lines(
                version, parents, lines, parent_texts=parent_cache)
            # Remember the text so later versions can use it as a parent.
            parent_cache[version] = parent_text
    finally:
        pb.finished()
    return total

2589

2590

def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
    """See InterVersionedFile.join.

    Raw records are copied from the source knit to the target knit.
    Annotated records are converted to plain ones when only the source
    is annotated; a plain source with an annotated target is handed to
    _copy_texts() instead.

    :return: The number of versions copied into the target.
    """
    assert isinstance(self.source, KnitVersionedFile)
    assert isinstance(self.target, KnitVersionedFile)

    # If the source and target are mismatched w.r.t. annotations vs
    # plain, the data needs to be converted accordingly
    source_annotated = self.source.factory.annotated
    if source_annotated == self.target.factory.annotated:
        converter = None
    elif source_annotated:
        converter = self._anno_to_plain_converter
    else:
        # We're converting from a plain to an annotated knit. Copy them
        # across by full texts.
        return self._copy_texts(pb, msg, version_ids, ignore_missing)

    version_ids = self._get_source_version_ids(version_ids, ignore_missing)
    if not version_ids:
        return 0

    pb = ui.ui_factory.nested_progress_bar()
    try:
        version_ids = list(version_ids)
        if None in version_ids:
            version_ids.remove(None)

        ancestry = self.source.get_ancestry(version_ids, topo_sorted=False)
        self.source_ancestry = set(ancestry)
        present = set(self.target._index.get_versions())
        # XXX: For efficiency we should not look at the whole index,
        #      we only need to consider the referenced revisions - they
        #      must all be present, or the method must be full-text.
        #      TODO, RBC 20070919
        absent = self.source_ancestry - present

        if not absent:
            return 0
        ordered = topo_sort(self.source.get_graph())

        to_copy = []
        for candidate in ordered:
            if not self.target.has_version(candidate) and candidate in absent:
                to_copy.append(candidate)

        # plan the join:
        plan = []
        positions = []
        scheduled = set()
        source_index = self.source._index
        for version_id in to_copy:
            options = source_index.get_options(version_id)
            parents = source_index.get_parents_with_ghosts(version_id)
            # check that it will be a consistent copy:
            for parent in parents:
                # if source has the parent, we must :
                # * already have it or
                # * have it scheduled already
                # otherwise we don't care
                assert (self.target.has_version(parent) or
                        parent in scheduled or
                        not self.source.has_version(parent))
            index_memo = source_index.get_position(version_id)
            positions.append((version_id, index_memo))
            plan.append((version_id, options, parents))
            scheduled.add(version_id)

        # pull the raw data across:
        count = 0
        total = len(to_copy)
        chunks = []
        index_entries = []
        raw_iter = self.source._data.read_records_iter_raw(positions)
        for read_item, planned_item in izip(raw_iter, plan):
            version_id, raw_data = read_item
            planned_id, options, parents = planned_item
            assert version_id == planned_id, 'logic error, inconsistent results'
            count += 1
            pb.update("Joining knit", count, total)
            if converter:
                size, raw_data = converter(raw_data, version_id, options,
                                           parents)
            else:
                size = len(raw_data)
            index_entries.append((version_id, options, parents, size))
            chunks.append(raw_data)
        self.target._add_raw_records(index_entries, ''.join(chunks))
        return count
    finally:
        pb.finished()

2676

2677

def _anno_to_plain_converter(self, raw_data, version_id, options,
                             parents):
    """Convert one annotated raw record into a plain raw record.

    The record is parsed with the source's data access and factory,
    lowered with the target's factory, and re-serialised by the target's
    data access.  ``parents`` is accepted but not used.

    :return: Whatever the target's _record_to_data() returns for the
        converted lines.
    """
    data, digest = self.source._data._parse_record(version_id, raw_data)
    parser = self.source.factory
    writer = self.target.factory
    if 'fulltext' not in options:
        # Anything that is not a fulltext is treated as a line delta.
        delta = parser.parse_line_delta(data, version_id, plain=True)
        lines = writer.lower_line_delta(delta)
    else:
        content = parser.parse_fulltext(data, version_id)
        lines = writer.lower_fulltext(content)
    return self.target._data._record_to_data(version_id, digest, lines)

2689

2690

2691

# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

2692

2693

2694

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Report whether the pair is a weave source and a knit target.

        :return: True only when source is a bzrlib.weave.Weave and target
            is a KnitVersionedFile.  False otherwise, including when the
            check itself raises AttributeError.
        """
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Each version missing from the target is added by full text, in
        topological order.  The ``pb`` argument is replaced by a nested
        progress bar from the ui factory.

        :return: The number of versions added to the target.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            present = set(self.target._index.get_versions())
            absent = self.source_ancestry - present

            if not absent:
                return 0
            ordered = topo_sort(self.source.get_graph())

            to_add = []
            for candidate in ordered:
                if (not self.target.has_version(candidate)
                        and candidate in absent):
                    to_add.append(candidate)

            # do the join:
            total = len(to_add)
            for done, version_id in enumerate(to_add):
                pb.update("Converting to knit", done, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, lines)
            # Every queued version was added, so the count is the total.
            return total
        finally:
            pb.finished()

2750

2751

2752

# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

2753

2754

2755

# Deprecated, use PatienceSequenceMatcher instead

2756

# Old name bound to the same class as patiencediff.PatienceSequenceMatcher.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2757

2758

2759

def annotate_knit(knit, revision_id):
    """Annotate a knit that has no cached annotations.

    The annotations are rebuilt with a _KnitAnnotator.  This also works
    for knits that do cache annotations, but that is not recommended.

    :param knit: The knit to read texts from.
    :param revision_id: The revision whose text should be annotated.
    :return: An iterator over the annotated lines of that revision.
    """
    return iter(_KnitAnnotator(knit).get_annotated_lines(revision_id))

2768

2769

2770

class _KnitAnnotator(object):
    """Build up the annotations for a text."""

    def __init__(self, knit):
        """Create an annotator that reads its data from ``knit``."""
        self._knit = knit

        # unannotated lines of various revisions, this will have the final
        # newline correct
        self._fulltexts = {}
        # Content objects, differs from fulltexts because of how final newlines
        # are treated by knits. the content objects here will always have a
        # final newline
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that
        # will be annotated from it.
        self._pending_children = {}

        # revision_id => build details, as returned by the knit index
        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        # Created lazily by _get_heads_provider()
        self._heads_provider = None

    def _add_fulltext_content(self, revision_id, content_obj, noeol_flag):
        """Record the content object and plain fulltext of a revision.

        :param revision_id: The revision the content belongs to.
        :param content_obj: The content object for that revision.
        :param noeol_flag: If true, the fulltext is taken from a copy of
            the content with the final newline stripped; the stored
            content object itself is left untouched.
        :return: The fulltext as returned by the content's text().
        """
        self._fulltext_contents[revision_id] = content_obj
        if noeol_flag:
            content_obj = content_obj.copy()
            content_obj.strip_last_line_newline()
        fulltext = content_obj.text()
        self._fulltexts[revision_id] = fulltext
        # XXX: It would probably be good to check the sha1digest here
        return fulltext

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if parent_id not in self._annotated_lines:
                # This parent has not been annotated yet: park the child
                # under it and stop at the first such parent.
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
            fulltext, revision_id, left_matching_blocks,
            heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, revision_id):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate the
        fulltext.)

        Revisions for which the index returns no build details are recorded
        in the revision graph with no parents and are not searched again.

        :return: A list of (revision_id, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        """
        if revision_id in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([revision_id])
        records = []
        while pending:
            # get all pending nodes
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for rev_id, details in build_details.iteritems():
                (method, index_memo, compression_parent, parents,
                 noeol) = details
                self._revision_id_graph[rev_id] = parents
                records.append((rev_id, index_memo))
                pending.update(p for p in parents
                               if p not in self._all_build_details)

            missing_versions = this_iteration.difference(build_details.keys())
            for missing_version in missing_versions:
                # add a key, no parents.  The key must be the individual
                # revision id: the set of missing versions is unhashable.
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version)  # don't look for it
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records

    def _annotate_records(self, records):
        """Build the annotations for the listed records.

        :param records: A list of (revision_id, index_memo) records, as
            returned by _get_build_graph().
        """
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, raw_content,
             digest) in self._knit._data.read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            details = self._all_build_details[rev_id]
            (method, index_memo, compression_parent, parent_ids,
             noeol) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents, and
            #       parent_ids, we should be able to do this without assuming
            #       the build order
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                assert compression_parent is None and method == 'fulltext'
                fulltext_content = self._knit.factory.parse_fulltext(
                    raw_content, rev_id)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content,
                                                      noeol)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, raw_content)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, raw_content) = nodes_to_annotate.pop()
                (method, index_memo, compression_parent, parent_ids,
                 noeol) = self._all_build_details[rev_id]
                if method == 'line-delta':
                    parent_fulltext_content = (
                        self._fulltext_contents[compression_parent])
                    delta = self._knit.factory.parse_line_delta(raw_content,
                                                                rev_id)
                    # TODO: only copy when the parent is still needed elsewhere
                    fulltext_content = parent_fulltext_content.copy()
                    fulltext_content.apply_delta(delta, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content, noeol)
                    parent_fulltext = self._fulltexts[parent_ids[0]]
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    assert method == 'fulltext'
                    fulltext_content = self._knit.factory.parse_fulltext(
                        raw_content, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content, noeol)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                         left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues.

        The provider is built from the revision graph on first use and
        the same object is returned on later calls.
        """
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.HeadsCache(graph_obj)
        self._heads_provider = head_cache
        return head_cache

    def get_annotated_lines(self, revision_id):
        """Return the annotated fulltext at the given revision.

        :param revision_id: The revision id for this file
        """
        records = self._get_build_graph(revision_id)
        self._annotate_records(records)
        return self._annotated_lines[revision_id]

2965

2966

2967

try:

2968

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2969

except ImportError:

2970

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »