/brz/remove-bazaar : revision 2670.3.5

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Andrew Bennetts
Date: 2007-08-30 08:27:29 UTC
mto: (2535.3.55 repo-refactor)
mto: This revision was merged to the branch mainline in revision 2772.
Revision ID: andrew.bennetts@canonical.com-20070830082729-8bue7wh0bqut2xs2

Remove get_stream_as_bytes from KnitVersionedFile's API, make it a function in knitrepo.py instead.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

RevisionNotPresent,

100

RevisionAlreadyPresent,

101

)

102

from bzrlib.tuned_gzip import GzipFile

103

from bzrlib.osutils import (

104

contains_whitespace,

105

contains_linebreaks,

106

sha_strings,

107

)

108

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

109

from bzrlib.trace import mutter

110

from bzrlib.tsort import topo_sort

111

import bzrlib.ui

112

import bzrlib.weave

113

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

114

115

116

# TODO: Split out code specific to this format into an associated object.

117

118

# TODO: Can we put in some kind of value to check that the index and data

119

# files belong together?

120

121

# TODO: accommodate binaries, perhaps by storing a byte count

122

123

# TODO: function to check whole file

124

125

# TODO: atomically append data, then measure backwards from the cursor

126

# position after writing to work out where it was located. we may need to

127

# bypass python file buffering.

128

129

DATA_SUFFIX = '.knit'

130

INDEX_SUFFIX = '.kndx'

131

132

133

class KnitContent(object):
    """Annotated lines of one knit version, usable as the base for deltas.

    The content is held in ``self._lines`` as a list of (origin, text) pairs.
    """

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the stored (origin, text) pairs."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a freshly built list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that transform this content into new_lines.

        Each hunk is (old_start, old_end, new_length, new_annotated_lines);
        regions reported as 'equal' by the matcher are skipped.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, i1, i2, j1, j2 = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter() as a list."""
        return list(self.line_delta_iter(new_lines))

    def text(self):
        """Return only the text of every line, dropping the origins."""
        return [pair[1] for pair in self._lines]

    def copy(self):
        """Return a new KnitContent over a shallow copy of the line list."""
        return KnitContent(list(self._lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        Yields (source_pos, target_pos, length) triples for the runs lying
        between the delta hunks, followed by a terminating zero-length
        triple.
        """
        def trimmed(src_at, tgt_at, run):
            # knit deltas do not provide reliable info about whether the
            # last line of a file matches, due to eol handling, so the final
            # line of each candidate run is compared explicitly.
            if run > 0 and source[src_at + run - 1] != target[tgt_at + run - 1]:
                run -= 1
            return run

        total = len(target)
        src_at = 0
        tgt_at = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_at
            run = trimmed(src_at, tgt_at, gap)
            if run > 0:
                yield src_at, tgt_at, run
            tgt_at += inserted + gap
            src_at = hunk_end
        run = trimmed(src_at, tgt_at, total - tgt_at)
        if run > 0:
            yield src_at, tgt_at, run
        yield src_at + (total - tgt_at), total, 0

192

193

194

class _KnitFactory(object):
    """Shared base of the factories that build content objects."""

    def make(self, lines, version_id):
        """Return a KnitContent that attributes every line to version_id."""
        origins = [version_id] * len(lines)
        return KnitContent(zip(origins, lines))

200

201

202

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: iterable of serialised fulltext lines.
        :param version_id: not used here; the origin of each line is read
            from the line itself.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param version_id: forwarded to parse_line_delta(). It defaults to
            None so that existing one-argument callers keep working; the
            previous code omitted the argument in the forwarded call, which
            made this method raise TypeError.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: not used here; origins come from the delta lines.
        """
        result = []
        lines = iter(lines)
        next = lines.next

        # walk through the lines parsing: each header is followed by exactly
        # `count` annotated lines, pulled from the same iterator.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

292

293

294

class KnitPlainFactory(_KnitFactory):
    """Factory producing Content objects from unannotated storage."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        This is not a no-op: the internal form is (versionid, line) pairs,
        with version_id used as the constant origin of every line.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, annotated_lines) for each delta hunk.

        Every hunk is a 'start,end,count' header followed by count content
        lines, each of which is attributed to version_id.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, zip([version_id] * count,
                                         lines[body_start:body_end])
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Return an iterator over the fulltext lines, unchanged."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        The hunk headers are consumed to learn how many lines follow, but
        are not themselves yielded.
        """
        line_iter = iter(lines)
        for header in line_iter:
            count = int(header.split(',')[2])
            for _ in xrange(count):
                yield line_iter.next()

    def lower_fulltext(self, content):
        """Serialise a content object as its plain text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta: one header per hunk, then the bare texts."""
        serialised = []
        for start, end, count, annotated_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend([pair[1] for pair in annotated_lines])
        return serialised

347

348

349

def make_empty_knit(transport, relpath):
    """Construct an empty, unannotated knit at the specified location.

    :param transport: transport on which the knit files live.
    :param relpath: path of the knit relative to transport.
    :return: the newly constructed KnitVersionedFile.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a factory
    # *instance*; everything after the first two arguments is passed by
    # keyword so nothing can land in the wrong slot.  create=True is needed
    # because the constructor otherwise only opens an existing knit.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

352

353

354

class KnitVersionedFile(VersionedFile):

355

"""Weave-like structure with faster random access.

356

357

A knit stores a number of texts and a summary of the relationships

358

between them. Texts are identified by a string version-id. Texts

359

are normally stored and retrieved as a series of lines, but can

360

also be passed as single strings.

361

362

Lines are stored with the trailing newline (if any) included, to

363

avoid special cases for files with no final newline. Lines are

364

composed of 8-bit characters, not unicode. The combination of

365

these approaches should mean any 'binary' file can be safely

366

stored and retrieved.

367

"""

368

369

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param relpath: base name of the knit; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it for the index and data files.
    :param transport: transport used to reach those files.
    :param file_mode: passed through to the index and data access objects.
    :param access_mode: 'r' or 'w'; defaults to 'w' when None.
    :param factory: content factory; a KnitAnnotateFactory is used when
        this is not given.
    :param basis_knit: deprecated; passing it only triggers a
        DeprecationWarning.
    :param delta: stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: passed through to the index and data access objects.
    :param index: An index to use for the knit.
    :param access_method: data access object to use instead of building a
        _KnitAccess for relpath + DATA_SUFFIX.
    """
    if deprecated_passed(basis_knit):
        # The message used to read "KnitVersionedFile.__()", which named no
        # real method.
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit parameter is"
                      " deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many parents _check_should_delta() walks back.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # len(self) is consulted below, so the index must already be in place.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

414

415

def __repr__(self):
    """Return 'ClassName(absolute path of the knit)'."""
    class_name = self.__class__.__name__
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (class_name, location)

418

419

def _check_should_delta(self, first_parents):
    """Decide whether a new version should be stored as a delta.

    Starting from first_parents[0], the chain of first parents is followed
    for at most self._max_delta_chain steps, summing the stored size of
    every delta passed, until a fulltext is reached.

    :return: True if a fulltext was reached and its stored size is larger
        than the summed delta sizes (store a delta); False otherwise
        (store a full text).
    """
    accumulated_delta_size = 0
    fulltext_size = None
    found_fulltext = False
    chain = first_parents
    for _step in xrange(self._max_delta_chain):
        ancestor = chain[0]
        method = self._index.get_method(ancestor)
        _index, _pos, size = self._index.get_position(ancestor)
        if method == 'fulltext':
            fulltext_size = size
            found_fulltext = True
            break
        accumulated_delta_size += size
        chain = self._index.get_parents(ancestor)
    if not found_fulltext:
        # No fulltext within the allowed chain length: a new one is needed.
        return False
    return fulltext_size > accumulated_delta_size

447

448

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored directly as a 'line-delta' record when
    _check_should_delta() allows it.  When delta_parent is None, or the
    check says no, the work is handed to the base-class implementation.

    :param version_id: id of the version being added.
    :param parents: parent version ids, recorded in the index.
    :param delta_parent: version the delta applies to, or None.
    :param sha1: digest stored with the record.
    :param noeol: if true, the 'no-eol' option is recorded.
    :param delta: list of (start, end, count, lines) hunks.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)
    # NOTE(review): a loop that sorted `parents` into present/ghost lists
    # used to follow here; neither list was ever read, so it was dropped.

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')

    # delta_parent is known to be set from here on.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    access_memo = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)

502

503

def _add_raw_records(self, records, data):
    """Store pre-joined raw record data and index every record in it.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The concatenated data of all the records, in the same
        order; each record occupies `size` bytes following the records
        before it.
    """
    # write all the data
    sizes = [record[3] for record in records]
    memos = self._data.add_raw_records(sizes, data)
    cursor = 0
    entries = []
    for record, memo in zip(records, memos):
        version_id, options, parents, size = record
        entries.append((version_id, options, memo, parents))
        if self._data._do_cache:
            # Keep this record's slice of the joined data in the cache.
            self._data._cache[version_id] = data[cursor:cursor + size]
        cursor += size
    self._index.add_versions(entries)

523

524

def enable_cache(self):
    """Turn on caching in the underlying data object."""
    data_store = self._data
    data_store.enable_cache()

527

528

def clear_cache(self):
    """Empty the cache of the data object; the index is not touched."""
    data_store = self._data
    data_store.clear_cache()

531

532

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index goes to a '.tmp' name first, the data file is copied next,
    and only then is the index moved to its final name.
    """
    index_target = name + INDEX_SUFFIX
    temp_index = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(temp_index,
                                  self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(temp_index, index_target)

546

547

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport, reusing this knit's settings.

    The factory and delta setting of this knit are carried over; `mode` is
    accepted but not used by this implementation.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

550

551

def _fix_parents(self, version_id, new_parents):
    """Record a corrected parents list for version_id.

    A further index entry is added for the version, identical to the
    cached one except for the parents.  new_parents must contain every
    parent currently recorded.
    """
    entry = self._index._cache[version_id]
    recorded_parents = set(entry[4])
    assert recorded_parents.difference(set(new_parents)) == set()
    options, position, size = entry[1], entry[2], entry[3]
    self._index.add_version(version_id,
                            options,
                            (None, position, size),
                            new_parents)

565

566

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    All requested raw records are read and joined in memory before this
    method returns; the returned callable then serves slices of that
    buffer.

    :param required_versions: the exact set of versions to be returned, i.e.
        not a transitive closure.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.  reader_callable(length) returns the next `length`
        bytes of the joined record data, or everything remaining when
        length is None.
    :raises RevisionNotPresent: if a requested version is not in this knit.
    """
    required_versions = set([osutils.safe_revision_id(v) for v in
        required_versions])
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    version_list = []
    for version_id in self.versions():
        if version_id in required_versions:
            version_list.append(version_id)

    # create the list of version information for the result
    copy_queue_records = []
    # NOTE(review): copy_set is filled below but never read in this method.
    copy_set = set()
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        # Only the size is used from the memo; it becomes the advertised
        # record length.
        none, data_pos, data_size = index_memo
        copy_set.add(version_id)
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    # The raw records are walked in lockstep with result_version_list so
    # the joined data matches the order of the advertised lengths.
    for (version_id, raw_data), \
            (version_id2, options, _, parents) in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 result_version_list):
        assert version_id == version_id2, 'logic error, inconsistent results'
        raw_datum.append(raw_data)
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

625

626

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored delta.

    :return: a generator of (source_pos, target_pos, length) blocks when
        the version is stored as a 'line-delta', otherwise None.
    """
    if self._index.get_method(version_id) == 'line-delta':
        _parent, _sha1, _noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

631

632

def get_delta(self, version_id):
    """Return (parent, sha1, noeol, delta) describing version_id.

    parent is the first parent of the version, or None when it has no
    parents.  A version stored as a line delta has its stored delta parsed
    and returned; one stored as a fulltext has a delta computed against
    the parent's text (or against an empty text when there is no parent).

    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)

    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a delta already: it only needs parsing.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    matcher = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(matcher, new_content)

660

661

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    Returns 'knit-annotated' or 'knit-plain' depending on the factory.
    """
    annotated_part = "plain"
    if self.factory.annotated:
        annotated_part = "annotated"
    return "knit-%s" % (annotated_part,)

668

669

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    The (node, parents) items reported by the index are returned as a dict.
    """
    return dict(self._index.get_graph())

673

674

def get_sha1(self, version_id):
    """Return the digest of one version, by way of get_sha1s()."""
    digests = self.get_sha1s([version_id])
    return digests[0]

676

677

def get_sha1s(self, version_ids):
    """Return the recorded digests of version_ids, in the order given."""
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for safe_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[safe_id][2])
    return digests

683

684

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    A knit uses the data suffix and the index suffix, in that order.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

688

689

def has_ghost(self, version_id):
    """Return True if version_id is referenced as a parent but not stored.

    A version that is present in the knit is never reported as a ghost.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

703

704

def insert_data_stream(self, (format, data_list, reader_callable)):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time.  It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    The single argument is a (format, data_list, reader_callable) tuple:
    format is compared against get_format_signature(), data_list holds
    (version_id, options, length, parents) entries, and
    reader_callable(length) must return the next `length` bytes of record
    data.

    :raises KnitDataStreamIncompatible: if format differs from this knit's
        format signature.
    :raises KnitCorrupt: if an already present version has different
        parents or a different sha-1 in the stream.

    :seealso: get_data_stream
    """
    if format != self.get_format_signature():
        mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            format, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce.
            # Reading here also consumes this record's bytes, so the next
            # entry in data_list reads from the right place.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            df, rec = self._data._parse_record_header(version_id, raw_data)
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            # New version: store the raw bytes as they are and index them.
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

746

747

def versions(self):
    """See VersionedFile.versions.

    With the 'evil' debug flag set, the call site is logged first.
    """
    evil_tracing = 'evil' in debug.debug_flags
    if evil_tracing:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

752

753

def has_version(self, version_id):
    """See VersionedFile.has_version.

    With the 'evil' debug flag set, the call site is logged first.
    """
    evil_tracing = 'evil' in debug.debug_flags
    if evil_tracing:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

759

760

__contains__ = has_version

761

762

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: content object for the new text; its _lines are
        updated in place when `annotated` is true.
    :param parents: parent version ids; parents[0] is the delta basis.
    :param parent_texts: optional mapping of version id to content object,
        consulted before the knit is read.  None (the default) is treated
        as an empty mapping.
    :param delta: if true, a line delta against parents[0] is returned.
    :param annotated: if true, (origin, text) pairs are copied from each
        parent for the lines that match it.
    :param left_matching_blocks: optional precomputed matching blocks
        between parents[0] and content, used instead of running a diff.
    :return: the line delta as a list of hunks when `delta` is true,
        otherwise None.
    """
    # A fresh dict per call replaces the shared mutable default argument,
    # and lets callers pass None explicitly.
    if parent_texts is None:
        parent_texts = {}
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

795

796

def _make_line_delta(self, delta_seq, new_content):

797

"""Generate a line delta from delta_seq and new_content."""

798

diff_hunks = []

799

for op in delta_seq.get_opcodes():

800

if op[0] == 'equal':

801

continue

802

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

803

return diff_hunks

804

805

def _get_components_positions(self, version_ids):
    """Map every record needed to build version_ids to its position data.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, index_memo, next) is returned.
    method is the way referenced data should be applied.
    index_memo is whatever the index reports as the record's position.
    next is the build-parent of the version, or None for fulltexts.
    """
    component_data = {}
    for version_id in version_ids:
        cursor = version_id
        # Walk the build chain until a fulltext or an already mapped record
        # is reached.
        while cursor is not None and cursor not in component_data:
            method = self._index.get_method(cursor)
            build_parent = None
            if method != 'fulltext':
                build_parent = self.get_parents(cursor)[0]
            index_memo = self._index.get_position(cursor)
            component_data[cursor] = (method, index_memo, build_parent)
            cursor = build_parent
    return component_data

831

832

def _get_content(self, version_id, parent_texts={}):

833

"""Returns a content object that makes up the specified

834

version."""

835

if not self.has_version(version_id):

836

raise RevisionNotPresent(version_id, self.filename)

837

838

cached_version = parent_texts.get(version_id, None)

839

if cached_version is not None:

840

return cached_version

841

842

text_map, contents_map = self._get_content_maps([version_id])

843

return contents_map[version_id]

844

845

def _check_versions_present(self, version_ids):

846

"""Check that all specified versions are present."""

847

self._index.check_versions_present(version_ids)

848

849

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

850

"""See VersionedFile.add_lines_with_ghosts()."""

851

self._check_add(version_id, lines)

852

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

853

854

def _add_lines(self, version_id, parents, lines, parent_texts,

855

left_matching_blocks=None):

856

"""See VersionedFile.add_lines."""

857

self._check_add(version_id, lines)

858

self._check_versions_present(parents)

859

return self._add(version_id, lines[:], parents, self.delta,

860

parent_texts, left_matching_blocks)

861

862

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If version_id is already in the knit.

    The reserved-id check and the two line checks are delegated to other
    methods of this object.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    has_whitespace = contains_whitespace(version_id)
    if has_whitespace:
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_present = self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

873

874

def _add(self, version_id, lines, parents, delta, parent_texts,
         left_matching_blocks=None):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: Id of the new version; must be a str.
    :param lines: List of lines.  The last entry is modified in place
        when the text lacks a trailing newline.
    :param parents: Parent version ids, recorded in the index as given.
    :param delta: Whether delta compression is requested.  It is dropped
        when no parent is present or when _check_should_delta declines.
    :param parent_texts: Optional dict of cached parent contents; None is
        treated as an empty dict.
    :param left_matching_blocks: Passed through to _merge_annotations.
    :return: The content object produced by self.factory.make.
    """
    if parent_texts is None:
        parent_texts = {}
    # Only parents actually held by this knit can serve as a delta basis
    # or as a source of annotations; the others are ghosts.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not present_parents:
        delta = False

    # The digest is taken before any newline is appended below.
    digest = sha_strings(lines)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if present_parents and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and present_parents):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return lines

939

940

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation does nothing and returns None.
    """
    return None

942

943

def _clone_text(self, new_version_id, old_version_id, parents):

944

"""See VersionedFile.clone_text()."""

945

# FIXME RBC 20060228 make fast by only inserting an index with null

946

# delta.

947

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

948

949

def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

952

953

def _get_record_map(self, version_ids):

954

"""Produce a dictionary of knit records.

955

956

The keys are version_ids, the values are tuples of (method, content,

957

digest, next).

958

method is the way the content should be applied.

959

content is a KnitContent object.

960

digest is the SHA1 digest of this version id after all steps are done

961

next is the build-parent of the version, i.e. the leftmost ancestor.

962

If the method is fulltext, next will be None.

963

"""

964

position_map = self._get_components_positions(version_ids)

965

# c = component_id, m = method, i_m = index_memo, n = next

966

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

967

record_map = {}

968

for component_id, content, digest in \

969

self._data.read_records_iter(records):

970

method, index_memo, next = position_map[component_id]

971

record_map[component_id] = method, content, digest, next

972

973

return record_map

974

975

def get_text(self, version_id):
    """See VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

978

979

def get_texts(self, version_ids):
    """Return the text of each listed version as a single string.

    Each text is the concatenation of the lines that get_line_list
    returns for that version, in the order of version_ids.
    """
    texts = []
    for version_lines in self.get_line_list(version_ids):
        texts.append(''.join(version_lines))
    return texts

981

982

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings."""
    safe_ids = []
    for raw_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(raw_id))
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

_get_lf_split_line_list = get_line_list

991

992

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    :raises KnitCorrupt: If a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Contents built so far, keyed by component id; shared between the
    # requested versions so a common basis is only built once.
    built = {}
    final_content = {}
    for version_id in version_ids:
        # Walk from the requested version towards its fulltext basis,
        # stopping early at a component that has already been built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built:
                break
            cursor = build_parent

        # Apply the components oldest first.
        chain.reverse()
        content = None
        for component_id, method, data, digest in chain:
            if component_id in built:
                content = built[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                # Copy first so the basis content kept in built is not
                # altered by applying the delta.
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            built[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Drop the trailing newline on a private copy only.
            content = content.copy()
            stripped = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], stripped)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1046

1047

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions()."""
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Queue (version_id, index_memo) pairs in the order self.versions()
    # reports them.
    version_id_records = []
    for version_id in self.versions():
        if version_id not in requested_versions:
            continue
        index_memo = self._index.get_position(version_id)
        version_id_records.append((version_id, index_memo))

    total = len(version_id_records)
    record_iter = self._data.read_records_iter(version_id_records)
    for version_idx, record in enumerate(record_iter):
        version_id, data, sha_value = record
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        for line in line_iterator:
            yield line

    pb.update('Walking content.', total, total)

1086

1087

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for version_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(version_id))
    return self._index.iter_parents(safe_ids)

1099

1100

def num_versions(self):
    """See VersionedFile.num_versions()."""
    index = self._index
    return index.num_versions()

__len__ = num_versions

1105

1106

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter."""
    safe_id = osutils.safe_revision_id(version_id)
    content = self._get_content(safe_id)
    for annotation in content.annotate_iter():
        origin, text = annotation
        yield origin, text

1112

1113

def get_parents(self, version_id):
    """See VersionedFile.get_parents."""
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    version_id = osutils.safe_revision_id(version_id)
    index = self._index
    try:
        return index.get_parents(version_id)
    except KeyError:
        # The index signals an unknown version with KeyError.
        raise RevisionNotPresent(version_id, self.filename)

1123

1124

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts."""
    version_id = osutils.safe_revision_id(version_id)
    index = self._index
    try:
        return index.get_parents_with_ghosts(version_id)
    except KeyError:
        # The index signals an unknown version with KeyError.
        raise RevisionNotPresent(version_id, self.filename)

1131

1132

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry."""
    # A single id may be passed as a bare string.
    if isinstance(versions, basestring):
        versions = [versions]
    if not versions:
        return []
    safe_versions = []
    for version in versions:
        safe_versions.append(osutils.safe_revision_id(version))
    return self._index.get_ancestry(safe_versions, topo_sorted)

1140

1141

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts."""
    # A single id may be passed as a bare string.
    if isinstance(versions, basestring):
        versions = [versions]
    if not versions:
        return []
    safe_versions = []
    for version in versions:
        safe_versions.append(osutils.safe_revision_id(version))
    return self._index.get_ancestry_with_ghosts(safe_versions)

1149

1150

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge."""
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    # Ancestry order is irrelevant here, so skip the topological sort.
    ancestry_of_b = self.get_ancestry(ver_b, topo_sorted=False)
    ancestors_b = set(ancestry_of_b)

    ancestry_of_a = self.get_ancestry(ver_a, topo_sorted=False)
    ancestors_a = set(ancestry_of_a)
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    return merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)

1161

1162

1163

class _KnitComponentFile(object):

1164

"""One of the files used to implement a knit database"""

1165

1166

def __init__(self, transport, filename, mode, file_mode=None,

1167

create_parent_dir=False, dir_mode=None):

1168

self._transport = transport

1169

self._filename = filename

1170

self._mode = mode

1171

self._file_mode = file_mode

1172

self._dir_mode = dir_mode

1173

self._create_parent_dir = create_parent_dir

1174

self._need_to_create = False

1175

1176

def _full_path(self):

1177

"""Return the full path to this file."""

1178

return self._transport.base + self._filename

1179

1180

def check_header(self, fp):

1181

line = fp.readline()

1182

if line == '':

1183

# An empty file can actually be treated as though the file doesn't

1184

# exist yet.

1185

raise errors.NoSuchFile(self._full_path())

1186

if line != self.HEADER:

1187

raise KnitHeaderError(badline=line,

1188

filename=self._transport.abspath(self._filename))

1189

1190

def __repr__(self):

1191

return '%s(%s)' % (self.__class__.__name__, self._filename)

1192

1193

1194

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is always kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to an index
    entry tuple:
    (version_id, options, pos, size, parents, sequence_number).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record in the
        data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write is interrupted to the index file, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it is normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Load the index from transport, optionally creating it.

        If the file is missing (NoSuchFile) the error propagates unless
        mode is 'w' and create is true.  In that case the header is
        written straight away, or, when delay_create is true, creation is
        postponed until the first add_versions call.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            # Keep the yield outside the try so that only the lookup's
            # KeyError is treated as "not present".
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode parent ids for an index line.

        Ids present in the cache are written as their sequence number;
        any other id is written as '.' followed by the id itself.
        """
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, index_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, index_memo, parents)
            where index_memo is an (index, pos, size) tuple; the index
            element is not written to the file.

        If anything fails, the in-memory history and cache are restored
        to their state before the call and the error is re-raised.
        """
        lines = []
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # Deliberately bare: whatever interrupts the write, restore the
            # original values and re-raise.
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        return None, entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises KnitIndexUnknownMethod: If the options contain neither
            'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            if 'line-delta' not in options:
                raise errors.KnitIndexUnknownMethod(self._full_path(), options)
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options cached for version_id.

        This is the options object as it was stored in the cache, not a
        joined string.
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

1473

1474

1475

class KnitGraphIndex(object):

1476

"""A knit index that builds on GraphIndex."""

1477

1478

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):

1479

"""Construct a KnitGraphIndex on a graph_index.

1480

1481

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1482

:param deltas: Allow delta-compressed records.

1483

:param add_callback: If not None, allow additions to the index and call

1484

this callback with a list of added GraphIndex nodes:

1485

[(node, value, node_refs), ...]

1486

:param parents: If True, record knits parents, if not do not record

1487

parents.

1488

"""

1489

self._graph_index = graph_index

1490

self._deltas = deltas

1491

self._add_callback = add_callback

1492

self._parents = parents

1493

if deltas and not parents:

1494

raise KnitCorrupt(self, "Cannot do delta compression without "

1495

"parent tracking.")

1496

1497

def _get_entries(self, keys, check_present=False):

1498

"""Get the entries for keys.

1499

1500

:param keys: An iterable of index keys, - 1-tuples.

1501

"""

1502

keys = set(keys)

1503

found_keys = set()

1504

if self._parents:

1505

for node in self._graph_index.iter_entries(keys):

1506

yield node

1507

found_keys.add(node[1])

1508

else:

1509

# adapt parentless index to the rest of the code.

1510

for node in self._graph_index.iter_entries(keys):

1511

yield node[0], node[1], node[2], ()

1512

found_keys.add(node[1])

1513

if check_present:

1514

missing_keys = keys.difference(found_keys)

1515

if missing_keys:

1516

raise RevisionNotPresent(missing_keys.pop(), self)

1517

1518

def _present_keys(self, version_ids):

1519

return set([

1520

node[1] for node in self._get_entries(version_ids)])

1521

1522

def _parentless_ancestry(self, versions):

1523

"""Honour the get_ancestry API for parentless knit indices."""

1524

wanted_keys = self._version_ids_to_keys(versions)

1525

present_keys = self._present_keys(wanted_keys)

1526

missing = set(wanted_keys).difference(present_keys)

1527

if missing:

1528

raise RevisionNotPresent(missing.pop(), self)

1529

return list(self._keys_to_version_ids(present_keys))

1530

1531

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry."""
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    ghosts = set()
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        found = set()
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            # dont ask for ghosties - otherwise
            # we we can end up looping with pending
            # being entirely ghosted.
            known_parents = [parent for parent in node_refs[0]
                             if parent not in ghosts]
            graph[key] = known_parents
            # queue parents, but dont examine known nodes again
            for parent in known_parents:
                if parent not in graph:
                    pending.add(parent)
            found.add(key)
        # Anything asked for in this round but not returned is a ghost.
        ghosts.update(this_iteration.difference(found))
    unsatisfied = versions.difference(graph)
    if unsatisfied:
        raise RevisionNotPresent(unsatisfied.pop(), self)
    if topo_sorted:
        result_keys = topo_sort(graph.items())
    else:
        result_keys = graph.iterkeys()
    return [key[0] for key in result_keys]

1569

1570

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts."""
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    graph = {}
    versions = self._version_ids_to_keys(versions)
    pending = set(versions)
    while pending:
        # get all pending nodes
        this_iteration = pending
        new_nodes = self._get_entries(this_iteration)
        pending = set()
        for (index, key, value, node_refs) in new_nodes:
            graph[key] = node_refs[0]
            # queue parents, but dont examine known nodes again
            for parent in graph[key]:
                if parent not in graph:
                    pending.add(parent)
        missing_versions = this_iteration.difference(graph)
        # A directly requested version must exist; only ancestors may be
        # ghosts.
        missing_needed = versions.intersection(missing_versions)
        if missing_needed:
            raise RevisionNotPresent(missing_needed.pop(), self)
        for missing_version in missing_versions:
            # add a key, no parents
            graph[missing_version] = []
            pending.discard(missing_version) # don't look for it
    result_keys = topo_sort(graph.items())
    return [key[0] for key in result_keys]

1603

1604

def get_graph(self):
    """Return a list of the node:parents lists from this knit index."""
    if not self._parents:
        # Without parent tracking every version has an empty parent tuple.
        return [(key, ()) for key in self.get_versions()]
    return [(key[0], tuple([ref[0] for ref in refs[0]]))
            for index, key, value, refs
            in self._graph_index.iter_all_entries()]

1612

1613

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    keys = self._version_ids_to_keys(version_ids)
    if not self._parents:
        for node in self._get_entries(keys):
            yield node[1][0], ()
        return
    all_nodes = set(self._get_entries(keys))
    all_parents = set()
    present_parents = set()
    for node in all_nodes:
        all_parents.update(node[3][0])
        # any node we are querying must be present
        present_parents.add(node[1])
    # Parents that were not themselves queried need a presence lookup.
    unknown_parents = all_parents.difference(present_parents)
    present_parents.update(self._present_keys(unknown_parents))
    for node in all_nodes:
        parents = [parent[0] for parent in node[3][0]
                   if parent in present_parents]
        yield node[1][0], tuple(parents)

1641

1642

def num_versions(self):
    """Return how many entries the graph index holds.

    Every entry of iter_all_entries() is walked to obtain the count.
    """
    all_entries = list(self._graph_index.iter_all_entries())
    return len(all_entries)

__len__ = num_versions

1646

1647

def get_versions(self):
    """Get all the versions in the file. not topologically sorted."""
    versions = []
    for node in self._graph_index.iter_all_entries():
        # node[1] is the key tuple; its first element is the version id.
        versions.append(node[1][0])
    return versions

1650

1651

def has_version(self, version_id):
    """True if the version is in the index."""
    keys = self._version_ids_to_keys([version_id])
    present = self._present_keys(keys)
    return len(present) == 1

1654

1655

def _keys_to_version_ids(self, keys):

1656

return tuple(key[0] for key in keys)

1657

1658

def get_position(self, version_id):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    entry = self._get_node(version_id)
    # The stored value is one flag character followed by "<pos> <size>".
    fields = entry[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return entry[0], position, size

1667

1668

def get_method(self, version_id):
    """Return compression method of specified version."""
    if self._deltas:
        entry = self._get_node(version_id)
        # the second reference list decides between delta and fulltext
        return self._parent_compression(entry[3][1])
    # Without delta support everything is reported as a fulltext and the
    # index is not consulted.
    return 'fulltext'

1673

1674

def _parent_compression(self, reference_list):

1675

# use the second reference list to decide if this is delta'd or not.

1676

if len(reference_list):

1677

return 'line-delta'

1678

else:

1679

return 'fulltext'

1680

1681

def _get_node(self, version_id):

1682

return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]

1683

1684

def get_options(self, version_id):
    """Return the list of option strings for a version.

    The list starts with the compression method ('fulltext' or
    'line-delta'); 'no-eol' follows when the stored value begins with the
    'N' flag, e.g. ['line-delta', 'no-eol'].
    """
    entry = self._get_node(version_id)
    if self._deltas:
        method = self._parent_compression(entry[3][1])
    else:
        method = 'fulltext'
    options = [method]
    if entry[2][0] == 'N':
        options.append('no-eol')
    return options

1697

1698

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :raises errors.RevisionNotPresent: when iter_parents yields nothing
        for version_id.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        return found[0][1]
    # missing key
    raise errors.RevisionNotPresent(version_id, self)

1705

1706

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts."""
    key_set = self._version_ids_to_keys([version_id])
    # The lookup (with check_present=True) is performed even for a
    # parentless index, before the empty result is returned.
    entries = list(self._get_entries(key_set, check_present=True))
    if self._parents:
        return self._keys_to_version_ids(entries[0][3][0])
    return ()

1713

1714

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: An iterable of version ids.
    :raises RevisionNotPresent: naming one of the absent version ids, when
        any requested version is not in the index.
    """
    keys = self._version_ids_to_keys(version_ids)
    present = self._present_keys(keys)
    missing = keys.difference(present)
    if missing:
        # Index keys are 1-tuples; report the version id itself (as
        # get_parents does) rather than the tuple that wraps it.
        raise RevisionNotPresent(missing.pop()[0], self)

1721

1722

def add_version(self, version_id, options, access_memo, parents):
    """Add a version record to the index."""
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))

1725

1726

def add_versions(self, versions):
    """Add multiple versions to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param versions: a list of tuples:
        (version_id, options, access_memo, parents), where access_memo
        is an (index, pos, size) tuple.
    :raises errors.ReadOnlyError: if no add callback is set.
    :raises KnitCorrupt: if a line-delta is added to a non-delta index, a
        node with parents is added to a parentless index, or a version
        already in the index is re-added with different details.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    keys = {}
    for (version_id, options, access_memo, parents) in versions:
        # Only the position and size go into the node value; the index
        # part of the memo is not recorded.
        _index, pos, size = access_memo
        key = (version_id, )
        parents = tuple((parent, ) for parent in parents)
        if 'no-eol' in options:
            value = 'N'
        else:
            value = ' '
        value += "%d %d" % (pos, size)
        if not self._deltas:
            if 'line-delta' in options:
                raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if self._parents:
            if self._deltas:
                if 'line-delta' in options:
                    # the first parent is recorded as the compression parent
                    node_refs = (parents, (parents[0],))
                else:
                    node_refs = (parents, ())
            else:
                node_refs = (parents, )
        else:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        keys[key] = (value, node_refs)
    # check for dups: a version that is already present must carry exactly
    # the same details, and is then not added again.
    present_nodes = self._get_entries(keys)
    for (index, key, value, node_refs) in present_nodes:
        if (value, node_refs) != keys[key]:
            raise KnitCorrupt(self, "inconsistent details in add_versions"
                ": %s %s" % ((value, node_refs), keys[key]))
        del keys[key]
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in keys.items()]
    else:
        # a parentless index takes nodes without a reference-list element
        result = [(key, value)
                  for key, (value, node_refs) in keys.items()]
    self._add_callback(result)

1784

1785

def _version_ids_to_keys(self, version_ids):

1786

return set((version_id, ) for version_id in version_ids)

1787

1788

1789

class _KnitAccess(object):

1790

"""Access to knit records in a .knit file."""

1791

1792

def __init__(self, transport, filename, _file_mode, _dir_mode,

1793

_need_to_create, _create_parent_dir):

1794

"""Create a _KnitAccess for accessing and inserting data.

1795

1796

:param transport: The transport the .knit is located on.

1797

:param filename: The filename of the .knit.

1798

"""

1799

self._transport = transport

1800

self._filename = filename

1801

self._file_mode = _file_mode

1802

self._dir_mode = _dir_mode

1803

self._need_to_create = _need_to_create

1804

self._create_parent_dir = _create_parent_dir

1805

1806

def add_raw_records(self, sizes, raw_data):

1807

"""Add raw knit bytes to a storage area.

1808

1809

The data is spooled to whereever the access method is storing data.

1810

1811

:param sizes: An iterable containing the size of each raw data segment.

1812

:param raw_data: A bytestring containing the data.

1813

:return: A list of memos to retrieve the record later. Each memo is a

1814

tuple - (index, pos, length), where the index field is always None

1815

for the .knit access method.

1816

"""

1817

assert type(raw_data) == str, \

1818

'data must be plain bytes was %s' % type(raw_data)

1819

if not self._need_to_create:

1820

base = self._transport.append_bytes(self._filename, raw_data)

1821

else:

1822

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1823

create_parent_dir=self._create_parent_dir,

1824

mode=self._file_mode,

1825

dir_mode=self._dir_mode)

1826

self._need_to_create = False

1827

base = 0

1828

result = []

1829

for size in sizes:

1830

result.append((None, base, size))

1831

base += size

1832

return result

1833

1834

def create(self):

1835

"""IFF this data access has its own storage area, initialise it.

1836

1837

:return: None.

1838

"""

1839

self._transport.put_bytes_non_atomic(self._filename, '',

1840

mode=self._file_mode)

1841

1842

def open_file(self):

1843

"""IFF this data access can be represented as a single file, open it.

1844

1845

For knits that are not mapped to a single file on disk this will

1846

always return None.

1847

1848

:return: None or a file handle.

1849

"""

1850

try:

1851

return self._transport.get(self._filename)

1852

except NoSuchFile:

1853

pass

1854

return None

1855

1856

def get_raw_records(self, memos_for_retrieval):

1857

"""Get the raw bytes for a records.

1858

1859

:param memos_for_retrieval: An iterable containing the (index, pos,

1860

length) memo for retrieving the bytes. The .knit method ignores

1861

the index as there is always only a single file.

1862

:return: An iterator over the bytes of the records.

1863

"""

1864

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1865

for pos, data in self._transport.readv(self._filename, read_vector):

1866

yield data

1867

1868

1869

class _PackAccess(object):

1870

"""Access to knit records via a collection of packs."""

1871

1872

def __init__(self, index_to_packs, writer=None):

1873

"""Create a _PackAccess object.

1874

1875

:param index_to_packs: A dict mapping index objects to the transport

1876

and file names for obtaining data.

1877

:param writer: A tuple (pack.ContainerWriter, write_index) which

1878

contains the pack to write, and the index that reads from it will

1879

be associated with.

1880

"""

1881

if writer:

1882

self.container_writer = writer[0]

1883

self.write_index = writer[1]

1884

else:

1885

self.container_writer = None

1886

self.write_index = None

1887

self.indices = index_to_packs

1888

1889

def add_raw_records(self, sizes, raw_data):

1890

"""Add raw knit bytes to a storage area.

1891

1892

The data is spooled to the container writer in one bytes-record per

1893

raw data item.

1894

1895

:param sizes: An iterable containing the size of each raw data segment.

1896

:param raw_data: A bytestring containing the data.

1897

:return: A list of memos to retrieve the record later. Each memo is a

1898

tuple - (index, pos, length), where the index field is the

1899

write_index object supplied to the PackAccess object.

1900

"""

1901

assert type(raw_data) == str, \

1902

'data must be plain bytes was %s' % type(raw_data)

1903

result = []

1904

offset = 0

1905

for size in sizes:

1906

p_offset, p_length = self.container_writer.add_bytes_record(

1907

raw_data[offset:offset+size], [])

1908

offset += size

1909

result.append((self.write_index, p_offset, p_length))

1910

return result

1911

1912

def create(self):

1913

"""Pack based knits do not get individually created."""

1914

1915

def get_raw_records(self, memos_for_retrieval):

1916

"""Get the raw bytes for a records.

1917

1918

:param memos_for_retrieval: An iterable containing the (index, pos,

1919

length) memo for retrieving the bytes. The Pack access method

1920

looks up the pack to use for a given record in its index_to_pack

1921

map.

1922

:return: An iterator over the bytes of the records.

1923

"""

1924

# first pass, group into same-index requests

1925

request_lists = []

1926

current_index = None

1927

for (index, offset, length) in memos_for_retrieval:

1928

if current_index == index:

1929

current_list.append((offset, length))

1930

else:

1931

if current_index is not None:

1932

request_lists.append((current_index, current_list))

1933

current_index = index

1934

current_list = [(offset, length)]

1935

# handle the last entry

1936

if current_index is not None:

1937

request_lists.append((current_index, current_list))

1938

for index, offsets in request_lists:

1939

transport, path = self.indices[index]

1940

reader = pack.make_readv_reader(transport, path, offsets)

1941

for names, read_func in reader.iter_records():

1942

yield read_func(None)

1943

1944

def open_file(self):

1945

"""Pack based knits have no single file."""

1946

return None

1947

1948

def set_writer(self, writer, index, (transport, packname)):

1949

"""Set a writer to use for adding data."""

1950

self.indices[index] = (transport, packname)

1951

self.container_writer = writer

1952

self.write_index = index

1953

1954

1955

class _KnitData(object):

1956

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1957

1958

The KnitData class provides the logic for parsing and using knit records,

1959

making use of an access method for the low level read and write operations.

1960

"""

1961

1962

def __init__(self, access):

1963

"""Create a KnitData object.

1964

1965

:param access: The access method to use. Access methods such as

1966

_KnitAccess manage the insertion of raw records and the subsequent

1967

retrieval of the same.

1968

"""

1969

self._access = access

1970

self._checked = False

1971

# TODO: jam 20060713 conceptually, this could spill to disk

1972

# if the cached size gets larger than a certain amount

1973

# but it complicates the model a bit, so for now just use

1974

# a simple dictionary

1975

self._cache = {}

1976

self._do_cache = False

1977

1978

def enable_cache(self):

1979

"""Enable caching of reads."""

1980

self._do_cache = True

1981

1982

def clear_cache(self):

1983

"""Clear the record cache."""

1984

self._do_cache = False

1985

self._cache = {}

1986

1987

def _open_file(self):

1988

return self._access.open_file()

1989

1990

def _record_to_data(self, version_id, digest, lines):

1991

"""Convert version_id, digest, lines into a raw data block.

1992

1993

:return: (len, a StringIO instance with the raw data ready to read.)

1994

"""

1995

sio = StringIO()

1996

data_file = GzipFile(None, mode='wb', fileobj=sio)

1997

1998

assert isinstance(version_id, str)

1999

data_file.writelines(chain(

2000

["version %s %d %s\n" % (version_id,

2001

len(lines),

2002

digest)],

2003

lines,

2004

["end %s\n" % version_id]))

2005

data_file.close()

2006

length= sio.tell()

2007

2008

sio.seek(0)

2009

return length, sio

2010

2011

def add_raw_records(self, sizes, raw_data):

2012

"""Append a prepared record to the data file.

2013

2014

:param sizes: An iterable containing the size of each raw data segment.

2015

:param raw_data: A bytestring containing the data.

2016

:return: a list of index data for the way the data was stored.

2017

See the access method add_raw_records documentation for more

2018

details.

2019

"""

2020

return self._access.add_raw_records(sizes, raw_data)

2021

2022

def add_record(self, version_id, digest, lines):

2023

"""Write new text record to disk.

2024

2025

Returns index data for retrieving it later, as per add_raw_records.

2026

"""

2027

size, sio = self._record_to_data(version_id, digest, lines)

2028

result = self.add_raw_records([size], sio.getvalue())

2029

if self._do_cache:

2030

self._cache[version_id] = sio.getvalue()

2031

return result[0]

2032

2033

def _parse_record_header(self, version_id, raw_data):

2034

"""Parse a record header for consistency.

2035

2036

:return: the header and the decompressor stream.

2037

as (stream, header_record)

2038

"""

2039

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2040

try:

2041

rec = self._check_header(version_id, df.readline())

2042

except Exception, e:

2043

raise KnitCorrupt(self._access,

2044

"While reading {%s} got %s(%s)"

2045

% (version_id, e.__class__.__name__, str(e)))

2046

return df, rec

2047

2048

def _check_header(self, version_id, line):

2049

rec = line.split()

2050

if len(rec) != 4:

2051

raise KnitCorrupt(self._access,

2052

'unexpected number of elements in record header')

2053

if rec[1] != version_id:

2054

raise KnitCorrupt(self._access,

2055

'unexpected version, wanted %r, got %r'

2056

% (version_id, rec[1]))

2057

return rec

2058

2059

def _parse_record(self, version_id, data):

2060

# profiling notes:

2061

# 4168 calls in 2880 217 internal

2062

# 4168 calls to _parse_record_header in 2121

2063

# 4168 calls to readlines in 330

2064

df = GzipFile(mode='rb', fileobj=StringIO(data))

2065

2066

try:

2067

record_contents = df.readlines()

2068

except Exception, e:

2069

raise KnitCorrupt(self._access,

2070

"While reading {%s} got %s(%s)"

2071

% (version_id, e.__class__.__name__, str(e)))

2072

header = record_contents.pop(0)

2073

rec = self._check_header(version_id, header)

2074

2075

last_line = record_contents.pop()

2076

if len(record_contents) != int(rec[2]):

2077

raise KnitCorrupt(self._access,

2078

'incorrect number of lines %s != %s'

2079

' for version {%s}'

2080

% (len(record_contents), int(rec[2]),

2081

version_id))

2082

if last_line != 'end %s\n' % rec[1]:

2083

raise KnitCorrupt(self._access,

2084

'unexpected version end line %r, wanted %r'

2085

% (last_line, version_id))

2086

df.close()

2087

return record_contents, rec[3]

2088

2089

def read_records_iter_raw(self, records):

2090

"""Read text records from data file and yield raw data.

2091

2092

This unpacks enough of the text record to validate the id is

2093

as expected but thats all.

2094

"""

2095

# setup an iterator of the external records:

2096

# uses readv so nice and fast we hope.

2097

if len(records):

2098

# grab the disk data needed.

2099

if self._cache:

2100

# Don't check _cache if it is empty

2101

needed_offsets = [index_memo for version_id, index_memo

2102

in records

2103

if version_id not in self._cache]

2104

else:

2105

needed_offsets = [index_memo for version_id, index_memo

2106

in records]

2107

2108

raw_records = self._access.get_raw_records(needed_offsets)

2109

2110

for version_id, index_memo in records:

2111

if version_id in self._cache:

2112

# This data has already been validated

2113

data = self._cache[version_id]

2114

else:

2115

data = raw_records.next()

2116

if self._do_cache:

2117

self._cache[version_id] = data

2118

2119

# validate the header

2120

df, rec = self._parse_record_header(version_id, data)

2121

df.close()

2122

yield version_id, data

2123

2124

def read_records_iter(self, records):

2125

"""Read text records from data file and yield result.

2126

2127

The result will be returned in whatever is the fastest to read.

2128

Not by the order requested. Also, multiple requests for the same

2129

record will only yield 1 response.

2130

:param records: A list of (version_id, pos, len) entries

2131

:return: Yields (version_id, contents, digest) in the order

2132

read, not the order requested

2133

"""

2134

if not records:

2135

return

2136

2137

if self._cache:

2138

# Skip records we have alread seen

2139

yielded_records = set()

2140

needed_records = set()

2141

for record in records:

2142

if record[0] in self._cache:

2143

if record[0] in yielded_records:

2144

continue

2145

yielded_records.add(record[0])

2146

data = self._cache[record[0]]

2147

content, digest = self._parse_record(record[0], data)

2148

yield (record[0], content, digest)

2149

else:

2150

needed_records.add(record)

2151

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2152

else:

2153

needed_records = sorted(set(records), key=operator.itemgetter(1))

2154

2155

if not needed_records:

2156

return

2157

2158

# The transport optimizes the fetching as well

2159

# (ie, reads continuous ranges.)

2160

raw_data = self._access.get_raw_records(

2161

[index_memo for version_id, index_memo in needed_records])

2162

2163

for (version_id, index_memo), data in \

2164

izip(iter(needed_records), raw_data):

2165

content, digest = self._parse_record(version_id, data)

2166

if self._do_cache:

2167

self._cache[version_id] = data

2168

yield version_id, content, digest

2169

2170

def read_records(self, records):

2171

"""Read records into a dictionary."""

2172

components = {}

2173

for record_id, content, digest in \

2174

self.read_records_iter(records):

2175

components[record_id] = (content, digest)

2176

return components

2177

2178

2179

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.  """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        The pb argument is not used: a nested progress bar is always
        obtained from the ui factory and finished before returning.

        :return: the number of records copied into the target.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = self.target.get_parents_with_ghosts(version)
                n1 = set(target_parents)
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list.  The current parents may be
                # a tuple (the graph index returns one), so they are copied
                # into a list before the extra parents are appended.
                new_parents = list(target_parents) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2291

2292

2293

InterVersionedFile.register_optimiser(InterKnit)

2294

2295

2296

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits."""
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        The pb argument is not used: a nested progress bar is always
        obtained from the ui factory and finished before returning.

        :return: the number of versions added to the target.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = self.target.get_parents_with_ghosts(version)
                n1 = set(target_parents)
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list.  The current parents may be
                # a tuple (the graph index returns one), so they are copied
                # into a list before the extra parents are appended.
                new_parents = list(target_parents) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2384

2385

2386

InterVersionedFile.register_optimiser(WeaveToKnit)

2387

2388

2389

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = KnitSequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = KnitSequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = KnitSequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            #     704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            #  +76519  0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            #     704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            #  +211400 0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            #  +76519  0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize

2521

2522

2523

try:

2524

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2525

except ImportError:

2526

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »