/brz/remove-bazaar : revision 2592.3.46

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2007-07-19 07:45:10 UTC
mfrom: (2625.8.1 knits)
mto: (2592.5.3 pack-repository)
mto: This revision was merged to the branch mainline in revision 2933.
Revision ID: robertc@robertcollins.net-20070719074510-6atx30q1x6s75l1e

Merge knits branch.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/file_names.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_file_names.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/bug_trackers.txt

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/developers

doc/developers/HACKING

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/scratch.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/shared_repository_layouts.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_strings,

)

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

100

import bzrlib.ui

101

import bzrlib.weave

102

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

103

104

105

# TODO: Split out code specific to this format into an associated object.

106

107

# TODO: Can we put in some kind of value to check that the index and data

108

# files belong together?

109

110

# TODO: accommodate binaries, perhaps by storing a byte count

111

112

# TODO: function to check whole file

113

114

# TODO: atomically append data, then measure backwards from the cursor

115

# position after writing to work out where it was located. we may need to

116

# bypass python file buffering.

117

118

DATA_SUFFIX = '.knit'

119

INDEX_SUFFIX = '.kndx'

120

121

122

class KnitContent(object):
    """Annotated lines of a single knit version.

    The lines are held in ``_lines`` as (origin, text) pairs; line deltas
    are computed against that list.
    """

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the (origin, text) pairs."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a new list."""
        return [pair for pair in self.annotate_iter()]

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        Each hunk is (old_start, old_end, new_length, new_pairs), where
        new_pairs is the slice of new_lines' (origin, text) pairs that
        replaces old lines old_start:old_end.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            if opcode[0] != 'equal':
                old_start, old_end, new_start, new_end = opcode[1:]
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter() as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return only the text of each line, dropping the origins."""
        return [line for unused_origin, line in self._lines]

    def copy(self):
        """Return a new KnitContent holding a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

155

156

157

class _KnitFactory(object):
    """Behaviour shared by the knit content factories."""

    def make(self, lines, version_id):
        """Return a KnitContent attributing every line to version_id."""
        origins = [version_id] * len(lines)
        return KnitContent(zip(origins, lines))

163

164

165

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        version_id is accepted for signature parity with the plain factory
        and is not used: every line already carries its own origin.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        version_id is optional and only forwarded; it used to be omitted
        from the forwarded call, which made this method raise TypeError.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        start,end,count
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :raises ValueError: if a header is malformed or the delta ends
            before a hunk has supplied all of its ``count`` lines.
        """
        result = []
        remaining = iter(lines)
        # walk through the lines parsing.
        for header in remaining:
            start, end, count = [int(n) for n in header.split(',')]
            # range() comes first so that zip() stops before pulling the
            # next header off the shared iterator.
            contents = [tuple(line.split(' ', 1))
                        for unused, line in zip(range(count), remaining)]
            if len(contents) < count:
                raise ValueError('truncated line delta: hunk %d,%d,%d has '
                                 'only %d lines'
                                 % (start, end, count, len(contents)))
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        pending = 0
        for line in lines:
            if pending > 0:
                # still inside a hunk: strip the origin and emit the text.
                pending -= 1
                origin, text = line.split(' ', 1)
                yield text
            else:
                # hunk header "start,end,count": only count matters here.
                pending = int(line.split(',')[2])

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

255

256

257

class KnitPlainFactory(_KnitFactory):
    """Factory producing Content objects from unannotated storage."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        This is not a no-op: the internal representation still pairs each
        line with a version id, here always the given version_id.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, pairs) for each hunk in lines.

        lines must support len() and slicing; every content line is
        attributed to version_id.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            position = first + count
            yield (start, end, count,
                   zip([version_id] * count, lines[first:position]))

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Return an iterator over the content lines of a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        The hunk headers are consumed and nothing of them is reported.
        """
        pending = 0
        for line in lines:
            if pending > 0:
                pending -= 1
                yield line
            else:
                # hunk header "start,end,count": only count matters here.
                pending = int(line.split(',')[2])

    def lower_fulltext(self, content):
        """Return the plain text lines of content for storage."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta as header lines followed by bare text lines."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            for unused_origin, text in hunk_lines:
                serialised.append(text)
        return serialised

310

311

312

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: Transport the knit files live on.
    :param relpath: Path of the knit, without suffix, on that transport.
    """
    # KnitVersionedFile takes (relpath, transport, file_mode, access_mode,
    # factory, ...): pass by keyword so 'w' lands on access_mode, and hand
    # over a factory instance like the constructor's own default does.
    knit = KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)
    # NOTE(review): create=True is assumed to be what "construct" means
    # here; without it only an existing knit is opened -- confirm.
    handle = knit._data._open_file()
    # NOTE(review): assumes _open_file() may return None when there is no
    # data file yet; close whatever it did open so it is not leaked.
    if handle is not None:
        handle.close()

316

317

318

class KnitVersionedFile(VersionedFile):

319

"""Weave-like structure with faster random access.

320

321

A knit stores a number of texts and a summary of the relationships

322

between them. Texts are identified by a string version-id. Texts

323

are normally stored and retrieved as a series of lines, but can

324

also be passed as single strings.

325

326

Lines are stored with the trailing newline (if any) included, to

327

avoid special cases for files with no final newline. Lines are

328

composed of 8-bit characters, not unicode. The combination of

329

these approaches should mean any 'binary' file can be safely

330

stored and retrieved.

331

"""

332

333

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Path of the knit without suffix; the index and data
        files are relpath + INDEX_SUFFIX and relpath + DATA_SUFFIX.
    :param transport: Transport used to reach those files.
    :param file_mode: Passed through to the index and data objects.
    :param access_mode: 'r' or 'w'; None means 'w'.
    :param factory: Content factory; defaults to a KnitAnnotateFactory.
    :param basis_knit: Deprecated; passing it only emits a warning.
    :param delta: Whether added texts may be stored as line deltas.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data objects.
    :param index: An index to use for the knit.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # upper bound on the delta chain walked by _check_should_delta.
    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # the data file is only created when the index holds no versions.
    self._data = _KnitData(transport, relpath + DATA_SUFFIX,
        access_mode, create=create and not len(self), file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)

374

375

def __repr__(self):

376

return '%s(%s)' % (self.__class__.__name__,

377

self.transport.abspath(self.filename))

378

379

def _check_should_delta(self, first_parents):

380

"""Iterate back through the parent listing, looking for a fulltext.

381

382

This is used when we want to decide whether to add a delta or a new

383

fulltext. It searches for _max_delta_chain parents. When it finds a

384

fulltext parent, it sees if the total size of the deltas leading up to

385

it is large enough to indicate that we want a new full text anyway.

386

387

Return True if we should create a new delta, False if we should use a

388

full text.

389

"""

390

delta_size = 0

391

fulltext_size = None

392

delta_parents = first_parents

393

for count in xrange(self._max_delta_chain):

394

parent = delta_parents[0]

395

method = self._index.get_method(parent)

396

pos, size = self._index.get_position(parent)

397

if method == 'fulltext':

398

fulltext_size = size

399

break

400

delta_size += size

401

delta_parents = self._index.get_parents(parent)

402

else:

403

# We couldn't find a fulltext, so we must create a new one

404

return False

405

406

return fulltext_size > delta_size

407

408

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

409

"""See VersionedFile._add_delta()."""

410

self._check_add(version_id, []) # should we check the lines ?

411

self._check_versions_present(parents)

412

present_parents = []

413

ghosts = []

414

parent_texts = {}

415

for parent in parents:

416

if not self.has_version(parent):

417

ghosts.append(parent)

418

else:

419

present_parents.append(parent)

420

421

if delta_parent is None:

422

# reconstitute as full text.

423

assert len(delta) == 1 or len(delta) == 0

424

if len(delta):

425

assert delta[0][0] == 0

426

assert delta[0][1] == 0, delta[0][1]

427

return super(KnitVersionedFile, self)._add_delta(version_id,

428

parents,

429

delta_parent,

430

sha1,

431

noeol,

432

delta)

433

434

digest = sha1

435

436

options = []

437

if noeol:

438

options.append('no-eol')

439

440

if delta_parent is not None:

441

# determine the current delta chain length.

442

# To speed the extract of texts the delta chain is limited

443

# to a fixed number of deltas. This should minimize both

444

# I/O and the time spend applying deltas.

445

# The window was changed to a maximum of 200 deltas, but also added

446

# was a check that the total compressed size of the deltas is

447

# smaller than the compressed size of the fulltext.

448

if not self._check_should_delta([delta_parent]):

449

# We don't want a delta here, just do a normal insertion.

450

return super(KnitVersionedFile, self)._add_delta(version_id,

451

parents,

452

delta_parent,

453

sha1,

454

noeol,

455

delta)

456

457

options.append('line-delta')

458

store_lines = self.factory.lower_line_delta(delta)

459

460

where, size = self._data.add_record(version_id, digest, store_lines)

461

self._index.add_version(version_id, options, where, size, parents)

462

463

def _add_raw_records(self, records, data):

464

"""Add all the records 'records' with data pre-joined in 'data'.

465

466

:param records: A list of tuples(version_id, options, parents, size).

467

:param data: The data for the records. When it is written, the records

468

are adjusted to have pos pointing into data by the sum of

469

the preceding records sizes.

470

"""

471

# write all the data

472

pos = self._data.add_raw_record(data)

473

offset = 0

474

index_entries = []

475

for (version_id, options, parents, size) in records:

476

index_entries.append((version_id, options, pos+offset,

477

size, parents))

478

if self._data._do_cache:

479

self._data._cache[version_id] = data[offset:offset+size]

480

offset += size

481

self._index.add_versions(index_entries)

482

483

def enable_cache(self):
    """Turn on caching in the knit's data object."""
    data_store = self._data
    data_store.enable_cache()

486

487

def clear_cache(self):
    """Clear the data object's cache; the index is left untouched."""
    data_store = self._data
    data_store.clear_cache()

490

491

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Destination path, without suffix, on transport.
    :param transport: Transport to copy the index and data files to.
    """
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_tmp = name + INDEX_SUFFIX + '.tmp'
    # NOTE(review): assumes transport.get() returns a closable file-like
    # object, as the data file handle below is treated -- confirm.
    index_source = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(index_tmp, index_source)
    finally:
        # previously this handle was never closed.
        index_source.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(index_tmp, name + INDEX_SUFFIX)

505

506

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name that shares this knit's factory and delta.

    The mode argument is accepted but not used.
    """
    settings = {'factory': self.factory,
                'delta': self.delta,
                'create': True}
    return KnitVersionedFile(name, transport, **settings)

509

510

def _fix_parents(self, version_id, new_parents):

511

"""Fix the parents list for version.

512

513

This is done by appending a new version to the index

514

with identical data except for the parents list.

515

the parents list must be a superset of the current

516

list.

517

"""

518

current_values = self._index._cache[version_id]

519

assert set(current_values[4]).difference(set(new_parents)) == set()

520

self._index.add_version(version_id,

521

current_values[1],

522

current_values[2],

523

current_values[3],

524

new_parents)

525

526

def get_delta(self, version_id):
    """Return (parent, sha1, noeol, delta) for building version_id.

    parent is the first parent of the version, or None when it has no
    parents.  A version stored as a line delta has its stored delta
    parsed and returned; a version stored as a fulltext has a delta
    computed against that parent (or against an empty text).

    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    data_pos, data_size = self._index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        stored_delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, stored_delta

    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    matcher = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(matcher, new_content)

554

555

def get_graph_with_ghosts(self):
    """Return the index graph as a dict; see VersionedFile."""
    return dict(self._index.get_graph())

559

560

def get_sha1(self, version_id):
    """Return the digest recorded for version_id; see VersionedFile."""
    safe_id = osutils.safe_revision_id(version_id)
    records = self._get_record_map([safe_id])
    method, content, digest, build_parent = records[safe_id]
    return digest

566

567

@staticmethod
def get_suffixes():
    """Return the data and index file suffixes, in that order."""
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

571

572

def has_ghost(self, version_id):
    """Return True if version_id is referenced as a parent but absent."""
    version_id = osutils.safe_revision_id(version_id)
    # a version we actually hold is never a ghost
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

586

587

def versions(self):
    """Return the versions listed by the index; see VersionedFile."""
    index = self._index
    return index.get_versions()

590

591

def has_version(self, version_id):
    """Return whether the index holds version_id; see VersionedFile."""
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

595

596

__contains__ = has_version

597

598

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changes to the text.

    :param content: Content of the new version; when annotated is true
        its lines are updated in place with the parents' annotations.
    :param parents: Version ids to take annotations from; the first one
        is also the basis of the returned delta.
    :param parent_texts: Optional dict of version id to already built
        content, handed to _get_content().
    :param delta: If true, return a line delta against parents[0].
    :param annotated: If true, copy annotations from matching parent lines.
    :return: The line delta when delta is true, otherwise None.
    """
    if parent_texts is None:
        # fresh dict per call rather than a shared mutable default.
        parent_texts = {}
    if annotated:
        delta_seq = None
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            seq = patiencediff.PatienceSequenceMatcher(
                               None, merge_content.text(), content.text())
            if delta_seq is None:
                # setup a delta seq to reuse.
                delta_seq = seq
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the new
                # content for any line that matches the last-checked parent.
                # FIXME: save the sequence control data for delta compression
                # against the most relevant parent rather than rediffing.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if not annotated:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                                             None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

628

629

def _make_line_delta(self, delta_seq, new_content):

630

"""Generate a line delta from delta_seq and new_content."""

631

diff_hunks = []

632

for op in delta_seq.get_opcodes():

633

if op[0] == 'equal':

634

continue

635

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

636

return diff_hunks

637

638

def _get_components_positions(self, version_ids):

639

"""Produce a map of position data for the components of versions.

640

641

This data is intended to be used for retrieving the knit records.

642

643

A dict of version_id to (method, data_pos, data_size, next) is

644

returned.

645

method is the way referenced data should be applied.

646

data_pos is the position of the data in the knit.

647

data_size is the size of the data in the knit.

648

next is the build-parent of the version, or None for fulltexts.

649

"""

650

component_data = {}

651

for version_id in version_ids:

652

cursor = version_id

653

654

while cursor is not None and cursor not in component_data:

655

method = self._index.get_method(cursor)

656

if method == 'fulltext':

657

next = None

658

else:

659

next = self.get_parents(cursor)[0]

660

data_pos, data_size = self._index.get_position(cursor)

661

component_data[cursor] = (method, data_pos, data_size, next)

662

cursor = next

663

return component_data

664

665

def _get_content(self, version_id, parent_texts={}):

666

"""Returns a content object that makes up the specified

667

version."""

668

if not self.has_version(version_id):

669

raise RevisionNotPresent(version_id, self.filename)

670

671

cached_version = parent_texts.get(version_id, None)

672

if cached_version is not None:

673

return cached_version

674

675

text_map, contents_map = self._get_content_maps([version_id])

676

return contents_map[version_id]

677

678

def _check_versions_present(self, version_ids):

679

"""Check that all specified versions are present."""

680

self._index.check_versions_present(version_ids)

681

682

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

683

"""See VersionedFile.add_lines_with_ghosts()."""

684

self._check_add(version_id, lines)

685

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

686

687

def _add_lines(self, version_id, parents, lines, parent_texts):

688

"""See VersionedFile.add_lines."""

689

self._check_add(version_id, lines)

690

self._check_versions_present(parents)

691

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

692

693

def _check_add(self, version_id, lines):
    """check that version_id and lines are safe to add.

    The checks run in order and the first failure wins:

    :raises AssertionError: if the knit was not opened for writing.
    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit.

    check_not_reserved_id() and the two line checks may raise as well;
    they are not defined in this part of the file.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    if self.has_version(version_id):
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

704

705

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: List of text lines; the last one is modified in place
        when it lacks a trailing newline, so callers pass a copy.
    :param parent_texts: Optional dict of version id to already built
        content; None is treated as empty.
    :return: The KnitContent built for the new version.
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    if parent_texts is None:
        parent_texts = {}
    # parents that are not in the knit are ghosts and cannot be used as
    # a delta basis or an annotation source.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not len(present_parents):
        delta = False

    # the digest covers the text exactly as given, before any newline
    # is appended below.
    digest = sha_strings(lines)
    options = []
    if lines:
        # endswith() rather than [-1][-1] so that an empty last line is
        # handled instead of raising IndexError.
        if not lines[-1].endswith('\n'):
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return lines

768

769

def check(self, progress_bar=None):
    """See VersionedFile.check().

    Nothing is verified here; progress_bar is ignored.
    """
    return None

771

772

def _clone_text(self, new_version_id, old_version_id, parents):

773

"""See VersionedFile.clone_text()."""

774

# FIXME RBC 20060228 make fast by only inserting an index with null

775

# delta.

776

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

777

778

def get_lines(self, version_id):
    """Return the lines of version_id; see VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

781

782

def _get_record_map(self, version_ids):

783

"""Produce a dictionary of knit records.

784

785

The keys are version_ids, the values are tuples of (method, content,

786

digest, next).

787

method is the way the content should be applied.

788

content is a KnitContent object.

789

digest is the SHA1 digest of this version id after all steps are done

790

next is the build-parent of the version, i.e. the leftmost ancestor.

791

If the method is fulltext, next will be None.

792

"""

793

position_map = self._get_components_positions(version_ids)

794

# c = component_id, m = method, p = position, s = size, n = next

795

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

796

record_map = {}

797

for component_id, content, digest in \

798

self._data.read_records_iter(records):

799

method, position, size, next = position_map[component_id]

800

record_map[component_id] = method, content, digest, next

801

802

return record_map

803

804

def get_text(self, version_id):
    """Return the text of version_id; see VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

807

808

def get_texts(self, version_ids):
    """Return each requested version as a single joined string."""
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

810

811

def get_line_list(self, version_ids):
    """Return the lines of each listed version, in the order requested."""
    safe_ids = []
    for requested in version_ids:
        safe_ids.append(osutils.safe_revision_id(requested))
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

818

819

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    Each requested version is rebuilt by walking its build-parent chain
    from the records returned by _get_record_map() and applying the
    components oldest first.  The text of every rebuilt version is
    checked against its recorded digest.

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # content_map caches the content of every component built so far, as
    # stored (before any 'no-eol' adjustment); final_content holds what is
    # returned for the requested versions.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # collect the chain newest first, stopping at a fulltext or at a
        # component whose content is already cached.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # apply the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_id)
                elif method == 'line-delta':
                    delta = self.factory.parse_line_delta(data, version_id)
                    # copy first so the cached parent content is not
                    # rebound to the new lines.
                    content = content.copy()
                    content._lines = self._apply_delta(content._lines,
                                                       delta)
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # strip the newline from the last line on a copy, leaving the
            # cached content untouched.
            content = content.copy()
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

873

874

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """Yield the content lines stored in the records of version_ids.

    See VersionedFile.iter_lines_added_or_present_in_versions().  With
    version_ids None every version is walked; records are visited in the
    order of self.versions().

    :raises RevisionNotPresent: if a requested version is not in the knit.
    """
    if version_ids is not None:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    else:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # every requested version has to be available
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # build the (version_id, position, length) queue in versions() order
    version_id_records = []
    for version_id in self.versions():
        if version_id not in requested_versions:
            continue
        data_pos, length = self._index.get_position(version_id)
        version_id_records.append((version_id, data_pos, length))

    total = len(version_id_records)
    version_idx = 0
    for version_id, data, sha_value in \
            self._data.read_records_iter(version_id_records):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract = self.factory.get_fulltext_content
        else:
            extract = self.factory.get_linedelta_content
        for line in extract(data):
            yield line
        version_idx += 1

    pb.update('Walking content.', total, total)

913

914

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    safe_ids = []
    for requested in version_ids:
        safe_ids.append(osutils.safe_revision_id(requested))
    return self._index.iter_parents(safe_ids)

926

927

def num_versions(self):
    """Return the index's version count; see VersionedFile."""
    index = self._index
    return index.num_versions()

930

931

__len__ = num_versions

932

933

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields (origin, text) pairs taken from the annotate_iter() of the
    content object built for version_id.
    """
    safe_id = osutils.safe_revision_id(version_id)
    annotated = self._get_content(safe_id).annotate_iter()
    for origin, text in annotated:
        yield origin, text

939

940

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: when the index lookup raises KeyError.
    """
    # perf notes (from the original author):
    # optimism counts!  Asking the index directly and translating the
    # KeyError beats checking for presence first:
    # 52554 calls in 1264 872 internal down from 3674
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

950

951

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: when the index lookup raises KeyError.
    """
    safe_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(safe_id)
    except KeyError:
        raise RevisionNotPresent(safe_id, self.filename)
    return parents

958

959

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id (a string) or a sequence of
        version ids.
    :param topo_sorted: Passed straight through to the index.
    :return: [] when no versions were requested, otherwise the result of
        the index's get_ancestry for the sanitised ids.
    """
    if isinstance(versions, basestring):
        # A lone id was given; treat it as a one-element request.
        versions = [versions]
    if not versions:
        return []
    safe_versions = []
    for version in versions:
        safe_versions.append(osutils.safe_revision_id(version))
    return self._index.get_ancestry(safe_versions, topo_sorted)

967

968

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id (a string) or a sequence of
        version ids.
    :return: [] when no versions were requested, otherwise the result of
        the index's get_ancestry_with_ghosts for the sanitised ids.
    """
    if isinstance(versions, basestring):
        # A lone id was given; treat it as a one-element request.
        versions = [versions]
    if not versions:
        return []
    safe_versions = []
    for version in versions:
        safe_versions.append(osutils.safe_revision_id(version))
    return self._index.get_ancestry_with_ghosts(safe_versions)

976

977

#@deprecated_method(zero_eight)

978

def walk(self, version_ids):
    """See VersionedFile.walk.

    Takes the short path: every text in the ancestry of version_ids is
    extracted and inserted into a temporary Weave, and the weave's own
    walk() does the real work.  Far from optimal, but much simpler.
    """
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    weave = Weave(self.filename)
    wanted = set(self.get_ancestry(version_ids, topo_sorted=False))
    # Parents have to be added to the weave before their children, so
    # walk the whole graph in topological order and keep what is wanted.
    ordered_ids = []
    for vid in topo_sort(self._index.get_graph()):
        if vid in wanted:
            ordered_ids.append(vid)

    for vid in ordered_ids:
        text_lines = self.get_lines(vid)
        weave.add_lines(vid, self.get_parents(vid), text_lines)

    for lineno, insert_id, dset, line in weave.walk(version_ids):
        yield lineno, insert_id, dset, line

997

998

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Yields (state, text) pairs.  Lines present on only one side are
    reported as 'killed-b'/'new-a' (lines of ver_a) or 'killed-a'/'new-b'
    (lines of ver_b), depending on whether the revision that introduced
    the line is in the other side's ancestry.  Lines matched by the
    sequence matcher are reported as "unchanged".
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [text for (origin, text) in annotated_a]
    plain_b = [text for (origin, text) in annotated_b]
    matcher = KnitSequenceMatcher(None, plain_a, plain_b)

    a_cur = 0
    b_cur = 0
    for a_start, b_start, length in matcher.get_matching_blocks():
        # Everything between the previous match and this one is a
        # mismatch.  (The last mismatched section is handled because the
        # blocks always include a 0-length last block.)
        for revision, text in annotated_a[a_cur:a_start]:
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:b_start]:
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # Now the matched section itself.
        a_cur = a_start + length
        b_cur = b_start + length
        matched = zip(plain_a[a_start:a_cur], plain_b[b_start:b_cur])
        for text_a, text_b in matched:
            assert text_a == text_b
            yield "unchanged", text_a

1038

1039

1040

class _KnitComponentFile(object):
    """One of the files used to implement a knit database.

    Only stores where the file lives (transport + filename) and the
    settings to use when it has to be created; subclasses are expected to
    supply a HEADER attribute for check_header().
    """

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember the location and creation settings of the file.

        :param transport: Transport the file is accessed through.
        :param filename: Name of the file, relative to the transport.
        :param mode: Access mode, stored as given.
        :param file_mode: Permission mode used when creating the file.
        :param create_parent_dir: Stored for use when the file is created.
        :param dir_mode: Permission mode for created directories.
        """
        self._transport = transport
        self._filename = filename
        self._mode = mode
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        # Subclasses flip this when creation is delayed to the first write.
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Check that the first line read from fp is this file's HEADER.

        :raises errors.NoSuchFile: if fp yields nothing at all.
        :raises KnitHeaderError: if the first line is not self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file
            # doesn't exist yet.
            raise errors.NoSuchFile(self._full_path())
        if first_line != self.HEADER:
            location = self._transport.abspath(self._filename)
            raise KnitHeaderError(badline=first_line, filename=location)

    def commit(self):
        """Commit is a nop."""

    def __repr__(self):
        description = (self.__class__.__name__, self._filename)
        return '%s(%s)' % description

1072

1073

1074

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is kept in memory and read on startup, to enable fast
    lookups of revision information.  The cursor of the index file is
    always pointing to the end, making it easy to append entries.

    _cache maps a version id to its index entry, the tuple
    (version_id, options, pos, size, parents, index).

    _history maps index numbers to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a lower
    index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per
    knit record.  The same revision can be present in an index file more
    than once.  The first occurrence gets assigned a sequence number
    starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record.  Values
        include no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data
        file that the compressed data starts at.
    LENGTH is the ascii representation of the length of the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of
        a revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a
    line that does not end in ' :'.  If the ' :' is not present at the end
    of a line, or at the end of the file, then the record that is missing
    it will be ignored by the parser.

    When writing new records to the index file, the data is preceded by
    '\n' to ensure that records always start on new lines even if the last
    write was interrupted.  As a result it is normal for the last line in
    the index to be missing a trailing newline.  One can be added with no
    harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).
        """
        previous = self._cache.get(version_id)
        if previous is None:
            # First sighting: the new entry takes the next history slot.
            index = len(self._history)
            self._history.append(version_id)
        else:
            # Only the 1st index entry for a version_id is referenced by
            # _history, so a replacement keeps the original slot number.
            index = previous[5]
        entry = (version_id, options, pos, size, parents, index)
        self._cache[version_id] = entry

    def __init__(self, transport, filename, mode, create=False,
                 file_mode=None, create_parent_dir=False,
                 delay_create=False, dir_mode=None):
        """Load the index, creating it when that is allowed and needed.

        :param create: With mode 'w', create the index if it is missing.
        :param delay_create: Instead of writing the header immediately,
            only flag the file as needing creation on the first write.
        :raises NoSuchFile: if the index is missing and mode is not 'w' or
            create is false.

        The remaining parameters are passed to _KnitComponentFile.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            may_create = (mode == 'w' and create)
            if not may_create:
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        graph = []
        for vid, entry in self._cache.iteritems():
            graph.append((vid, entry[4]))
        return graph

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        cache = self._cache
        # Build a graph of all the mentioned versions.
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                recorded_parents = cache[version][4]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # trim ghosts
            parents = [p for p in recorded_parents if p in cache]
            # queue the parents that have not been completed yet
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if topo_sorted:
            return topo_sort(graph.items())
        return graph.keys()

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        self.check_versions_present(versions)
        cache = self._cache
        # Build a graph of all the mentioned versions.
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            entry = cache.get(version)
            if entry is None:
                # ghost, fake it
                graph[version] = []
                continue
            parents = entry[4]
            # queue the parents that have not been completed yet
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped.  The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        for version_id in version_ids:
            try:
                present_parents = tuple(self.get_parents(version_id))
                yield version_id, present_parents
            except KeyError:
                # not in the index: skip it
                pass

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        history = self._history
        return len(history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Serialise parent references for an index line.

        Versions present in the index are written as their sequence
        number, all others as '.' followed by the version id.
        """
        cache = self._cache
        tokens = []
        for version in versions:
            entry = cache.get(version)
            if entry is None:
                tokens.append('.' + version)
            else:
                # -- inlined lookup() --
                tokens.append(str(entry[5]))
                # -- end lookup () --
        return ' '.join(tokens)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        record = (version_id, options, pos, size, parents)
        self.add_versions((record,))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        The in-memory state is updated first and rolled back if anything,
        including the write to the transport, fails.

        :param versions: a list of tuples:
                         (version_id, options, pos, size, parents).
        """
        # Snapshots used to undo the in-memory changes on failure.
        orig_history = self._history[:]
        orig_cache = self._cache.copy()
        lines = []

        try:
            for version_id, options, pos, size, parents in versions:
                fields = (version_id,
                          ','.join(options),
                          pos,
                          size,
                          self._version_list_to_index(parents))
                line = "\n%s %s %s %s %s :" % fields
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if self._need_to_create:
                # Creation was delayed: write header and records in one go.
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
            else:
                self._transport.append_bytes(self._filename, ''.join(lines))
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        pos = entry[2]
        size = entry[3]
        return pos, size

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises errors.KnitIndexUnknownMethod: if the options name neither
            'fulltext' nor 'line-delta'.
        """
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        if 'line-delta' in options:
            return 'line-delta'
        raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return the options recorded for version_id.

        The value is returned exactly as it is held in the cache entry;
        add_versions() joins it with ',' when writing, e.g. foo,bar
        """
        entry = self._cache[version_id]
        return entry[1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        cache = self._cache
        present = []
        for parent in cache[version_id][4]:
            if parent in cache:
                present.append(parent)
        return present

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        entry = self._cache[version_id]
        return entry[4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first missing version found.
        """
        known = self._cache
        for version_id in version_ids:
            if version_id in known:
                continue
            raise RevisionNotPresent(version_id, self._filename)

1346

1347

1348

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Each node value written by add_versions() is a one-character flag
    ('N' when the record has the no-eol option, ' ' otherwise) followed by
    "<pos> <size>".  When parents are tracked the first reference list of
    a node holds its parents; when deltas are enabled a second reference
    list holds the compression parent (empty for a fulltext).
    """

    def __init__(self, graph_index, deltas=False, parents=True,
                 add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and
            call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas are requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, version_ids, check_present=False):
        """Get the entries for version_ids.

        Yields (key, value, node_refs) for every requested id found in the
        graph index; for a parentless index node_refs is always ().

        :param check_present: If True, raise RevisionNotPresent once the
            found entries have been yielded and some requested id was not
            among them.
        """
        version_ids = set(version_ids)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(version_ids):
                yield node
                found_keys.add(node[0])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(version_ids):
                yield node[0], node[1], ()
                found_keys.add(node[0])
        if check_present:
            missing_keys = version_ids.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of version_ids that exist in the graph index."""
        return set([
            node[0] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        present_keys = self._present_keys(versions)
        missing = set(versions).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(present_keys)

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        # NOTE(review): nothing ever adds to ghosts, so the filtering and
        # the difference_update below are currently no-ops.  Left as is
        # because changing it would alter the parent lists handed to
        # topo_sort - confirm the intended behaviour before touching it.
        ghosts = set()
        versions = set(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                pending.update(graph[key])
            ghosts.difference_update(graph)
            # dont examine known nodes
            pending.difference_update(graph)
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = set(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                pending.update(graph[key])
            missing_versions = this_iteration.difference(graph)
            # A missing version that was explicitly requested is an
            # error; any other missing version is a ghost parent.
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
            # dont examine known nodes
            pending.difference_update(graph)
        return topo_sort(graph.items())

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        return [(key, refs[0]) for (key, value, refs) in
            self._graph_index.iter_all_entries()]

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply
            skipped.  The order is undefined, allowing for different
            optimisations in the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(version_ids))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[2][0])
                # any node we are querying must be present
                present_parents.add(node[0])
            # Only parents not already known to be present need an index
            # lookup to tell real parents from ghosts.
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[2][0]:
                    if parent in present_parents:
                        parents.append(parent)
                yield node[0], tuple(parents)
        else:
            for node in self._get_entries(version_ids):
                yield node[0], ()

    def num_versions(self):
        """Return the number of entries in the graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys([version_id])) == 1

    def get_position(self, version_id):
        """Return data position and size of specified version.

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        # Skip the leading no-eol flag character, then split "pos size".
        bits = self._get_node(version_id)[1][1:].split(' ')
        return int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version.

        :raises RevisionNotPresent: if deltas are enabled and version_id
            is not in the index.
        """
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[2][1])

    def _parent_compression(self, reference_list):
        """Map a compression-parent reference list to a method name."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the (key, value, node_refs) entry for version_id.

        :raises RevisionNotPresent: if version_id is not in the index.
            (Indexing an empty result list used to leak an IndexError.)
        """
        nodes = list(self._get_entries([version_id]))
        if not nodes:
            raise RevisionNotPresent(version_id, self)
        return nodes[0]

    def get_options(self, version_id):
        """Return a string representation of the options.

        e.g. foo,bar

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[2][1])]
        if node[1][0] == 'N':
            options.append('no-eol')
        return ','.join(options)

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts.

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts.

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        nodes = list(self._get_entries([version_id], check_present=True))
        if not self._parents:
            return ()
        return nodes[0][2][0]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: naming one of the missing versions.
        """
        version_ids = set(version_ids)
        present = self._present_keys(version_ids)
        missing = version_ids.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, pos, size, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, pos, size, parents).
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: for a line-delta in a non-delta index, parents
            in a parentless index, or details that disagree with an entry
            already in the index.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, pos, size, parents) in versions:
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # The first parent is recorded as the compression
                        # parent.
                        node_refs = (tuple(parents), (parents[0],))
                    else:
                        node_refs = (tuple(parents), ())
                else:
                    node_refs = (tuple(parents), )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[version_id] = (value, node_refs)
        # Entries already in the index must match exactly; they are then
        # dropped so that only genuinely new nodes reach the callback.
        present_nodes = self._get_entries(keys)
        for (key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

1629

1630

1631

class _KnitData(_KnitComponentFile):

1632

"""Contents of the knit data file"""

1633

1634

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Set up the data file wrapper, optionally creating the file.

    :param create: If true, make sure the data file gets created.
    :param delay_create: With create, postpone the creation to the first
        write instead of writing an empty file now.

    The remaining parameters are passed to _KnitComponentFile.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._checked = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}
    self._do_cache = False
    if not create:
        return
    if delay_create:
        self._need_to_create = create
    else:
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

1654

1655

def enable_cache(self):
    """Enable caching of reads.

    Only sets the flag; records already held in the cache are untouched.
    """
    self._do_cache = True

1658

1659

def clear_cache(self):
    """Clear the record cache.

    Caching is switched off as well, and the cache is replaced by a new
    empty dictionary rather than emptied in place.
    """
    self._cache = {}
    self._do_cache = False

1663

1664

def _open_file(self):
    """Open the data file through the transport.

    :return: Whatever the transport's get() returns, or None when the
        transport raises NoSuchFile.
    """
    try:
        handle = self._transport.get(self._filename)
    except NoSuchFile:
        handle = None
    return handle

1670

1671

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream containing a "version ..." header line,
    the given lines and an "end ..." trailer line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    sio = StringIO()
    data_file = GzipFile(None, mode='wb', fileobj=sio)

    assert isinstance(version_id, str)
    header = "version %s %d %s\n" % (version_id, len(lines), digest)
    trailer = "end %s\n" % version_id
    data_file.writelines(chain([header], lines, [trailer]))
    # Closing the gzip stream flushes the remaining compressed data, so
    # the position of sio afterwards is the full record length.
    data_file.close()
    length = sio.tell()

    # Rewind so the caller can read the record from the start.
    sio.seek(0)
    return length, sio

1691

1692

def add_raw_record(self, raw_data):
    """Append a prepared record to the data file.

    When creation of the file was delayed, the record becomes the whole
    content of the newly created file.

    :return: the offset in the data file raw_data was written.
    """
    assert isinstance(raw_data, str), 'data must be plain bytes'
    if self._need_to_create:
        self._transport.put_bytes_non_atomic(self._filename, raw_data,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
        self._need_to_create = False
        # The record is the first thing in the new file.
        return 0
    return self._transport.append_bytes(self._filename, raw_data)

1707

1708

def add_record(self, version_id, digest, lines):
    """Write new text record to disk.

    :return: (start_pos, size): the position in the file where the record
        was written and the length of the raw record.
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    # write to disk
    if self._need_to_create:
        # Creation was delayed, so this record starts the file.
        self._transport.put_file_non_atomic(self._filename, sio,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
        self._need_to_create = False
        start_pos = 0
    else:
        start_pos = self._transport.append_file(self._filename, sio)
    if self._do_cache:
        # Remember the raw record so later reads can skip the transport.
        self._cache[version_id] = sio.getvalue()
    return start_pos, size

1725

1726

def _parse_record_header(self, version_id, raw_data):

1727

"""Parse a record header for consistency.

1728

1729

:return: the header and the decompressor stream.

1730

as (stream, header_record)

1731

"""

1732

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1733

try:

1734

rec = self._check_header(version_id, df.readline())

1735

except Exception, e:

1736

raise KnitCorrupt(self._filename,

1737

"While reading {%s} got %s(%s)"

1738

% (version_id, e.__class__.__name__, str(e)))

1739

return df, rec

1740

1741

def _check_header(self, version_id, line):
    """Split a record header line and check it is for version_id.

    :param line: The first line of a record; it must split on whitespace
        into exactly four fields, the second being the version id.
    :return: The list of four header fields.
    :raises KnitCorrupt: on a wrong field count or a different version id.
    """
    fields = line.split()
    if len(fields) != 4:
        raise KnitCorrupt(self._filename,
                          'unexpected number of elements in record header')
    found_version = fields[1]
    if found_version != version_id:
        raise KnitCorrupt(self._filename,
                          'unexpected version, wanted %r, got %r'
                          % (version_id, found_version))
    return fields

1751

1752

def _parse_record(self, version_id, data):

1753

# profiling notes:

1754

# 4168 calls in 2880 217 internal

1755

# 4168 calls to _parse_record_header in 2121

1756

# 4168 calls to readlines in 330

1757

df = GzipFile(mode='rb', fileobj=StringIO(data))

1758

1759

try:

1760

record_contents = df.readlines()

1761

except Exception, e:

1762

raise KnitCorrupt(self._filename,

1763

"While reading {%s} got %s(%s)"

1764

% (version_id, e.__class__.__name__, str(e)))

1765

header = record_contents.pop(0)

1766

rec = self._check_header(version_id, header)

1767

1768

last_line = record_contents.pop()

1769

if len(record_contents) != int(rec[2]):

1770

raise KnitCorrupt(self._filename,

1771

'incorrect number of lines %s != %s'

1772

' for version {%s}'

1773

% (len(record_contents), int(rec[2]),

1774

version_id))

1775

if last_line != 'end %s\n' % rec[1]:

1776

raise KnitCorrupt(self._filename,

1777

'unexpected version end line %r, wanted %r'

1778

% (last_line, version_id))

1779

df.close()

1780

return record_contents, rec[3]

1781

1782

def read_records_iter_raw(self, records):
    """Yield (version_id, raw_data) for each requested record, in order.

    Records whose version id is in self._cache are served from the cache.
    The others are fetched with one readv call on the transport, and only
    their header is decompressed, to check that the record carries the
    expected version id.

    :param records: A sequence of (version_id, pos, size) tuples.
    """
    if len(records):
        # Work out which byte ranges must come from the data file. An
        # empty cache cannot hold anything, so skip the membership tests.
        if self._cache:
            wanted = [record for record in records
                      if record[0] not in self._cache]
        else:
            wanted = records
        needed_offsets = [(pos, size) for version_id, pos, size in wanted]
        # readv lets the transport fetch all the ranges in one request.
        raw_records = self._transport.readv(self._filename, needed_offsets)

    for version_id, pos, size in records:
        if version_id not in self._cache:
            pos, data = raw_records.next()
            if self._do_cache:
                self._cache[version_id] = data

            # Freshly read data: validate the header before yielding it.
            stream, header = self._parse_record_header(version_id, data)
            stream.close()
        else:
            # Cached data has already been validated.
            data = self._cache[version_id]
        yield version_id, data

1816

1817

def read_records_iter(self, records):
    """Yield the parsed content of each requested record.

    Records come back in whatever order is fastest to read, not the order
    requested, and a record requested several times is yielded once.
    Records found in self._cache are yielded first; the remainder are
    read from the transport sorted by their position in the data file.

    :param records: A list of (version_id, pos, len) entries.
    :return: Yields (version_id, contents, digest) in the order read,
        not the order requested.
    """
    if not records:
        return

    by_position = operator.itemgetter(1)
    if not self._cache:
        needed_records = sorted(set(records), key=by_position)
    else:
        # Serve cached records straight away, once each, and queue the
        # others for reading.
        served = set()
        pending = set()
        for record in records:
            version_id = record[0]
            if version_id not in self._cache:
                pending.add(record)
            elif version_id not in served:
                served.add(version_id)
                cached_data = self._cache[version_id]
                content, digest = self._parse_record(version_id,
                                                     cached_data)
                yield (version_id, content, digest)
        needed_records = sorted(pending, key=by_position)

    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    offsets = [(pos, size) for version_id, pos, size in needed_records]
    readv_response = self._transport.readv(self._filename, offsets)

    for record, (pos, data) in izip(iter(needed_records), readv_response):
        version_id = record[0]
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

1862

1863

def read_records(self, records):
    """Read records and return them keyed by record id.

    :param records: The records to read, as accepted by
        read_records_iter.
    :return: A dict mapping each record id to its (content, digest) pair.
    """
    return dict((record_id, (content, digest))
                for record_id, content, digest
                in self.read_records_iter(records))

1870

1871

1872

class InterKnit(InterVersionedFile):
    """Optimised knit-to-knit operations.

    join() copies the raw stored records from the source knit into the
    target knit rather than re-adding each text.
    """

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Note that the pb argument is not used: a nested progress bar is
        always created from the ui factory.

        :return: The number of versions whose records were copied.
        """
        source = self.source
        assert isinstance(source, KnitVersionedFile)
        target = self.target
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            ancestry = set(source.get_ancestry(version_ids))
            self.source_ancestry = ancestry
            this_versions = set(target._index.get_versions())
            needed_versions = ancestry.difference(this_versions)
            mismatched_versions = set()
            # Versions present on both sides need their parents compared.
            for version in ancestry.intersection(this_versions):
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source's parents.
                for parent in source_parents:
                    if parent not in target_parents:
                        parent_ancestors = source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure the extra parents will be available later.
                extra_parents = source_parents - target_parents
                needed_versions.update(extra_parents - this_versions)
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = [candidate for candidate in full_list
                            if (not target.has_version(candidate)
                                and candidate in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = source._index.get_options(version_id)
                parents = source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in copy_set or
                            not source.has_version(parent))
                data_pos, data_size = source._index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), (queued_id, options, parents) in \
                    izip(raw_stream, copy_queue):
                assert version_id == queued_id, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents,
                                    len(raw_data)))
                raw_datum.append(raw_data)
            target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                extra_parents = list(source_parents.difference(target_parents))
                new_parents = (target.get_parents_with_ghosts(version)
                               + extra_parents)
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

1987

1988

1989

class WeaveToKnit(InterVersionedFile):
    """Optimised weave-to-knit operations.

    join() adds the lines of each missing version from the source weave
    to the target knit, in topological order.
    """

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True for a Weave source and a KnitVersionedFile target."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Note that the pb argument is not used: a nested progress bar is
        always created from the ui factory.

        :return: The number of versions added to the target.
        """
        source = self.source
        assert isinstance(source, bzrlib.weave.Weave)
        target = self.target
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            ancestry = set(source.get_ancestry(version_ids))
            self.source_ancestry = ancestry
            this_versions = set(target._index.get_versions())
            needed_versions = ancestry.difference(this_versions)
            mismatched_versions = set()
            # Versions present on both sides need their parents compared.
            for version in ancestry.intersection(this_versions):
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents(version))
                # if all of the source's parents are in the target's, it's
                # fine.
                if not source_parents.difference(target_parents):
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source's parents.
                for parent in source_parents:
                    if parent not in target_parents:
                        parent_ancestors = source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure the extra parents will be available later.
                extra_parents = source_parents - target_parents
                needed_versions.update(extra_parents - this_versions)
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = [candidate for candidate in full_list
                            if (not target.has_version(candidate)
                                and candidate in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                extra_parents = list(source_parents.difference(target_parents))
                new_parents = (target.get_parents_with_ghosts(version)
                               + extra_parents)
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2080

2081

2082

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern. Only find_longest_match is overridden; the
    profiling figures that motivated the change are recorded in comments
    inside that method.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)

        :return: A plain (besti, bestj, bestsize) tuple.
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        # Bind the attributes used below to locals once, up front.
        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # nothing = []
        # NOTE(review): b2jget is not referenced anywhere below; it looks
        # like a leftover from the b2j.get() lookup that the try/except
        # further down replaced.
        b2jget = b2j.get
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            # 704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            # +76519   0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            # 704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            # +211400  0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            # +76519   0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                # a[i] does not occur in b2j: nothing can match at this i.
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        # Stops at the first j at or past bhi, which relies
                        # on js being in increasing order -- presumably
                        # guaranteed by how the base class builds b2j;
                        # TODO confirm.
                        if j >= bhi:
                            break
                        # Extend the match that ended at a[i-1], b[j-1]
                        # (length 0 if there was none).
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize

2214

2215

2216

try:

2217

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2218

except ImportError:

2219

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »