/brz/remove-bazaar : revision 3890.2.8

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2008-12-11 03:08:03 UTC
mto: This revision was merged to the branch mainline in revision 3895.
Revision ID: john@arbash-meinel.com-20081211030803-gctunob7zsten3qg

Move everything into properly parameterized tests.

Also add tests that we preserve the object when it is already lines.

The compiled form takes 450us on a 7.6k line file (NEWS).
So for common cases, we should have virtually no overhead.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

errors,

osutils,

patiencediff,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

SHA1KnitCorrupt,

100

)

101

from bzrlib.osutils import (

102

contains_whitespace,

103

contains_linebreaks,

104

sha_string,

105

sha_strings,

106

split_lines,

107

)

108

from bzrlib.versionedfile import (

109

AbsentContentFactory,

110

adapter_registry,

111

ConstantMapper,

112

ContentFactory,

113

ChunkedContentFactory,

114

VersionedFile,

115

VersionedFiles,

116

)

117

118

119

# TODO: Split out code specific to this format into an associated object.

120

121

# TODO: Can we put in some kind of value to check that the index and data

122

# files belong together?

123

124

# TODO: accommodate binaries, perhaps by storing a byte count

125

126

# TODO: function to check whole file

127

128

# TODO: atomically append data, then measure backwards from the cursor

129

# position after writing to work out where it was located. we may need to

130

# bypass python file buffering.

131

132

DATA_SUFFIX = '.knit'

133

INDEX_SUFFIX = '.kndx'

134

135

136

class KnitAdapter(object):
    """Common base for the knit record adapters in this module.

    It only wires up the helper objects the concrete adapters share; the
    conversion itself lives in each subclass's ``get_bytes``.
    """

    def __init__(self, basis_vf):
        """Prepare the helpers used to re-encode knit records.

        :param basis_vf: A versioned file from which the basis texts of
            deltas are read. May be None for adapters that never need to
            look up a basis text.
        """
        self._basis_vf = basis_vf
        self._plain_factory = KnitPlainFactory()
        self._annotate_factory = KnitAnnotateFactory()
        # Built with no index and no access object: the adapters in this
        # module call its _parse_record_unchecked and _record_to_data
        # helpers only.
        self._data = KnitVersionedFiles(None, None)

149

150

151

class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated fulltext knit record into a plain one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Re-serialise an annotated fulltext record without annotations.

        :param factory: The content factory for the record (not consulted
            by this adapter).
        :param annotated_compressed_bytes: The raw annotated knit record.
        :return: The bytes half of what ``_record_to_data`` returns for the
            unannotated text.
        """
        rec, contents = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = rec[1]
        content = self._annotate_factory.parse_fulltext(contents, version_id)
        # _record_to_data hands back a (size, bytes) pair; only the bytes
        # are wanted here.
        _size, raw_bytes = self._data._record_to_data(
            (version_id,), rec[3], content.text())
        return raw_bytes

160

161

162

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated line-delta record into a plain one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Re-serialise an annotated line delta without annotations.

        :param factory: The content factory for the record (not consulted
            by this adapter).
        :param annotated_compressed_bytes: The raw annotated knit record.
        :return: The bytes half of what ``_record_to_data`` returns for the
            unannotated delta.
        """
        rec, contents = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = rec[1]
        # plain=True drops the per-line origins while parsing.
        plain_delta = self._annotate_factory.parse_line_delta(
            contents, version_id, plain=True)
        lowered = self._plain_factory.lower_line_delta(plain_delta)
        _size, raw_bytes = self._data._record_to_data(
            (version_id,), rec[3], lowered)
        return raw_bytes

173

174

175

class FTAnnotatedToFullText(KnitAdapter):
    """Adapter from an annotated fulltext knit record to a fulltext string."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the text held in an annotated fulltext record.

        :param factory: The content factory for the record; its ``key`` and
            ``_build_details`` are passed on to ``parse_record``.
        :param annotated_compressed_bytes: The raw annotated knit record.
        :return: The lines of the parsed content joined into one string.
        """
        _rec, contents = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        # No base content is supplied (None): the record is a fulltext.
        content, _delta = self._annotate_factory.parse_record(
            factory.key[-1], contents, factory._build_details, None)
        text_lines = content.text()
        return ''.join(text_lines)

184

185

186

class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapter from an annotated line-delta record to a fulltext string."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Rebuild the fulltext by applying the delta to its basis text.

        :param factory: The content factory for the record; the first entry
            of its ``parents`` is used as the compression parent and
            ``_build_details[1]`` as the strip-eol flag.
        :param annotated_compressed_bytes: The raw annotated knit record.
        :return: The resulting text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        rec, contents = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        version_id = rec[1]
        plain_delta = self._annotate_factory.parse_line_delta(
            contents, version_id, plain=True)
        compression_parent = factory.parents[0]
        basis_stream = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True)
        basis_entry = basis_stream.next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_text = basis_entry.get_bytes_as('fulltext')
        # The delta is applied by hand: the record is annotated, but the
        # basis is held as plain content.
        basis_content = PlainKnitContent(split_lines(basis_text),
                                         compression_parent)
        basis_content.apply_delta(plain_delta, version_id)
        basis_content._should_strip_eol = factory._build_details[1]
        return ''.join(basis_content.text())

206

207

208

class FTPlainToFullText(KnitAdapter):
    """Adapter from a plain fulltext knit record to a fulltext string."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the text held in an unannotated fulltext record.

        :param factory: The content factory for the record; its ``key`` and
            ``_build_details`` are passed on to ``parse_record``.
        :param compressed_bytes: The raw knit record.
        :return: The lines of the parsed content joined into one string.
        """
        _rec, contents = self._data._parse_record_unchecked(compressed_bytes)
        # No base content is supplied (None): the record is a fulltext.
        content, _delta = self._plain_factory.parse_record(
            factory.key[-1], contents, factory._build_details, None)
        text_lines = content.text()
        return ''.join(text_lines)

217

218

219

class DeltaPlainToFullText(KnitAdapter):
    """Adapter from a plain line-delta knit record to a fulltext string."""

    def get_bytes(self, factory, compressed_bytes):
        """Rebuild the fulltext by applying the delta to its basis text.

        :param factory: The content factory for the record; the first entry
            of its ``parents`` is used as the compression parent and its
            ``_build_details`` are passed on to ``parse_record``.
        :param compressed_bytes: The raw knit record.
        :return: The resulting text as a single string.
        :raises errors.RevisionNotPresent: If the basis versioned file
            reports the compression parent as absent.
        """
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # parse_record parses the line delta itself and applies it to a copy
        # of basis_content, so the delta is not parsed separately here.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())

239

240

241

class KnitContentFactory(ContentFactory):
    """Content factory used when streaming records out of a knit.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
                 annotated, knit=None):
        """Describe one knit record identified by key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object whose ``get_lines`` / ``get_text`` are
            used to serve 'chunked' and 'fulltext' requests.
        """
        ContentFactory.__init__(self)
        self.key = key
        self.parents = parents
        self.sha1 = sha1
        # The storage kind name encodes both the annotation flag and
        # whether the record is a delta or a fulltext.
        kind = 'ft'
        if build_details[0] == 'line-delta':
            kind = 'delta'
        annotated_kind = ''
        if annotated:
            annotated_kind = 'annotated-'
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._build_details = build_details
        self._knit = knit

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: The representation wanted. The record's own
            storage kind returns the raw record; 'chunked' and 'fulltext'
            are served only when a knit was supplied at construction.
        :raises errors.UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            return self._raw_record
        knit = self._knit
        if knit is not None:
            if storage_kind == 'chunked':
                return knit.get_lines(self.key[0])
            if storage_kind == 'fulltext':
                return knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)

286

287

288

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The lines are held in memory with a trailing \\n on every line, together
    with a flag recording whether the final line ending is really present,
    because that corresponds to the on-disk knit representation.
    """

    def __init__(self):
        # Set to True when the last line's trailing newline must be
        # stripped on output.
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Not implemented on the base class.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield a line-based delta from this content to new_lines.

        :param new_lines: Another content object; its ``text()`` is diffed
            against this object's and its ``_lines`` supply the hunk data.
        :return: An iterator of (old_start, old_end, new_length, new_data)
            tuples, one per non-equal opcode.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                       new_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return the output of line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        :param knit_delta: An iterable of (start, end, count, lines) hunks.
        :param source: The lines the delta is applied to.
        :param target: The lines that result from applying the delta.
        :return: An iterator of (source_pos, target_pos, length) tuples,
            ending with a zero-length block.
        """
        target_len = len(target)
        src_pos = 0
        tgt_pos = 0
        for hunk_start, hunk_end, inserted, _new_text in knit_delta:
            gap = hunk_start - src_pos
            matched = gap
            if matched > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                last_src = source[src_pos + matched - 1]
                last_tgt = target[tgt_pos + matched - 1]
                if last_src != last_tgt:
                    matched -= 1
                if matched > 0:
                    yield src_pos, tgt_pos, matched
            tgt_pos += inserted + gap
            src_pos = hunk_end
        # Whatever remains of the target after the last hunk.
        tail = target_len - tgt_pos
        matched = tail
        if matched > 0:
            if source[src_pos + matched - 1] != target[tgt_pos + matched - 1]:
                matched -= 1
            if matched > 0:
                yield src_pos, tgt_pos, matched
        yield src_pos + tail, target_len, 0

342

343

344

class AnnotatedKnitContent(KnitContent):
    """Knit content whose lines are stored as (origin, text) pairs."""

    def __init__(self, lines):
        KnitContent.__init__(self)
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line.

        A copy of the stored lines is returned; when the strip-eol flag is
        set, the last line's text has its trailing newline removed.
        """
        annotated = self._lines[:]
        if not self._should_strip_eol:
            return annotated
        origin, last_text = annotated[-1]
        annotated[-1] = (origin, last_text.rstrip('\n'))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        The stored lines are modified in place. new_version_id is not used
        here.
        """
        lines = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            lines[shift + start:shift + end] = delta_lines
            # Later hunks are addressed in pre-delta coordinates, so track
            # how far the list has grown or shrunk so far.
            shift += count - (end - start)

    def text(self):
        """Return the content lines with the annotations removed.

        :raises KnitCorrupt: If a stored line does not unpack into an
            (origin, text) pair.
        """
        try:
            texts = [text for origin, text in self._lines]
        except ValueError:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            err = sys.exc_info()[1]
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (err,))
        if self._should_strip_eol:
            texts[-1] = texts[-1].rstrip('\n')
        return texts

    def copy(self):
        """Return a new AnnotatedKnitContent over a copy of the lines."""
        return AnnotatedKnitContent(self._lines[:])

383

384

385

class PlainKnitContent(KnitContent):
    """Unannotated content.

    annotate() reports this object's single version id as the origin of
    every line, so annotating a PlainKnitContent is generally not useful.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._version_id = version_id
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, line) for line in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        The stored lines are modified in place and the version id is
        replaced by new_version_id.
        """
        lines = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            lines[shift + start:shift + end] = delta_lines
            # Later hunks are addressed in pre-delta coordinates, so track
            # how far the list has grown or shrunk so far.
            shift += count - (end - start)
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the lines."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the content lines.

        The stored list itself is returned unless the strip-eol flag is
        set, in which case a copy with the last line's trailing newline
        removed is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

420

421

422

class _KnitFactory(object):
    """Behaviour shared by the annotated and plain knit factories."""

    def parse_record(self, version_id, record, record_details,
                     base_content, copy_base_content=True):
        """Parse a record into a full content object.

        :param version_id: The official version id for this content
        :param record: The data returned by read_records_iter()
        :param record_details: Details about the record returned by
            get_build_details; unpacked here as (method, noeol).
        :param base_content: If get_build_details returns a compression_parent,
            you must return a base_content here, else use None
        :param copy_base_content: When building from the base_content, decide
            you can either copy it and return a new object, or modify it in
            place.
        :return: (content, delta) A Content object and possibly a line-delta,
            delta may be None
        """
        method, noeol = record_details
        if method != 'line-delta':
            # Anything other than a line delta is parsed as a fulltext.
            content = self.parse_fulltext(record, version_id)
            delta = None
        else:
            content = base_content
            if copy_base_content:
                content = base_content.copy()
            delta = self.parse_line_delta(record, version_id)
            content.apply_delta(delta, version_id)
        content._should_strip_eol = noeol
        return (content, delta)

454

455

456

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return annotated content attributing every line to version_id."""
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        version_id is not used here; the origins come from the lines.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta(lines).

        :param version_id: Passed through to parse_line_delta, which
            requires the argument but does not use it; optional here so
            that callers passing only ``lines`` keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        # The header loop and the body reads share one iterator, so each
        # hunk's body lines are consumed before the next header is read.
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the (origin, line) annotations of key read from knit.

        For tuple keys every origin is expanded to ``key[:-1] + (origin,)``.
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key suffixes
        # for this factory.
        if isinstance(key, tuple):
            prefix = key[:-1]
            origins = content.annotate()
            return [(prefix + (origin,), line) for origin, line in origins]
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple here?
            # Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()

578

579

580

class KnitPlainFactory(_KnitFactory):
    """Factory producing unannotated (plain) Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Wrap lines in a PlainKnitContent tagged with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        The lines are stored as given; version_id is recorded once on the
        resulting content object.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, hunk_lines) for each hunk in lines.

        :param lines: An indexable sequence holding 'start,end,count'
            header lines, each followed by count content lines.
        :param version_id: Not used here.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        Only the content lines are yielded; the hunk headers are consumed
        and dropped.
        """
        line_iter = iter(lines)
        pull = line_iter.next
        for header in line_iter:
            remaining = int(header.split(',')[2])
            while remaining > 0:
                yield pull()
                remaining -= 1

    def lower_fulltext(self, content):
        """Return the serialisable form of content: its text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise a delta as header lines followed by their content."""
        serialised = []
        for start, end, count, hunk_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(hunk_lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator over knit."""
        return _KnitAnnotator(knit).annotate(key)

640

641

642

643

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles built on a _KndxIndex and a _KnitKeyAccess.
    """
    def factory(transport):
        kndx_index = _KndxIndex(transport, mapper,
                                lambda:None, lambda:True, lambda:True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access,
                                  annotated=annotated)
    return factory

657

658

659

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles that carries the open ``stream`` and ``writer``
        as attributes.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for the graph and one more for deltas.
        ref_length = 0
        if graph:
            ref_length = 1
        max_delta_chain = 0
        if delta:
            ref_length = ref_length + 1
            max_delta_chain = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _KnitGraphIndex(graph_index, lambda:True, parents=parents,
            deltas=delta, add_callback=graph_index.add_nodes)
        access = _DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(index, access,
            max_delta_chain=max_delta_chain)
        # Kept on the result so the container can be finished and the
        # stream closed later.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

694

695

696

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object with 'stream' and 'writer'
        attributes, as set up by the factory from make_pack_factory.
    """
    pack_stream = versioned_files.stream
    pack_stream.close()
    container_writer = versioned_files.writer
    container_writer.end()

699

700

701

class KnitVersionedFiles(VersionedFiles):

702

"""Storage for many versioned files using knit compression.

703

704

Backend storage is managed by indices and data objects.

705

706

:ivar _index: A _KnitGraphIndex or similar that can describe the

707

parents, graph, compression and data location of entries in this

708

KnitVersionedFiles. Note that this is only the index for

709

*this* vfs; if there are fallbacks they must be queried separately.

710

"""

711

712

def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False, reload_func=None):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated
        and stored during insertion.
    :param reload_func: A function that can be called if we think we need
        to reload the pack listing and try again. See
        'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.
    """
    # The content factory decides whether records carry annotations.
    if annotated:
        factory = KnitAnnotateFactory()
    else:
        factory = KnitPlainFactory()
    self._factory = factory
    self._index = index
    self._access = data_access
    self._max_delta_chain = max_delta_chain
    self._reload_func = reload_func
    # No fallbacks until add_fallback_versioned_files() is called.
    self._fallback_vfs = []

736

737

def __repr__(self):

738

return "%s(%r, %r)" % (

739

self.__class__.__name__,

740

self._index,

741

self._access)

742

743

def add_fallback_versioned_files(self, a_versioned_files):
    """Register a further source for texts not present in this knit.

    :param a_versioned_files: A VersionedFiles object; it is appended to
        the list of fallbacks.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

749

750

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines()."""
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    graph_parents = parents
    if graph_parents is None:
        # Callers may pass None when there is no graph data; kndx indexes
        # can't directly store that, so hand them an empty tuple.
        graph_parents = ()
    return self._add(key, lines, graph_parents, parent_texts,
        left_matching_blocks, nostore_sha, random_id)

763

764

def _add(self, key, lines, parents, parent_texts,
    left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :param key: The key to add; every element must be a str and the last
        element is used as the version id of the stored content.
    :param lines: The lines of the text. If the final line has no
        trailing newline a copy is made, a newline is appended and the
        'no-eol' option is recorded.
    :param parents: The parent keys recorded in the index for key.
    :param parent_texts: Optional mapping passed on to
        _merge_annotations; None is replaced with an empty dict.
    :param left_matching_blocks: Passed on to _merge_annotations.
    :param nostore_sha: If equal to the sha of the joined lines,
        errors.ExistingContent is raised and nothing is stored.
    :param random_id: Passed on to the index's add_records().
    :return: (digest, text_length, content) where text_length is the
        length of the joined lines before any newline is appended.
    :raises TypeError: if key contains an element that is not a str.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    line_bytes = ''.join(lines)
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence; we only compress
    # against parents in the same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    for parent in parents:
        if parent in present_parent_map:
            present_parents.append(parent)

    # Currently we can only compress against the left most present parent.
    if (len(present_parents) == 0 or
        present_parents[0] != parents[0]):
        delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas. This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents[0])

    # Measured before the no-eol fixup below adds a newline.
    text_length = len(line_bytes)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            # copy the contents of lines.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

    for element in key:
        if type(element) != str:
            raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if 'no-eol' in options:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        # delta_hunks is always bound here: the branch above runs
        # whenever delta is true.
        options.append('line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, bytes = self._record_to_data(key, digest,
            store_lines)
    else:
        options.append('fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ == KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            size, bytes = self._record_to_data(key, digest,
                lines, [line_bytes])
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self._factory.lower_fulltext(content)
            size, bytes = self._record_to_data(key, digest,
                store_lines)

    # Store the raw record first, then index it with the returned memo.
    access_memo = self._access.add_raw_records([(key, size)], bytes)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

848

849

def annotate(self, key):
    """See VersionedFiles.annotate."""
    # The content factory knows how annotations are obtained.
    content_factory = self._factory
    return content_factory.annotate(self, key)

852

853

def check(self, progress_bar=None):
    """See VersionedFiles.check()."""
    # This doesn't actually test extraction of everything, but that will
    # impact 'bzr check' substantially, and needs to be integrated with
    # care. However, it does check for the obvious problem of a delta with
    # no basis.
    keys = self._index.keys()
    parent_map = self.get_parent_map(keys)
    for key in keys:
        if self._index.get_method(key) == 'fulltext':
            # Fulltexts have no basis to go missing.
            continue
        compression_parent = parent_map[key][0]
        if compression_parent in parent_map:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            compression_parent, key))
    # Every fallback is asked to check itself as well.
    for fallback_vfs in self._fallback_vfs:
        fallback_vfs.check()

870

871

def _check_add(self, key, lines, random_id, check_content):
    """Check that the version id of key and lines are safe to add.

    :param key: The key being added; only its last element is checked.
    :param lines: The lines being added.
    :param random_id: Not used by the checks performed here.
    :param check_content: If False the checks on lines are skipped.
    :raises InvalidRevisionId: if the version id contains whitespace.
    """
    version_id = key[-1]
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception. This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

884

885

def _check_header(self, key, line):

886

rec = self._split_header(line)

887

self._check_header_version(rec, key[-1])

888

return rec

889

890

def _check_header_version(self, rec, version_id):

891

"""Checks the header version on original format knit records.

892

893

These have the last component of the key embedded in the record.

894

"""

895

if rec[1] != version_id:

896

raise KnitCorrupt(self,

897

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

898

899

def _check_should_delta(self, parent):

900

"""Iterate back through the parent listing, looking for a fulltext.

901

902

This is used when we want to decide whether to add a delta or a new

903

fulltext. It searches for _max_delta_chain parents. When it finds a

904

fulltext parent, it sees if the total size of the deltas leading up to

905

it is large enough to indicate that we want a new full text anyway.

906

907

Return True if we should create a new delta, False if we should use a

908

full text.

909

"""

910

delta_size = 0

911

fulltext_size = None

912

for count in xrange(self._max_delta_chain):

913

# XXX: Collapse these two queries:

914

try:

915

# Note that this only looks in the index of this particular

916

# KnitVersionedFiles, not in the fallbacks. This ensures that

917

# we won't store a delta spanning physical repository

918

# boundaries.

919

method = self._index.get_method(parent)

920

except RevisionNotPresent:

921

# Some basis is not locally present: always delta

922

return False

923

index, pos, size = self._index.get_position(parent)

924

if method == 'fulltext':

925

fulltext_size = size

926

break

927

delta_size += size

928

# We don't explicitly check for presence because this is in an

929

# inner loop, and if it's missing it'll fail anyhow.

930

# TODO: This should be asking for compression parent, not graph

931

# parent.

932

parent = self._index.get_parent_map([parent])[parent][0]

933

else:

934

# We couldn't find a fulltext, so we must create a new one

935

return False

936

# Simple heuristic - if the total I/O wold be greater as a delta than

937

# the originally installed fulltext, we create a new fulltext.

938

return fulltext_size > delta_size

939

940

def _build_details_to_components(self, build_details):

941

"""Convert a build_details tuple to a position tuple."""

942

# record_details, access_memo, compression_parent

943

return build_details[3], build_details[0], build_details[1]

944

945

def _get_components_positions(self, keys, allow_missing=False):

946

"""Produce a map of position data for the components of keys.

947

948

This data is intended to be used for retrieving the knit records.

949

950

A dict of key to (record_details, index_memo, next, parents) is

951

returned.

952

method is the way referenced data should be applied.

953

index_memo is the handle to pass to the data access to actually get the

954

data

955

next is the build-parent of the version, or None for fulltexts.

956

parents is the version_ids of the parents of this version

957

958

:param allow_missing: If True do not raise an error on a missing component,

959

just ignore it.

960

"""

961

component_data = {}

962

pending_components = keys

963

while pending_components:

964

build_details = self._index.get_build_details(pending_components)

965

current_components = set(pending_components)

966

pending_components = set()

967

for key, details in build_details.iteritems():

968

(index_memo, compression_parent, parents,

969

record_details) = details

970

method = record_details[0]

971

if compression_parent is not None:

972

pending_components.add(compression_parent)

973

component_data[key] = self._build_details_to_components(details)

974

missing = current_components.difference(build_details)

975

if missing and not allow_missing:

976

raise errors.RevisionNotPresent(missing.pop(), self)

977

return component_data

978

979

def _get_content(self, key, parent_texts={}):

980

"""Returns a content object that makes up the specified

981

version."""

982

cached_version = parent_texts.get(key, None)

983

if cached_version is not None:

984

# Ensure the cache dict is valid.

985

if not self.get_parent_map([key]):

986

raise RevisionNotPresent(key, self)

987

return cached_version

988

text_map, contents_map = self._get_content_maps([key])

989

return contents_map[key]

990

991

def _get_content_maps(self, keys, nonlocal_keys=None):
    """Produce maps of text and KnitContents

    :param keys: The keys to produce content maps for.
    :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
        which are known to not be in this knit, but rather in one of the
        fallback knits.
    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
    :raises RevisionNotPresent: if a component needed to build one of
        keys is not in the record map.
    :raises SHA1KnitCorrupt: if the sha of a built text does not match the
        digest recorded for it.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children that need to apply delta's to it is > 1 or it is part of the
    # final output.
    keys = list(keys)
    multiple_versions = len(keys) != 1
    record_map = self._get_record_map(keys, allow_missing=True)

    text_map = {}
    content_map = {}
    final_content = {}
    if nonlocal_keys is None:
        nonlocal_keys = set()
    else:
        nonlocal_keys = frozenset(nonlocal_keys)
    # Fetch the nonlocal keys as fulltexts from the fallbacks, stopping
    # once every one of them has been found.
    missing_keys = set(nonlocal_keys)
    for source in self._fallback_vfs:
        if not missing_keys:
            break
        for record in source.get_record_stream(missing_keys,
            'unordered', True):
            if record.storage_kind == 'absent':
                continue
            missing_keys.remove(record.key)
            lines = split_lines(record.get_bytes_as('fulltext'))
            text_map[record.key] = lines
            content_map[record.key] = PlainKnitContent(lines, record.key)
            if record.key in keys:
                final_content[record.key] = content_map[record.key]
    for key in keys:
        if key in nonlocal_keys:
            # already handled
            continue
        # Plan the build chain: walk from key towards its basis,
        # collecting the records that have to be applied.
        components = []
        cursor = key
        while cursor is not None:
            try:
                record, record_details, digest, next = record_map[cursor]
            except KeyError:
                raise RevisionNotPresent(cursor, self)
            components.append((cursor, record, record_details, digest))
            cursor = next
            if cursor in content_map:
                # no need to plan further back
                components.append((cursor, None, None, None))
                break

        # Apply the chain oldest-first, reusing already built content.
        content = None
        for (component_id, record, record_details,
             digest) in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                content, delta = self._factory.parse_record(key[-1],
                    record, record_details, content,
                    copy_base_content=multiple_versions)
                if multiple_versions:
                    content_map[component_id] = content

        final_content[key] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
        text_map[key] = text
    return text_map, final_content

1069

1070

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined map is wanted, not the per-source breakdown.
    map_and_sources = self._get_parent_map_with_sources(keys)
    return map_and_sources[0]

1078

1079

def _get_parent_map_with_sources(self, keys):

1080

"""Get a map of the parents of keys.

1081

1082

:param keys: The keys to look up parents for.

1083

:return: A tuple. The first element is a mapping from keys to parents.

1084

Absent keys are absent from the mapping. The second element is a

1085

list with the locations each key was found in. The first element

1086

is the in-this-knit parents, the second the first fallback source,

1087

and so on.

1088

"""

1089

result = {}

1090

sources = [self._index] + self._fallback_vfs

1091

source_results = []

1092

missing = set(keys)

1093

for source in sources:

1094

if not missing:

1095

break

1096

new_result = source.get_parent_map(missing)

1097

source_results.append(new_result)

1098

result.update(new_result)

1099

missing.difference_update(set(new_result))

1100

return result, source_results

1101

1102

def _get_record_map(self, keys, allow_missing=False):

1103

"""Produce a dictionary of knit records.

1104

1105

:return: {key:(record, record_details, digest, next)}

1106

record

1107

data returned from read_records

1108

record_details

1109

opaque information to pass to parse_record

1110

digest

1111

SHA1 digest of the full text after all steps are done

1112

1113

build-parent of the version, i.e. the leftmost ancestor.

1114

Will be None if the record is not a delta.

1115

:param keys: The keys to build a map for

1116

:param allow_missing: If some records are missing, rather than

1117

error, just return the data that could be generated.

1118

"""

1119

# This retries the whole request if anything fails. Potentially we

1120

# could be a bit more selective. We could track the keys whose records

1121

# we have successfully found, and then only request the new records

1122

# from there. However, _get_components_positions grabs the whole build

1123

# chain, which means we'll likely try to grab the same records again

1124

# anyway. Also, can the build chains change as part of a pack

1125

# operation? We wouldn't want to end up with a broken chain.

1126

while True:

1127

try:

1128

position_map = self._get_components_positions(keys,

1129

allow_missing=allow_missing)

1130

# key = component_id, r = record_details, i_m = index_memo,

1131

# n = next

1132

records = [(key, i_m) for key, (r, i_m, n)

1133

in position_map.iteritems()]

1134

record_map = {}

1135

for key, record, digest in self._read_records_iter(records):

1136

(record_details, index_memo, next) = position_map[key]

1137

record_map[key] = record, record_details, digest, next

1138

return record_map

1139

except errors.RetryWithNewPacks, e:

1140

self._access.reload_or_raise(e)

1141

1142

def _split_by_prefix(self, keys):

1143

"""For the given keys, split them up based on their prefix.

1144

1145

To keep memory pressure somewhat under control, split the

1146

requests back into per-file-id requests, otherwise "bzr co"

1147

extracts the full tree into memory before writing it to disk.

1148

This should be revisited if _get_content_maps() can ever cross

1149

file-id boundaries.

1150

1151

:param keys: An iterable of key tuples

1152

:return: A dict of {prefix: [key_list]}

1153

"""

1154

split_by_prefix = {}

1155

for key in keys:

1156

if len(key) == 1:

1157

split_by_prefix.setdefault('', []).append(key)

1158

else:

1159

split_by_prefix.setdefault(key[0], []).append(key)

1160

return split_by_prefix

1161

1162

def get_record_stream(self, keys, ordering, include_delta_closure):

1163

"""Get a stream of records for keys.

1164

1165

:param keys: The keys to include.

1166

:param ordering: Either 'unordered' or 'topological'. A topologically

1167

sorted stream has compression parents strictly before their

1168

children.

1169

:param include_delta_closure: If True then the closure across any

1170

compression parents will be included (in the opaque data).

1171

:return: An iterator of ContentFactory objects, each of which is only

1172

valid until the iterator is advanced.

1173

"""

1174

# keys might be a generator

1175

keys = set(keys)

1176

if not keys:

1177

return

1178

if not self._index.has_graph:

1179

# Cannot topological order when no graph has been stored.

1180

ordering = 'unordered'

1181

1182

remaining_keys = keys

1183

while True:

1184

try:

1185

keys = set(remaining_keys)

1186

for content_factory in self._get_remaining_record_stream(keys,

1187

ordering, include_delta_closure):

1188

remaining_keys.discard(content_factory.key)

1189

yield content_factory

1190

return

1191

except errors.RetryWithNewPacks, e:

1192

self._access.reload_or_raise(e)

1193

1194

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    :param keys: A set of the keys still to be streamed.
    :param ordering: 'unordered' or 'topological'; anything else raises
        AssertionError.
    :param include_delta_closure: If True texts are built with
        _get_content_maps and yielded as ChunkedContentFactory objects;
        otherwise the raw records of this knit are yielded as
        KnitContentFactory objects and the remaining keys are streamed
        from the fallbacks.
    :return: An iterator of content factories. Keys that no source has
        parents for are yielded first, as AbsentContentFactory objects.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict. key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                needed_from_fallback.add(key)
                continue
            result = True
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            # Remember the verdict for every key walked over, so later
            # chains can stop as soon as they reach one of them.
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering == 'topological':
        # Global topological sort
        present_keys = tsort.topo_sort(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    # Replaces the earlier estimate: a key is absent when no source
    # returned parents for it.
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        prefix_split_keys = self._split_by_prefix(present_keys)
        prefix_split_non_local_keys = self._split_by_prefix(non_local_keys)
        # NOTE: from here on 'keys' is rebound to the per-prefix list.
        for prefix, keys in prefix_split_keys.iteritems():
            non_local = prefix_split_non_local_keys.get(prefix, [])
            non_local = set(non_local)
            text_map, _ = self._get_content_maps(keys, non_local)
            for key in keys:
                # pop() drops the map's reference to each text once it
                # has been handed out.
                lines = text_map.pop(key)
                yield ChunkedContentFactory(key, global_map[key], None,
                                            lines)
    else:
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this knit, so the fallbacks
                # are offset by one.
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                    yield record

1309

1310

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s()."""
    unresolved = set(keys)
    record_map = self._get_record_map(unresolved, allow_missing=True)
    sha1s = {}
    for key, details in record_map.iteritems():
        # The record map also holds build-chain components that were not
        # asked for; only report the requested keys.
        if key in unresolved:
            # record entry 2 is the 'digest'.
            sha1s[key] = details[2]
    unresolved.difference_update(set(sha1s))
    # Whatever is still unresolved is looked for in the fallbacks, in
    # order, until nothing is left.
    for fallback in self._fallback_vfs:
        if not unresolved:
            break
        found = fallback.get_sha1s(unresolved)
        sha1s.update(found)
        unresolved.difference_update(set(found))
    return sha1s

1328

1329

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records whose storage kind can be stored as-is (or cheaply adapted)
    have their bytes added directly; everything else is converted to a
    fulltext and added with add_lines().

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if the stream contains an 'absent' record.
    :raises errors.BzrCheckError: if, once the stream is exhausted, some
        inserted deltas still have a compression parent that is not in
        this knit's index.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created on first use and cached in 'adapters'.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.

    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.

    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            # compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks. In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.

            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                # Try the delta adapter first and fall back to the
                # fulltext one when none is registered for this kind.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind))
            else:
                bytes = record.get_bytes_as(record.storage_kind)
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            buffered = False
            if 'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.

                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if not self._index.has_key(compression_parent):
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif (record.storage_kind == 'fulltext'
              or record.storage_kind == 'chunked'):
            self.add_lines(record.key, parents,
                split_lines(record.get_bytes_as('fulltext')))
        else:
            # Not a fulltext, and not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            lines = split_lines(adapter.get_bytes(
                record, record.get_bytes_as(record.storage_kind)))
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                pass
        # Add any records whose basis parent is now available.
        added_keys = [record.key]
        while added_keys:
            key = added_keys.pop(0)
            if key in buffered_index_entries:
                index_entries = buffered_index_entries[key]
                self._index.add_records(index_entries)
                # The entries just indexed may in turn be the basis of
                # other buffered entries.
                added_keys.extend(
                    [index_entry[0] for index_entry in index_entries])
                del buffered_index_entries[key]
    # If there were any deltas which had a missing basis parent, error.
    if buffered_index_entries:
        from pprint import pformat
        raise errors.BzrCheckError(
            "record_stream refers to compression parents not in %r:\n%s"
            % (self, pformat(sorted(buffered_index_entries.keys()))))

1481

1482

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

1483

"""Iterate over the lines in the versioned files from keys.

1484

1485

This may return lines from other keys. Each item the returned

1486

iterator yields is a tuple of a line and a text version that that line

1487

is present in (not introduced in).

1488

1489

Ordering of results is in whatever order is most suitable for the

1490

underlying storage format.

1491

1492

If a progress bar is supplied, it may be used to indicate progress.

1493

The caller is responsible for cleaning up progress bars (because this

1494

is an iterator).

1495

1496

NOTES:

1497

* Lines are normalised by the underlying store: they will all have \\n

1498

terminators.

1499

* Lines are returned in arbitrary order.

1500

* If a requested key did not change any lines (or didn't have any

1501

lines), it may not be mentioned at all in the result.

1502

1503

:return: An iterator over (line, key).

1504

"""

1505

if pb is None:

1506

pb = progress.DummyProgress()

1507

keys = set(keys)

1508

total = len(keys)

1509

done = False

1510

while not done:

1511

try:

1512

# we don't care about inclusions, the caller cares.

1513

# but we need to setup a list of records to visit.

1514

# we need key, position, length

1515

key_records = []

1516

build_details = self._index.get_build_details(keys)

1517

for key, details in build_details.iteritems():

1518

if key in keys:

1519

key_records.append((key, details[0]))

1520

records_iter = enumerate(self._read_records_iter(key_records))

1521

for (key_idx, (key, data, sha_value)) in records_iter:

1522

pb.update('Walking content.', key_idx, total)

1523

compression_parent = build_details[key][1]

1524

if compression_parent is None:

1525

# fulltext

1526

line_iterator = self._factory.get_fulltext_content(data)

1527

else:

1528

# Delta

1529

line_iterator = self._factory.get_linedelta_content(data)

1530

# Now that we are yielding the data for this key, remove it

1531

# from the list

1532

keys.remove(key)

1533

# XXX: It might be more efficient to yield (key,

1534

# line_iterator) in the future. However for now, this is a

1535

# simpler change to integrate into the rest of the

1536

# codebase. RBC 20071110

1537

for line in line_iterator:

1538

yield line, key

1539

done = True

1540

except errors.RetryWithNewPacks, e:

1541

self._access.reload_or_raise(e)

1542

# If there are still keys we've not yet found, we look in the fallback

1543

# vfs, and hope to find them there. Note that if the keys are found

1544

# but had no changes or no content, the fallback may not return

1545

# anything.

1546

if keys and not self._fallback_vfs:

1547

# XXX: strictly the second parameter is meant to be the file id

1548

# but it's not easily accessible here.

1549

raise RevisionNotPresent(keys, repr(self))

1550

for source in self._fallback_vfs:

1551

if not keys:

1552

break

1553

source_keys = set()

1554

for line, key in source.iter_lines_added_or_present_in_keys(keys):

1555

source_keys.add(key)

1556

yield line, key

1557

keys.difference_update(source_keys)

1558

pb.update('Walking content.', total, total)

1559

1560

def _make_line_delta(self, delta_seq, new_content):
    """Build the list of line-delta hunks described by delta_seq.

    :param delta_seq: A matcher whose get_opcodes() yields
        (tag, i1, i2, j1, j2) tuples.
    :param new_content: The new content; its _lines attribute supplies the
        replacement lines for each hunk.
    :return: A list of (old_start, old_end, new_line_count, new_lines)
        tuples, one per opcode whose tag is not 'equal'.
    """
    return [(old_start, old_end, new_end - new_start,
             new_content._lines[new_start:new_end])
            for tag, old_start, old_end, new_start, new_end
            in delta_seq.get_opcodes()
            if tag != 'equal']

1568

1569

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge parent annotations into content and optionally build a delta.

    When annotated is true, every parent is compared against content and
    the (origin, text) pairs of matching lines are copied across. When
    delta is true, a line delta against the first parent is returned.

    :param content: The new content object; its _lines may be modified.
    :param parents: The parent keys, leftmost first.
    :param parent_texts: Passed through to self._get_content.
    :param delta: If true, return a line delta.
    :param annotated: If true, merge annotations from the parents.
    :param left_matching_blocks: Optional precomputed matching blocks
        against the first parent.
    :return: The line delta when delta is true, otherwise None.
    """
    delta_seq = None
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    if annotated:
        for parent_key in parents:
            parent_content = self._get_content(parent_key, parent_texts)
            if parent_key == parents[0] and delta_seq is not None:
                matcher = delta_seq
            else:
                matcher = patiencediff.PatienceSequenceMatcher(
                    None, parent_content.text(), content.text())
            for parent_pos, new_pos, length in matcher.get_matching_blocks():
                if length == 0:
                    continue
                # Copy (origin, text) pairs across to the new content for
                # any line that matches the last-checked parent.
                content._lines[new_pos:new_pos + length] = \
                    parent_content._lines[parent_pos:parent_pos + length]
        # XXX: Robert says the following block is a workaround for a
        # now-fixed bug and it can probably be deleted. -- mbp 20080618
        if content._lines:
            last = content._lines[-1]
            if last[1][-1] != '\n':
                # The copied annotation came from a line without a trailing
                # EOL; put one back so the content serialises correctly.
                content._lines[-1] = (last[0], last[1] + '\n')
    if not delta:
        return None
    if delta_seq is None:
        reference_content = self._get_content(parents[0], parent_texts)
        new_texts = content.text()
        old_texts = reference_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(
            None, old_texts, new_texts)
    return self._make_line_delta(delta_seq, content)

1613

1614

def _parse_record(self, version_id, data):
    """Parse an original format knit record and check its version id.

    These records store only the last element of the key.

    :param version_id: The version id the record header is checked
        against.
    :param data: The raw record data.
    :return: (record_contents, digest), where digest is the fourth field
        of the record header.
    """
    header, body_lines = self._parse_record_unchecked(data)
    self._check_header_version(header, version_id)
    digest = header[3]
    return body_lines, digest

1622

1623

def _parse_record_header(self, key, raw_data):

1624

"""Parse a record header for consistency.

1625

1626

:return: the header and the decompressor stream.

1627

as (stream, header_record)

1628

"""

1629

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1630

try:

1631

# Current serialise

1632

rec = self._check_header(key, df.readline())

1633

except Exception, e:

1634

raise KnitCorrupt(self,

1635

"While reading {%s} got %s(%s)"

1636

% (key, e.__class__.__name__, str(e)))

1637

return df, rec

1638

1639

def _parse_record_unchecked(self, data):

1640

# profiling notes:

1641

# 4168 calls in 2880 217 internal

1642

# 4168 calls to _parse_record_header in 2121

1643

# 4168 calls to readlines in 330

1644

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1645

try:

1646

record_contents = df.readlines()

1647

except Exception, e:

1648

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1649

(data, e.__class__.__name__, str(e)))

1650

header = record_contents.pop(0)

1651

rec = self._split_header(header)

1652

last_line = record_contents.pop()

1653

if len(record_contents) != int(rec[2]):

1654

raise KnitCorrupt(self,

1655

'incorrect number of lines %s != %s'

1656

' for version {%s} %s'

1657

% (len(record_contents), int(rec[2]),

1658

rec[1], record_contents))

1659

if last_line != 'end %s\n' % rec[1]:

1660

raise KnitCorrupt(self,

1661

'unexpected version end line %r, wanted %r'

1662

% (last_line, rec[1]))

1663

df.close()

1664

return rec, record_contents

1665

1666

def _read_records_iter(self, records):
    """Read text records from the data file and yield the parsed results.

    Results are produced in the order that is fastest to read, not the
    order requested, and a record requested more than once is only
    yielded once.

    :param records: A list of (key, access_memo) entries.
    :return: Yields (key, contents, digest) in the order read, not the
        order requested.
    """
    if not records:
        return

    # XXX: This smells wrong, IO may not be getting ordered right.
    ordered = sorted(set(records), key=operator.itemgetter(1))
    if not ordered:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for key, index_memo in ordered]
    raw_records = self._access.get_raw_records(memos)

    for record, data in izip(iter(ordered), raw_records):
        key, index_memo = record
        content, digest = self._parse_record(key[-1], data)
        yield key, content, digest

1693

1694

def _read_records_iter_raw(self, records):
    """Read text records from the data file and yield the raw data.

    Only the record header is unpacked, which is enough to validate that
    the id is as expected but that's all.

    :param records: A sequence of (key, access_memo) entries.
    :return: Yields (key, bytes, sha1_of_full_text) in the order of
        records.
    """
    if not len(records):
        return
    # Fetch all the raw data in one request (uses readv so nice and fast
    # we hope).
    raw_records = self._access.get_raw_records(
        [index_memo for key, index_memo in records])

    for key, index_memo in records:
        data = raw_records.next()
        # validate the header (note that we can only use the suffix in
        # current knit records).
        stream, header = self._parse_record_header(key, data)
        stream.close()
        yield key, data, header[3]

1717

1718

def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Convert key, digest, lines into a raw data block.

    :param key: The key of the record. Currently keys are always serialised
        using just the trailing component.
    :param digest: The digest written into the record's header line.
    :param lines: The lines of the record. When non-empty, the last line
        must end with a newline.
    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in a
        newline, dense_lines may be a list with one string in it holding
        all 1000 lines and their newlines. Using dense_lines if it is
        already known is a win because the string join to create bytes in
        this function spends less time resizing the final string.
    :return: (len, data): the gzip-compressed record as a string, and its
        length.
    :raises ValueError: If the last line does not end with a newline.
    :raises AssertionError: If the joined record is not a plain str.
    """
    # Validate before doing the expensive join. endswith() also copes with
    # an empty last line, where indexing [-1] would raise IndexError.
    if lines and not lines[-1].endswith('\n'):
        # Wrap in a tuple so a tuple of lines is reported, not unpacked as
        # format arguments.
        raise ValueError('corrupt lines value %r' % (lines,))
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    raw_text = ''.join(chain(
        ["version %s %d %s\n" % (key[-1],
                                 len(lines),
                                 digest)],
        dense_lines or lines,
        ["end %s\n" % key[-1]]))
    if type(raw_text) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(raw_text))
    compressed_bytes = tuned_gzip.bytes_to_gzip(raw_text)
    return len(compressed_bytes), compressed_bytes

1747

1748

def _split_header(self, line):
    """Split a record header line into its four whitespace-separated fields.

    :param line: The header line of a record.
    :return: The list of four fields.
    :raises KnitCorrupt: If the line does not have exactly four fields.
    """
    fields = line.split()
    if len(fields) == 4:
        return fields
    raise KnitCorrupt(self,
                      'unexpected number of elements in record header')

1754

1755

def keys(self):
    """See VersionedFiles.keys.

    :return: The set of keys reported by this object's index together
        with those reported by every fallback versioned file.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1764

1765

1766

class _KndxIndex(object):

1767

"""Manages knit index files

1768

1769

The index is kept in memory and read on startup, to enable

1770

fast lookups of revision information. The cursor of the index

1771

file is always pointing to the end, making it easy to append

1772

entries.

1773

1774

_cache is a cache for fast mapping from version id to a Index

1775

object.

1776

1777

_history is a cache for fast mapping from indexes to version ids.

1778

1779

The index data format is dictionary compressed when it comes to

1780

parent references; an index entry may only have parents with a

1781

lower index number. As a result, the index is topologically sorted.

1782

1783

Duplicate entries may be written to the index for a single version id

1784

if this is done then the latter one completely replaces the former:

1785

this allows updates to correct version and parent information.

1786

Note that the two entries may share the delta, and that successive

1787

annotations and references MUST point to the first entry.

1788

1789

The index file on disc contains a header, followed by one line per knit

1790

record. The same revision can be present in an index file more than once.

1791

The first occurrence gets assigned a sequence number starting from 0.

1792

1793

The format of a single line is

1794

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1795

REVISION_ID is a utf8-encoded revision id

1796

FLAGS is a comma separated list of flags about the record. Values include

1797

no-eol, line-delta, fulltext.

1798

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1799

that the compressed data starts at.

1800

LENGTH is the ascii representation of the length of the data file.

1801

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1802

REVISION_ID.

1803

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1804

revision id already in the knit that is a parent of REVISION_ID.

1805

The ' :' marker is the end of record marker.

1806

1807

partial writes:

1808

when a write is interrupted to the index file, it will result in a line

1809

that does not end in ' :'. If the ' :' is not present at the end of a line,

1810

or at the end of the file, then the record that is missing it will be

1811

ignored by the parser.

1812

1813

When writing new records to the index file, the data is preceded by '\n'

1814

to ensure that records always start on new lines even if the last write was

1815

interrupted. As a result it's normal for the last line in the index to be

1816

missing a trailing newline. One can be added with no harmful effects.

1817

1818

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

1819

where prefix is e.g. the (fileid,) for .texts instances or () for

1820

constant-mapped things like .revisions, and the old state is

1821

tuple(history_vector, cache_dict). This is used to prevent having an

1822

ABI change with the C extension that reads .kndx files.

1823

"""

1824

1825

HEADER = "# bzr knit index 8\n"

1826

1827

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    :param transport: The transport the .kndx files are read from and
        written to.
    :param mapper: Maps keys and prefixes to relative paths.
    :param get_scope: A callable returning the current scope; the cache is
        reset when its result changes.
    :param allow_writes: A callable returning whether writes are allowed.
    :param is_locked: A callable returning whether the index is locked.
    """
    self._transport = transport
    self._mapper = mapper
    # The three callables are consulted lazily on each check.
    self._get_scope = get_scope
    self._allow_writes = allow_writes
    self._is_locked = is_locked
    # Needs _get_scope and _allow_writes to be set already.
    self._reset_cache()
    self.has_graph = True

1836

1837

def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file they map to. For each file the
    new index lines are appended (or the file is initialised when its
    history was empty) and the in-memory cache is updated; if anything
    fails, the cached state for that file's prefix is restored before the
    error is re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    """
    by_path = {}
    for record in records:
        key = record[0]
        path = self._mapper.map(key) + '.kndx'
        group = by_path.setdefault(path, (key[:-1], []))
        group[1].append(record)
    for path in sorted(by_path):
        prefix, path_records = by_path[path]
        self._load_prefixes([prefix])
        new_lines = []
        # Snapshot the cached state so it can be put back on failure.
        orig_history = self._kndx_cache[prefix][1][:]
        orig_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_records:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                compressed_parents = self._dictionary_compress(parents)
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    compressed_parents)
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                new_lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(orig_history):
                self._transport.append_bytes(path, ''.join(new_lines))
            else:
                self._init_index(path, new_lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

1880

1881

def _cache_key(self, key, options, pos, size, parent_keys):
    """Cache a version record in the history array and index cache.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
     indexes).

    :param key: The key being cached; everything but its last element is
        the prefix selecting the cache to update.
    :param options: The options of the record.
    :param pos: The position of the record.
    :param size: The size of the record.
    :param parent_keys: The parent keys; each must share key's prefix.
    :raises ValueError: If a parent has a different prefix than key.
    """
    prefix, version_id = key[:-1], key[-1]
    # last-element only for compatibility with the C load_data.
    parents = tuple([parent[-1] for parent in parent_keys])
    for parent in parent_keys:
        if parent[:-1] != prefix:
            raise ValueError("mismatched prefixes for %r, %r" % (
                key, parent_keys))
    cache, history = self._kndx_cache[prefix]
    if version_id in cache:
        # Keep pointing at the first index entry for this version id.
        index = cache[version_id][5]
    else:
        index = len(history)
        history.append(version_id)
    cache[version_id] = (version_id, options, pos, size, parents, index)

1910

1911

def check_header(self, fp):
    """Read the first line from fp and check it is the index header.

    :param fp: A file-like object positioned at the start of an index.
    :raises errors.NoSuchFile: If the file is empty.
    :raises KnitHeaderError: If the first line is not self.HEADER.
    """
    first_line = fp.readline()
    if first_line == self.HEADER:
        return
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    raise KnitHeaderError(badline=first_line, filename=self)

1919

1920

def _check_read(self):
    """Raise if the index is not locked; reset the cache on scope change.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)
    current_scope = self._get_scope()
    if current_scope != self._scope:
        self._reset_cache()

1925

1926

def _check_write_ok(self):
    """Raise if writes are not permitted.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: If the mode is not 'w'.
    """
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)
    current_scope = self._get_scope()
    if current_scope != self._scope:
        # This may change self._mode, so it must happen before the check.
        self._reset_cache()
    if self._mode == 'w':
        return
    raise errors.ReadOnlyObjectDirtiedError(self)

1934

1935

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    parent_map = self.get_parent_map(keys)
    details = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        compression_parent = None
        if method != 'fulltext':
            # Anything that is not a fulltext is built on the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        record_details = (method, noeol)
        details[key] = (index_memo, compression_parent, parents,
                        record_details)
    return details

1970

1971

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: If the options name neither.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

1980

1981

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    :raises RevisionNotPresent: If key is not in the index cache.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        entry = self._kndx_cache[prefix][0][suffix]
        options = entry[1]
    except KeyError:
        raise RevisionNotPresent(key, self)
    return options

1992

1993

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    self._load_prefixes(set(key[:-1] for key in keys))
    parent_map = {}
    for key in keys:
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Not present: leave it out of the mapping.
            continue
        # The cache stores parent suffixes only; rebuild the full keys.
        parent_map[key] = tuple(prefix + (suffix,)
                                for suffix in suffix_parents)
    return parent_map

2016

2017

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    cache = self._kndx_cache[prefix][0]
    position, size = cache[suffix][2], cache[suffix][3]
    return key, position, size

2027

2028

has_key = _mod_index._has_key_from_parent_map

2029

2030

def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes self.HEADER followed by extra_lines to path, creating the
    parent directory if needed.

    :param path: The path of the index file to write.
    :param extra_lines: An iterable of already formatted index lines to
        write after the header. The default is an immutable empty tuple
        so no object is shared between calls.
    """
    sio = StringIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    # Rewind so the transport reads from the start of the buffer.
    sio.seek(0)
    # NOTE: no file or directory mode is passed through here.
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)

2041

2042

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of keys built from every prefix's history.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        prefixes = [()]
    else:
        # Strip extensions so each distinct base path is unmapped once.
        relpaths = set()
        for quoted_relpath in self._transport.iter_files_recursive():
            relpaths.add(os.path.splitext(quoted_relpath)[0])
        prefixes = [self._mapper.unmap(relpath) for relpath in relpaths]
    self._load_prefixes(prefixes)
    found = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        found.update([prefix + (suffix,) for suffix in history])
    return found

2063

2064

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already present in self._kndx_cache are left alone. For the
    others the matching .kndx file is parsed; a missing file is recorded
    as an empty index.

    :param prefixes: An iterable of key prefixes.
    """
    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # the load_data interface writes to these variables.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            path = self._mapper.map(prefix) + '.kndx'
            fp = self._transport.get(path)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
            self._kndx_cache[prefix] = (self._cache, self._history)
        except NoSuchFile:
            self._kndx_cache[prefix] = ({}, [])
            if type(self._mapper) == ConstantMapper:
                # preserve behaviour for revisions.kndx etc.
                self._init_index(path)
        # Both the loaded and the missing-file outcome end by dropping the
        # scratch attributes used by load_data.
        del self._cache
        del self._filename
        del self._history

2094

2095

missing_keys = _mod_index._missing_keys_from_parent_map

2096

2097

def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    :param keys: An iterable of key tuples.
    :return: A dict mapping each key's prefix (all but its last element)
        to the list of last elements, in the order encountered.
    """
    partitioned = {}
    for key in keys:
        prefix, suffix = key[:-1], key[-1]
        if prefix not in partitioned:
            partitioned[prefix] = []
        partitioned[prefix].append(suffix)
    return partitioned

2104

2105

def _dictionary_compress(self, keys):
    """Dictionary compress keys.

    :param keys: The keys to generate references to. All of them must
        share the prefix of the first key, whose cache must already be
        loaded in self._kndx_cache.
    :return: A string representation of keys. keys which are present are
        dictionary compressed, and others are emitted as fulltext with a
        '.' prefix.
    :raises ValueError: If a key has a different prefix than the first.
    """
    if not keys:
        return ''
    result_list = []
    prefix = keys[0][:-1]
    cache = self._kndx_cache[prefix][0]
    for key in keys:
        if key[:-1] != prefix:
            # kndx indices cannot refer across partitioned storage.
            # keys is wrapped in a tuple because it is usually a tuple
            # itself; formatting it bare would unpack it as format
            # arguments and raise TypeError instead.
            raise ValueError("mismatched prefixes for %r" % (keys,))
        if key[-1] in cache:
            # -- inlined lookup() --
            result_list.append(str(cache[key[-1]][5]))
            # -- end lookup () --
        else:
            result_list.append('.' + key[-1])
    return ' '.join(result_list)

2129

2130

def _reset_cache(self):
    """Drop all parsed index data and re-read the scope and write mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    if not self._allow_writes():
        self._mode = 'r'
    else:
        self._mode = 'w'

2140

2141

def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    # Group by prefix and position. index_memo[0] is the key, so it is
    # (file_id, revision_id) and we don't want to sort on revision_id,
    # index_memo[1] is the position, and index_memo[2] is the size,
    # which doesn't matter for the sort
    keys.sort(key=lambda key: (positions[key][1][0][:-1],
                               positions[key][1][1]))
    return None

2160

2161

def _split_key(self, key):
    """Split key into a prefix and suffix.

    :return: (prefix, suffix): everything but the last element of key,
        and the last element.
    """
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix

2164

2165

2166

class _KnitGraphIndex(object):

2167

"""A KnitVersionedFiles index layered on GraphIndex."""

2168

2169

def __init__(self, graph_index, is_locked, deltas=False, parents=True,
             add_callback=None):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of bzrlib.index.GraphIndex.
    :param is_locked: A callback, returns True if the index is locked and
        thus usable.
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :raises KnitCorrupt: If deltas are requested without parent tracking.
    """
    # These are assigned before the consistency check so the object
    # handed to KnitCorrupt already carries them.
    self._add_callback = add_callback
    self._graph_index = graph_index
    self._deltas = deltas
    self._parents = parents
    if deltas and not parents:
        # XXX: TODO: Delta tree and parent graph should be conceptually
        # separate.
        message = ("Cannot do delta compression without "
                   "parent tracking.")
        raise KnitCorrupt(self, message)
    self.has_graph = parents
    self._is_locked = is_locked

2196

2197

def __repr__(self):
    """Return the class name followed by the repr of the graph index."""
    class_name = self.__class__.__name__
    return "%s(%r)" % (class_name, self._graph_index)

2199

2200

def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises errors.ReadOnlyError: If no add callback was supplied.
    :raises KnitCorrupt: If a record does not fit this index's delta or
        parent settings, or disagrees with an entry already present.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    pending = {}
    for (key, options, access_memo, parents) in records:
        if self._parents:
            parents = tuple(parents)
        index, pos, size = access_memo
        # The first character of the value records the no-eol flag.
        if 'no-eol' in options:
            flag = 'N'
        else:
            flag = ' '
        value = flag + "%d %d" % (pos, size)
        is_delta = 'line-delta' in options
        if is_delta and not self._deltas:
            raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if not self._parents:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        elif not self._deltas:
            node_refs = (parents, )
        elif is_delta:
            # The compression parent is the first parent.
            node_refs = (parents, (parents[0],))
        else:
            node_refs = (parents, ())
        pending[key] = (value, node_refs)
    # check for dups
    if not random_id:
        for (index, key, value, node_refs) in self._get_entries(pending):
            wanted = pending[key]
            if value[0] != wanted[0][0] or node_refs != wanted[1]:
                raise KnitCorrupt(self, "inconsistent details in add_records"
                    ": %s %s" % ((value, node_refs), wanted))
            # Already present with matching details: nothing to add.
            del pending[key]
    if self._parents:
        result = [(key, pending[key][0], pending[key][1])
                  for key in pending]
    else:
        result = [(key, pending[key][0]) for key in pending]
    self._add_callback(result)

2262

2263

def _check_read(self):
    """Raise if reads are not permitted.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    if self._is_locked():
        return
    raise errors.ObjectNotLocked(self)

2267

2268

def _check_write_ok(self):
    """Raise if writes are not permitted.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    if self._is_locked():
        return
    raise errors.ObjectNotLocked(self)

2272

2273

def _compression_parent(self, an_entry):
    """Return the key that an_entry is compressed against, or None.

    :param an_entry: An index entry whose fourth element holds the
        reference lists; the second list is the compression parents (as
        deltas implies parents currently).
    :return: The single compression parent key, or None when the entry
        has no compression parent.
    :raises AssertionError: If more than one compression parent is
        recorded.
    """
    compression_parents = an_entry[3][1]
    if not compression_parents:
        return None
    if len(compression_parents) != 1:
        # Wrapped in a tuple: compression_parents is itself a multi-item
        # sequence here, and formatting a bare tuple would raise TypeError
        # instead of the intended AssertionError.
        raise AssertionError(
            "Too many compression parents: %r" % (compression_parents,))
    return compression_parents[0]

2283

2284

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for version_ids.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    self._check_read()
    details = {}
    for entry in self._get_entries(keys, False):
        key = entry[1]
        if self._parents:
            parents = entry[3][0]
        else:
            parents = ()
        if self._deltas:
            compression_parent_key = self._compression_parent(entry)
        else:
            compression_parent_key = None
        # The first character of the value is 'N' for no-eol records.
        noeol = (entry[2][0] == 'N')
        if compression_parent_key:
            method = 'line-delta'
        else:
            method = 'fulltext'
        index_memo = self._node_to_position(entry)
        details[key] = (index_memo, compression_parent_key, parents,
                        (method, noeol))
    return details

2325

2326

def _get_entries(self, keys, check_present=False):

2327

"""Get the entries for keys.

2328

2329

:param keys: An iterable of index key tuples.

2330

"""

2331

keys = set(keys)

2332

found_keys = set()

2333

if self._parents:

2334

for node in self._graph_index.iter_entries(keys):

2335

yield node

2336

found_keys.add(node[1])

2337

else:

2338

# adapt parentless index to the rest of the code.

2339

for node in self._graph_index.iter_entries(keys):

2340

yield node[0], node[1], node[2], ()

2341

found_keys.add(node[1])

2342

if check_present:

2343

missing_keys = keys.difference(found_keys)

2344

if missing_keys:

2345

raise RevisionNotPresent(missing_keys.pop(), self)

2346

2347

def get_method(self, key):
    """Return the compression method recorded for key.

    The index node for key is looked up first and then classified by
    self._get_method().
    """
    node = self._get_node(key)
    return self._get_method(node)

2350

2351

def _get_method(self, node):

2352

if not self._deltas:

2353

return 'fulltext'

2354

if self._compression_parent(node):

2355

return 'line-delta'

2356

else:

2357

return 'fulltext'

2358

2359

def _get_node(self, key):

2360

try:

2361

return list(self._get_entries([key]))[0]

2362

except IndexError:

2363

raise RevisionNotPresent(key, self)

2364

2365

def get_options(self, key):
    """Return the list of option strings for key.

    The list always starts with the compression method and is followed by
    'no-eol' when the index value starts with 'N',
    e.g. ['fulltext', 'no-eol'].
    """
    node = self._get_node(key)
    method = self._get_method(node)
    if node[2][0] == 'N':
        return [method, 'no-eol']
    return [method]

2375

2376

def get_parent_map(self, keys):
    """Build a mapping from each present key to its parents.

    :param keys: The keys to look up parents for.
    :return: A dict of key -> parents. Keys without an index entry are
        left out. When this index does not track parents every found key
        maps to None.
    """
    self._check_read()
    nodes = self._get_entries(keys)
    if self._parents:
        return dict((node[1], node[3][0]) for node in nodes)
    return dict((node[1], None) for node in nodes)

2393

2394

def get_position(self, key):
    """Return the details needed to access the record for key.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))

2402

2403

# has_key is not implemented here: it is bound to the shared helper
# _mod_index._has_key_from_parent_map. Judging by that name it is presumably
# built on top of get_parent_map() -- confirm in _mod_index.
has_key = _mod_index._has_key_from_parent_map

2404

2405

def keys(self):
    """Return a list of every key in the collection.

    The keys come back in whatever order the underlying graph index
    iterates its entries; no ordering is applied.
    """
    self._check_read()
    found = []
    for node in self._graph_index.iter_all_entries():
        found.append(node[1])
    return found

2412

2413

# missing_keys is not implemented here: it is bound to the shared helper
# _mod_index._missing_keys_from_parent_map. Judging by that name it is
# presumably built on top of get_parent_map() -- confirm in _mod_index.
missing_keys = _mod_index._missing_keys_from_parent_map

2414

2415

def _node_to_position(self, node):

2416

"""Convert an index value to position details."""

2417

bits = node[2][1:].split(' ')

2418

return node[0], int(bits[0]), int(bits[1])

2419

2420

def _sort_keys_by_io(self, keys, positions):

2421

"""Figure out an optimal order to read the records for the given keys.

2422

2423

Sort keys, grouped by index and sorted by position.

2424

2425

:param keys: A list of keys whose records we want to read. This will be

2426

sorted 'in-place'.

2427

:param positions: A dict, such as the one returned by

2428

_get_components_positions()

2429

:return: None

2430

"""

2431

def get_index_memo(key):

2432

# index_memo is at offset [1]. It is made up of (GraphIndex,

2433

# position, size). GI is an object, which will be unique for each

2434

# pack file. This causes us to group by pack file, then sort by

2435

# position. Size doesn't matter, but it isn't worth breaking up the

2436

# tuple.

2437

return positions[key][1]

2438

return keys.sort(key=get_index_memo)

2439

2440

2441

class _KnitKeyAccess(object):

2442

"""Access to records in .knit files."""

2443

2444

def __init__(self, transport, mapper):

2445

"""Create a _KnitKeyAccess with transport and mapper.

2446

2447

:param transport: The transport the access object is rooted at.

2448

:param mapper: The mapper used to map keys to .knit files.

2449

"""

2450

self._transport = transport

2451

self._mapper = mapper

2452

2453

def add_raw_records(self, key_sizes, raw_data):

2454

"""Add raw knit bytes to a storage area.

2455

2456

The data is spooled to the container writer in one bytes-record per

2457

raw data item.

2458

2459

:param sizes: An iterable of tuples containing the key and size of each

2460

raw data segment.

2461

:param raw_data: A bytestring containing the data.

2462

:return: A list of memos to retrieve the record later. Each memo is an

2463

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

2464

length), where the key is the record key.

2465

"""

2466

if type(raw_data) != str:

2467

raise AssertionError(

2468

'data must be plain bytes was %s' % type(raw_data))

2469

result = []

2470

offset = 0

2471

# TODO: This can be tuned for writing to sftp and other servers where

2472

# append() is relatively expensive by grouping the writes to each key

2473

# prefix.

2474

for key, size in key_sizes:

2475

path = self._mapper.map(key)

2476

try:

2477

base = self._transport.append_bytes(path + '.knit',

2478

raw_data[offset:offset+size])

2479

except errors.NoSuchFile:

2480

self._transport.mkdir(osutils.dirname(path))

2481

base = self._transport.append_bytes(path + '.knit',

2482

raw_data[offset:offset+size])

2483

# if base == 0:

2484

# chmod.

2485

offset += size

2486

result.append((key, base, size))

2487

return result

2488

2489

def get_raw_records(self, memos_for_retrieval):

2490

"""Get the raw bytes for a records.

2491

2492

:param memos_for_retrieval: An iterable containing the access memo for

2493

retrieving the bytes.

2494

:return: An iterator over the bytes of the records.

2495

"""

2496

# first pass, group into same-index request to minimise readv's issued.

2497

request_lists = []

2498

current_prefix = None

2499

for (key, offset, length) in memos_for_retrieval:

2500

if current_prefix == key[:-1]:

2501

current_list.append((offset, length))

2502

else:

2503

if current_prefix is not None:

2504

request_lists.append((current_prefix, current_list))

2505

current_prefix = key[:-1]

2506

current_list = [(offset, length)]

2507

# handle the last entry

2508

if current_prefix is not None:

2509

request_lists.append((current_prefix, current_list))

2510

for prefix, read_vector in request_lists:

2511

path = self._mapper.map(prefix) + '.knit'

2512

for pos, data in self._transport.readv(path, read_vector):

2513

yield data

2514

2515

2516

class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation.

    Records are addressed by (index, offset, length) memos; the index part
    of a memo is used to look up the transport and file name of the pack
    that holds the bytes.
    """

2518

2519

def __init__(self, index_to_packs, reload_func=None):

2520

"""Create a _DirectPackAccess object.

2521

2522

:param index_to_packs: A dict mapping index objects to the transport

2523

and file names for obtaining data.

2524

:param reload_func: A function to call if we determine that the pack

2525

files have moved and we need to reload our caches. See

2526

bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.

2527

"""

2528

self._container_writer = None

2529

self._write_index = None

2530

self._indices = index_to_packs

2531

self._reload_func = reload_func

2532

2533

def add_raw_records(self, key_sizes, raw_data):
    """Write raw knit bytes through the current container writer.

    raw_data is consumed front to back; each (key, size) pair becomes one
    bytes-record holding the next size bytes.

    :param key_sizes: An iterable of tuples containing the key and size of
        each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: A list of memos to retrieve the record later. Each memo is an
        opaque index memo. For _DirectPackAccess the memo is (index, pos,
        length), where the index field is the write_index object supplied
        to the PackAccess object.
    """
    if type(raw_data) != str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(raw_data))
    memos = []
    start = 0
    for key, size in key_sizes:
        end = start + size
        p_offset, p_length = self._container_writer.add_bytes_record(
            raw_data[start:end], [])
        start = end
        memos.append((self._write_index, p_offset, p_length))
    return memos

2558

2559

def get_raw_records(self, memos_for_retrieval):
    """Yield the raw bytes of the records named by the given memos.

    :param memos_for_retrieval: An iterable containing the (index, pos,
        length) memo for retrieving the bytes. The Pack access method
        looks up the pack to use for a given record in its index_to_pack
        map.
    :return: An iterator over the bytes of the records.
    :raises errors.RetryWithNewPacks: When a reload function is available
        and either the index is no longer known or the pack file has gone
        missing. Without a reload function the original KeyError or
        NoSuchFile propagates instead.
    """
    # First pass: collect consecutive memos that share an index.
    request_lists = []
    active_index = None
    for (index, offset, length) in memos_for_retrieval:
        span = (offset, length)
        if active_index == index:
            active_spans.append(span)
            continue
        if active_index is not None:
            request_lists.append((active_index, active_spans))
        active_index = index
        active_spans = [span]
    # The final run has not been flushed yet.
    if active_index is not None:
        request_lists.append((active_index, active_spans))
    for index, offsets in request_lists:
        try:
            transport, path = self._indices[index]
        except KeyError:
            # A KeyError here indicates that someone has triggered an index
            # reload, and this index has gone missing, we need to start
            # over. That is only possible when a reload function exists.
            if self._reload_func is not None:
                raise errors.RetryWithNewPacks(reload_occurred=True,
                                               exc_info=sys.exc_info())
            raise
        try:
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)
        except errors.NoSuchFile:
            # A NoSuchFile error indicates that a pack file has gone
            # missing on disk, we need to trigger a reload, and start over.
            if self._reload_func is not None:
                raise errors.RetryWithNewPacks(reload_occurred=False,
                                               exc_info=sys.exc_info())
            raise

2606

2607

def set_writer(self, writer, index, transport_packname):
    """Install the writer and write index used when adding data.

    When index is not None it is also registered in the index -> pack
    mapping with transport_packname as its value.
    """
    if index is not None:
        self._indices[index] = transport_packname
    self._container_writer, self._write_index = writer, index

2613

2614

def reload_or_raise(self, retry_exc):

2615

"""Try calling the reload function, or re-raise the original exception.

2616

2617

This should be called after _DirectPackAccess raises a

2618

RetryWithNewPacks exception. This function will handle the common logic

2619

of determining when the error is fatal versus being temporary.

2620

It will also make sure that the original exception is raised, rather

2621

than the RetryWithNewPacks exception.

2622

2623

If this function returns, then the calling function should retry

2624

whatever operation was being performed. Otherwise an exception will

2625

be raised.

2626

2627

:param retry_exc: A RetryWithNewPacks exception.

2628

"""

2629

is_error = False

2630

if self._reload_func is None:

2631

is_error = True

2632

elif not self._reload_func():

2633

# The reload claimed that nothing changed

2634

if not retry_exc.reload_occurred:

2635

# If there wasn't an earlier reload, then we really were

2636

# expecting to find changes. We didn't find them, so this is a

2637

# hard error

2638

is_error = True

2639

if is_error:

2640

exc_class, exc_value, exc_traceback = retry_exc.exc_info

2641

raise exc_class, exc_value, exc_traceback

2642

2643

2644

# Deprecated, use PatienceSequenceMatcher instead

2645

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2646

2647

2648

def annotate_knit(knit, revision_id):
    """Annotate revision_id of a knit that has no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :return: An iterator over the result of _KnitAnnotator.annotate().
    """
    return iter(_KnitAnnotator(knit).annotate(revision_id))

2657

2658

2659

class _KnitAnnotator(object):
    """Build up the annotations for a text.

    An annotator is created around a single knit; annotate(key) is the
    entry point that returns the annotated lines for one key.
    """

2661

2662

def __init__(self, knit):

2663

self._knit = knit

2664

2665

# Content objects, differs from fulltexts because of how final newlines

2666

# are treated by knits. the content objects here will always have a

2667

# final newline

2668

self._fulltext_contents = {}

2669

2670

# Annotated lines of specific revisions

2671

self._annotated_lines = {}

2672

2673

# Track the raw data for nodes that we could not process yet.

2674

# This maps the revision_id of the base to a list of children that will

2675

# annotated from it.

2676

self._pending_children = {}

2677

2678

# Nodes which cannot be extracted

2679

self._ghosts = set()

2680

2681

# Track how many children this node has, so we know if we need to keep

2682

# it

2683

self._annotate_children = {}

2684

self._compression_children = {}

2685

2686

self._all_build_details = {}

2687

# The children => parent revision_id graph

2688

self._revision_id_graph = {}

2689

2690

self._heads_provider = None

2691

2692

self._nodes_to_keep_annotations = set()

2693

self._generations_until_keep = 100

2694

2695

def set_generations_until_keep(self, value):
    """Choose how many generations pass before a node is cached.

    :param value: The new threshold. With -1 every merge node is cached;
        larger values cache fewer nodes.
    """
    self._generations_until_keep = value

2702

2703

def _add_fulltext_content(self, revision_id, content_obj):

2704

self._fulltext_contents[revision_id] = content_obj

2705

# TODO: jam 20080305 It might be good to check the sha1digest here

2706

return content_obj.text()

2707

2708

def _check_parents(self, child, nodes_to_annotate):

2709

"""Check if all parents have been processed.

2710

2711

:param child: A tuple of (rev_id, parents, raw_content)

2712

:param nodes_to_annotate: If child is ready, add it to

2713

nodes_to_annotate, otherwise put it back in self._pending_children

2714

"""

2715

for parent_id in child[1]:

2716

if (parent_id not in self._annotated_lines):

2717

# This parent is present, but another parent is missing

2718

self._pending_children.setdefault(parent_id,

2719

[]).append(child)

2720

break

2721

else:

2722

# This one is ready to be processed

2723

nodes_to_annotate.append(child)

2724

2725

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Annotate revision_id and record the result.

    All parents should already have been annotated.

    Once the new annotation is stored, cached data for any parent that has
    no remaining children to annotate (and that is not flagged to be kept)
    is released.

    :return: A list of children that now have their parents satisfied.
    """
    annotated = self._annotated_lines
    parent_annotations = [annotated[parent] for parent in parent_ids]
    reannotated = annotate.reannotate(
        parent_annotations, fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider())
    self._annotated_lines[revision_id] = list(reannotated)
    for parent in parent_ids:
        remaining = self._annotate_children[parent]
        remaining.remove(revision_id)
        if remaining or parent in self._nodes_to_keep_annotations:
            continue
        # Nothing else needs this parent, so drop its cached data.
        del self._annotated_lines[parent]
        del self._all_build_details[parent]
        self._fulltext_contents.pop(parent, None)
    # Now that this revision is annotated, any children that were waiting
    # on it can be re-checked; this parent at least is finished.
    ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, ready)
    return ready

2753

2754

def _get_build_graph(self, key):

2755

"""Get the graphs for building texts and annotations.

2756

2757

The data you need for creating a full text may be different than the

2758

data you need to annotate that text. (At a minimum, you need both

2759

parents to create an annotation, but only need 1 parent to generate the

2760

fulltext.)

2761

2762

:return: A list of (key, index_memo) records, suitable for

2763

passing to read_records_iter to start reading in the raw data fro/

2764

the pack file.

2765

"""

2766

if key in self._annotated_lines:

2767

# Nothing to do

2768

return []

2769

pending = set([key])

2770

records = []

2771

generation = 0

2772

kept_generation = 0

2773

while pending:

2774

# get all pending nodes

2775

generation += 1

2776

this_iteration = pending

2777

build_details = self._knit._index.get_build_details(this_iteration)

2778

self._all_build_details.update(build_details)

2779

# new_nodes = self._knit._index._get_entries(this_iteration)

2780

pending = set()

2781

for key, details in build_details.iteritems():

2782

(index_memo, compression_parent, parents,

2783

record_details) = details

2784

self._revision_id_graph[key] = parents

2785

records.append((key, index_memo))

2786

# Do we actually need to check _annotated_lines?

2787

pending.update(p for p in parents

2788

if p not in self._all_build_details)

2789

if compression_parent:

2790

self._compression_children.setdefault(compression_parent,

2791

[]).append(key)

2792

if parents:

2793

for parent in parents:

2794

self._annotate_children.setdefault(parent,

2795

[]).append(key)

2796

num_gens = generation - kept_generation

2797

if ((num_gens >= self._generations_until_keep)

2798

and len(parents) > 1):

2799

kept_generation = generation

2800

self._nodes_to_keep_annotations.add(key)

2801

2802

missing_versions = this_iteration.difference(build_details.keys())

2803

self._ghosts.update(missing_versions)

2804

for missing_version in missing_versions:

2805

# add a key, no parents

2806

self._revision_id_graph[missing_version] = ()

2807

pending.discard(missing_version) # don't look for it

2808

if self._ghosts.intersection(self._compression_children):

2809

raise KnitCorrupt(

2810

"We cannot have nodes which have a ghost compression parent:\n"

2811

"ghosts: %r\n"

2812

"compression children: %r"

2813

% (self._ghosts, self._compression_children))

2814

# Cleanout anything that depends on a ghost so that we don't wait for

2815

# the ghost to show up

2816

for node in self._ghosts:

2817

if node in self._annotate_children:

2818

# We won't be building this node

2819

del self._annotate_children[node]

2820

# Generally we will want to read the records in reverse order, because

2821

# we find the parent nodes after the children

2822

records.reverse()

2823

return records

2824

2825

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    Each record read from the knit is either annotated straight away (when
    it has no non-ghost parents, or all of them are already annotated) or
    parked via self._check_parents() until its parents are done. Results
    are stored by self._add_annotation(); nothing is returned.

    :param records: The records to read, passed unchanged to
        self._knit._read_records_iter().
    """
    # We iterate in the order read, rather than a strict order requested
    # However, process what we can, and put off to the side things that
    # still need parents, cleaning them up when those parents are
    # processed.
    for (rev_id, record,
         digest) in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            # Already annotated, nothing more to do for this record.
            continue
        parent_ids = self._revision_id_graph[rev_id]
        # Ghost parents can never be annotated, so they are ignored here.
        parent_ids = [p for p in parent_ids if p not in self._ghosts]
        details = self._all_build_details[rev_id]
        (index_memo, compression_parent, parents,
         record_details) = details
        nodes_to_annotate = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if len(parent_ids) == 0:
            # There are no parents for this node, so just add it
            # TODO: This probably needs to be decoupled
            fulltext_content, delta = self._knit._factory.parse_record(
                rev_id, record, record_details, None)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content)
            nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                parent_ids, left_matching_blocks=None))
        else:
            child = (rev_id, parent_ids, record)
            # Check if all the parents are present
            self._check_parents(child, nodes_to_annotate)
        # Drain the work list: annotating one node may make further
        # children ready, which _add_annotation() returns.
        while nodes_to_annotate:
            # Should we use a queue here instead of a stack?
            (rev_id, parent_ids, record) = nodes_to_annotate.pop()
            (index_memo, compression_parent, parents,
             record_details) = self._all_build_details[rev_id]
            blocks = None
            if compression_parent is not None:
                comp_children = self._compression_children[compression_parent]
                if rev_id not in comp_children:
                    raise AssertionError("%r not in compression children %r"
                        % (rev_id, comp_children))
                # If there is only 1 child, it is safe to reuse this
                # content
                reuse_content = (len(comp_children) == 1
                    and compression_parent not in
                        self._nodes_to_keep_annotations)
                if reuse_content:
                    # Remove it from the cache since it will be changing
                    parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                    # Make sure to copy the fulltext since it might be
                    # modified
                    parent_fulltext = list(parent_fulltext_content.text())
                else:
                    parent_fulltext_content = self._fulltext_contents[compression_parent]
                    parent_fulltext = parent_fulltext_content.text()
                # This child no longer needs its compression parent.
                comp_children.remove(rev_id)
                fulltext_content, delta = self._knit._factory.parse_record(
                    rev_id, record, record_details,
                    parent_fulltext_content,
                    copy_base_content=(not reuse_content))
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
                if compression_parent == parent_ids[0]:
                    # the compression_parent is the left parent, so we can
                    # re-use the delta
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
            else:
                # No compression parent: the record is parsed as a
                # fulltext.
                fulltext_content = self._knit._factory.parse_fulltext(
                    record, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                    fulltext_content)
            nodes_to_annotate.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))

2901

2902

def _get_heads_provider(self):

2903

"""Create a heads provider for resolving ancestry issues."""

2904

if self._heads_provider is not None:

2905

return self._heads_provider

2906

parent_provider = _mod_graph.DictParentsProvider(

2907

self._revision_id_graph)

2908

graph_obj = _mod_graph.Graph(parent_provider)

2909

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

2910

self._heads_provider = head_cache

2911

return head_cache

2912

2913

def annotate(self, key):

2914

"""Return the annotated fulltext at the given key.

2915

2916

:param key: The key to annotate.

2917

"""

2918

if len(self._knit._fallback_vfs) > 0:

2919

# stacked knits can't use the fast path at present.

2920

return self._simple_annotate(key)

2921

while True:

2922

try:

2923

records = self._get_build_graph(key)

2924

if key in self._ghosts:

2925

raise errors.RevisionNotPresent(key, self._knit)

2926

self._annotate_records(records)

2927

return self._annotated_lines[key]

2928

except errors.RetryWithNewPacks, e:

2929

self._knit._access.reload_or_raise(e)

2930

# The cached build_details are no longer valid

2931

self._all_build_details.clear()

2932

2933

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    Ghost ancestors reported by the graph search contribute no parent
    lines, which matches how the fast path ignores ghost parents.

    :param key: The key to annotate.
    :return: The annotated lines for key.
    :raises errors.RevisionNotPresent: If no text for key was produced.
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    keys = set()
    ghost_keys = set()
    while True:
        try:
            present, ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
        ghost_keys.update(ghosts)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name so the requested key is not overwritten by
        # whichever record happens to be streamed last.
        record_key = record.key
        fulltext = split_lines(record.get_bytes_as('fulltext'))
        parents = parent_map[record_key]
        if parents is not None:
            # Ghost parents have no text and are never cached; skip them
            # instead of failing with a KeyError.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent not in ghost_keys]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

2969

2970

2971

try:

2972

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2973

except ImportError:

2974

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »