/brz/remove-bazaar : revision 3709.3.2

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/index.py

Committer: Robert Collins
Date: 2008-09-22 05:15:20 UTC
mto: (3696.5.1 commit-updates)
mto: This revision was merged to the branch mainline in revision 3741.
Revision ID: robertc@robertcollins.net-20080922051520-uhr3pn61w141kagv

Race-free stat-fingerprint updating during commit via a new method get_file_with_stat.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.h

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Indexing facilities."""

__all__ = [

'CombinedGraphIndex',

'GraphIndex',

'GraphIndexBuilder',

'GraphIndexPrefixAdapter',

'InMemoryGraphIndex',

]

from bisect import bisect_right

from cStringIO import StringIO

import re

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import trace

from bzrlib.bisect_multi import bisect_multi_bytes

from bzrlib.revision import NULL_REVISION

from bzrlib.trace import mutter

""")

from bzrlib import (

debug,

errors,

symbol_versioning,

)

# readv range (offset, length) requested when only the index header is needed.
_HEADER_READV = (0, 200)
# Prefixes of the three option lines that follow the signature line.
_OPTION_KEY_ELEMENTS = "key_elements="
_OPTION_LEN = "len="
_OPTION_NODE_REFS = "node_ref_lists="
# First line written by GraphIndexBuilder.finish().
_SIGNATURE = "Bazaar Graph Index 1\n"

# Matches any character that is not allowed inside a key element.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Matches the characters that are not allowed inside a node value.
_newline_null_re = re.compile('[\n\0]')

class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure:

    _SIGNATURE OPTIONS NODES NEWLINE
    _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
    OPTIONS        := NODE_REF_LISTS KEY_ELEMENTS LEN
    NODE_REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE
    KEY_ELEMENTS   := 'key_elements=' DIGITS NEWLINE
    LEN            := 'len=' DIGITS NEWLINE
    NODES          := NODE*
    NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
    KEY            := KEY_ELEMENT (NULL KEY_ELEMENT){key_elements - 1}
    KEY_ELEMENT    := Not-whitespace-utf8
    ABSENT         := 'a'
    REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
    REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
    REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                              ; referenced key.
    VALUE          := no-newline-no-null-bytes
    """

def __init__(self, reference_lists=0, key_elements=1):

"""Create a GraphIndex builder.

:param reference_lists: The number of node references lists for each

entry.

:param key_elements: The number of bytestrings in each key.

"""

self.reference_lists = reference_lists

self._keys = set()

# A dict of {key: (absent, ref_lists, value)}

self._nodes = {}

self._nodes_by_key = None

self._key_length = key_elements

def _check_key(self, key):
    """Validate key against the key shape of this index.

    A key is accepted only when it is exactly a tuple, has as many
    elements as the index's key length, and every element is non-empty
    and matches none of the characters in _whitespace_re.

    :raises errors.BadIndexKey: if any of those conditions fails.
    """
    if type(key) != tuple or len(key) != self._key_length:
        raise errors.BadIndexKey(key)
    for part in key:
        if not part:
            raise errors.BadIndexKey(part)
        if _whitespace_re.search(part) is not None:
            raise errors.BadIndexKey(part)

def _get_nodes_by_key(self):

if self._nodes_by_key is None:

100

nodes_by_key = {}

101

if self.reference_lists:

102

for key, (absent, references, value) in self._nodes.iteritems():

103

if absent:

104

continue

105

key_dict = nodes_by_key

106

for subkey in key[:-1]:

107

key_dict = key_dict.setdefault(subkey, {})

108

key_dict[key[-1]] = key, value, references

109

else:

110

for key, (absent, references, value) in self._nodes.iteritems():

111

if absent:

112

continue

113

key_dict = nodes_by_key

114

for subkey in key[:-1]:

115

key_dict = key_dict.setdefault(subkey, {})

116

key_dict[key[-1]] = key, value

117

self._nodes_by_key = nodes_by_key

118

return self._nodes_by_key

119

120

def _update_nodes_by_key(self, key, value, node_refs):

121

"""Update the _nodes_by_key dict with a new key.

122

123

For a key of (foo, bar, baz) create

124

_nodes_by_key[foo][bar][baz] = key_value

125

"""

126

if self._nodes_by_key is None:

127

return

128

key_dict = self._nodes_by_key

129

if self.reference_lists:

130

key_value = key, value, node_refs

131

else:

132

key_value = key, value

133

for subkey in key[:-1]:

134

key_dict = key_dict.setdefault(subkey, {})

135

key_dict[key[-1]] = key_value

136

137

def _check_key_ref_value(self, key, references, value):
    """Validate a node's key, value and references before it is stored.

    :param key: A key tuple; checked with _check_key.
    :param references: An iterable of reference lists, e.g.
        [[(ref, key)], [(ref, key), (other, key)]].  Its length must equal
        self.reference_lists.
    :param value: The value for the key.  Must not contain newlines or
        null characters.
    :return: (node_refs, absent_references)
        node_refs is 'references' repacked as a tuple of tuples.
        absent_references lists the referenced keys that are not in
        self._nodes; a key referenced from several lists appears once
        per occurrence.
    :raises errors.BadIndexKey: if key or a not-yet-known reference is
        not a valid key.
    :raises errors.BadIndexValue: if value contains a forbidden character
        or the number of reference lists is wrong.
    """
    self._check_key(key)
    if _newline_null_re.search(value) is not None:
        raise errors.BadIndexValue(value)
    if len(references) != self.reference_lists:
        raise errors.BadIndexValue(references)
    known_nodes = self._nodes
    packed_refs = []
    missing = []
    for ref_list in references:
        for ref_key in ref_list:
            # Anything already in _nodes was validated when it went in.
            if ref_key in known_nodes:
                continue
            self._check_key(ref_key)
            missing.append(ref_key)
        packed_refs.append(tuple(ref_list))
    return tuple(packed_refs), missing

170

171

def add_node(self, key, value, references=()):
    """Add a node to the index.

    :param key: The key.  Keys are non-empty tuples holding as many
        whitespace-free utf8 bytestrings as the key length defined for
        this index.
    :param value: The value to associate with the key.  Any bytes are
        allowed as long as they contain neither \0 nor \n.
    :param references: An iterable of iterables of keys.  Each inner key
        is a reference to another key.
    :raises errors.BadIndexDuplicateKey: if key is already present as a
        non-absent node.
    """
    checked = self._check_key_ref_value(key, references, value)
    node_refs, absent_references = checked
    nodes = self._nodes
    if key in nodes:
        # An 'a' (absent) placeholder may be replaced by the real node.
        if nodes[key][0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
    for absent_key in absent_references:
        # Duplicates simply overwrite the same placeholder.
        nodes[absent_key] = ('a', (), '')
    nodes[key] = ('', node_refs, value)
    self._keys.add(key)
    if self._key_length > 1 and self._nodes_by_key is not None:
        self._update_nodes_by_key(key, value, node_refs)

194

195

def finish(self):
    """Serialise the index and return it as a file-like object.

    :return: A StringIO holding the signature, the three option lines,
        one line per node (sorted by key) and a terminating newline.
    :raises errors.BzrError: if reference lists are in use and the
        serialised length differs from the length predicted up front.
    """
    lines = [
        _SIGNATURE,
        _OPTION_NODE_REFS + str(self.reference_lists) + '\n',
        _OPTION_KEY_ELEMENTS + str(self._key_length) + '\n',
        _OPTION_LEN + str(len(self._keys)) + '\n',
        ]
    prefix_length = sum(len(line) for line in lines)
    # References are stored as byte offsets of the referenced node.  An
    # offset depends on the width of every reference written before it, so
    # all references are zero-padded to one common width.  That width is
    # found in three passes:
    #  1. accumulate the non-reference byte count and reference count
    #     preceding every node;
    #  2. pick the digit width and turn those counts into node addresses;
    #  3. serialise.
    # Nodes are written forward sorted by key.
    nodes = sorted(self._nodes.items())
    # Without the pre-pass the output length is not known up front.
    expected_bytes = None
    # The pre-pass is only needed when there are reference lists at all.
    if self.reference_lists:
        key_offset_info = []
        non_ref_bytes = prefix_length
        total_references = 0
        for key, (absent, references, value) in nodes:
            # Remember what precedes this node: the non-reference bytes
            # and the number of references written so far.
            key_offset_info.append((key, non_ref_bytes, total_references))
            # The key elements, joined by \x00.
            non_ref_bytes += sum(len(element) for element in key)
            if self._key_length > 1:
                non_ref_bytes += self._key_length - 1
            # The value, three nulls and one newline.
            non_ref_bytes += len(value) + 3 + 1
            if absent:
                # One byte for the absent marker.
                non_ref_bytes += 1
            elif self.reference_lists:
                # (ref_lists - 1) tabs between the lists.
                non_ref_bytes += self.reference_lists - 1
                for ref_list in references:
                    total_references += len(ref_list)
                    # (refs - 1) carriage returns inside a list.
                    if ref_list:
                        non_ref_bytes += len(ref_list) - 1
        # How many digits are needed to represent the total byte count?
        digits = 1
        possible_total_bytes = non_ref_bytes + total_references*digits
        while 10 ** digits < possible_total_bytes:
            digits += 1
            possible_total_bytes = non_ref_bytes + total_references*digits
        expected_bytes = possible_total_bytes + 1 # terminating newline
        # Resolve the address of every node.
        key_addresses = dict(
            (key, plain_bytes + ref_count*digits)
            for key, plain_bytes, ref_count in key_offset_info)
        format_string = '%%0%sd' % digits
    for key, (absent, references, value) in nodes:
        flattened_references = []
        for ref_list in references:
            ref_addresses = [format_string % key_addresses[reference]
                             for reference in ref_list]
            flattened_references.append('\r'.join(ref_addresses))
        string_key = '\x00'.join(key)
        lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
            '\t'.join(flattened_references), value))
    lines.append('\n')
    result = StringIO(''.join(lines))
    if expected_bytes:
        actual_bytes = len(result.getvalue())
        if actual_bytes != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
                (actual_bytes, expected_bytes))
    return result

280

281

282

class GraphIndex(object):

283

"""An index for data with embedded graphs.

284

285

The index maps keys to a list of key reference lists, and a value.

286

Each node has the same number of key reference lists. Each key reference

287

list can be empty or an arbitrary length. The value is an opaque NULL

288

terminated string without any newlines. The storage of the index is

289

hidden in the interface: keys and key references are always tuples of

290

bytestrings, never the internal representation (e.g. dictionary offsets).

291

292

It is presumed that the index will not be mutated - it is static data.

293

294

Successive iter_all_entries calls will read the entire index each time.

295

Additionally, iter_entries calls will read the index linearly until the

296

desired keys are found. XXX: This must be fixed before the index is

297

suitable for production use. :XXX

298

"""

299

300

def __init__(self, transport, name, size):

301

"""Open an index called name on transport.

302

303

:param transport: A bzrlib.transport.Transport.

304

:param name: A path to provide to transport API calls.

305

:param size: The size of the index in bytes. This is used for bisection

306

logic to perform partial index reads. While the size could be

307

obtained by statting the file this introduced an additional round

308

trip as well as requiring stat'able transports, both of which are

309

avoided by having it supplied. If size is None, then bisection

310

support will be disabled and accessing the index will just stream

311

all the data.

312

"""

313

self._transport = transport

314

self._name = name

315

# Becomes a dict of key:(value, reference-list-byte-locations) used by

316

# the bisection interface to store parsed but not resolved keys.

317

self._bisect_nodes = None

318

# Becomes a dict of key:(value, reference-list-keys) which are ready to

319

# be returned directly to callers.

320

self._nodes = None

321

# a sorted list of slice-addresses for the parsed bytes of the file.

322

# e.g. (0,1) would mean that byte 0 is parsed.

323

self._parsed_byte_map = []

324

# a sorted list of keys matching each slice address for parsed bytes

325

# e.g. (None, 'foo@bar') would mean that the first byte contained no

326

# key, and the end byte of the slice is the of the data for 'foo@bar'

327

self._parsed_key_map = []

328

self._key_count = None

329

self._keys_by_offset = None

330

self._nodes_by_key = None

331

self._size = size

332

# The number of bytes we've read so far in trying to process this file

333

self._bytes_read = 0

334

335

def __eq__(self, other):

336

"""Equal when self and other were created with the same parameters."""

337

return (

338

type(self) == type(other) and

339

self._transport == other._transport and

340

self._name == other._name and

341

self._size == other._size)

342

343

def __ne__(self, other):

344

return not self.__eq__(other)

345

346

def __repr__(self):

347

return "%s(%r)" % (self.__class__.__name__,

348

self._transport.abspath(self._name))

349

350

def _buffer_all(self, stream=None):
    """Read and parse the whole index into memory.

    Populates self._keys_by_offset, self._nodes, self._nodes_by_key and
    self._keys.  Does nothing if the index has already been buffered.

    :param stream: Optional file-like object to read the index from;
        when None the index is fetched from the transport.
    :raises errors.BadIndexData: if _parse_lines does not report exactly
        one trailer line.
    """
    if self._nodes is not None:
        # We already did this
        return
    if 'index' in debug.debug_flags:
        mutter('Reading entire index %s', self._transport.abspath(self._name))
    if stream is None:
        stream = self._transport.get(self._name)
    self._read_prefix(stream)
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # ready-to-return key:value or key:value, node_ref_lists
    self._nodes = {}
    self._nodes_by_key = {}
    pos = stream.tell()
    lines = stream.read().split('\n')
    # split() leaves whatever follows the final newline as the last
    # element; it is not a line, so drop it.
    del lines[-1]
    _, _, _, trailers = self._parse_lines(lines, pos)
    for key, absent, references, value in self._keys_by_offset.itervalues():
        if absent:
            continue
        # resolve references:
        if self.node_ref_lists:
            node_value = (value, self._resolve_references(references))
        else:
            node_value = value
        self._nodes[key] = node_value
        if self._key_length > 1:
            # TODO: We may want to do this lazily, but if we are calling
            # _buffer_all, we are likely to be doing
            # iter_entries_prefix
            key_dict = self._nodes_by_key
            if self.node_ref_lists:
                key_value = key, node_value[0], node_value[1]
            else:
                key_value = key, node_value
            # For a key of (foo, bar, baz) create
            # _nodes_by_key[foo][bar][baz] = key_value
            for subkey in key[:-1]:
                key_dict = key_dict.setdefault(subkey, {})
            key_dict[key[-1]] = key_value
    # cache the keys for quick set intersections
    self._keys = set(self._nodes)
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)

403

404

def iter_all_entries(self):
    """Iterate over every key in the index.

    The whole index is buffered first if that has not happened yet.

    :return: An iterable of (index, key, value) or
        (index, key, value, reference_lists).  The shorter tuple is used
        when the index has no reference lists, which keeps the API
        compatible with simple key:value index types.  The iteration
        order is not defined.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    nodes = self._nodes
    if not self.node_ref_lists:
        for key, value in nodes.iteritems():
            yield self, key, value
        return
    for key, (value, node_ref_lists) in nodes.iteritems():
        yield self, key, value, node_ref_lists

424

425

def _read_prefix(self, stream):
    """Read the signature and the three option lines from stream.

    Sets self.node_ref_lists, self._key_length and self._key_count from
    the option lines, in that order.

    :param stream: A file-like object positioned at the start of the
        index.
    :raises errors.BadIndexFormatSignature: if the signature is wrong.
    :raises errors.BadIndexOptions: if an option line has the wrong
        prefix or a non-integer value.
    """
    expected_signature = self._signature()
    signature = stream.read(len(expected_signature))
    if not signature == expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    option_specs = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for prefix, attribute in option_specs:
        options_line = stream.readline()
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            # [:-1] drops the trailing newline kept by readline().
            number = int(options_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, number)

450

451

def _resolve_references(self, references):

452

"""Return the resolved key references for references.

453

454

References are resolved by looking up the location of the key in the

455

_keys_by_offset map and substituting the key name, preserving ordering.

456

457

:param references: An iterable of iterables of key locations. e.g.

458

[[123, 456], [123]]

459

:return: A tuple of tuples of keys.

460

"""

461

node_refs = []

462

for ref_list in references:

463

node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))

464

return tuple(node_refs)

465

466

def _find_index(self, range_map, key):

467

"""Helper for the _parsed_*_index calls.

468

469

Given a range map - [(start, end), ...], finds the index of the range

470

in the map for key if it is in the map, and if it is not there, the

471

immediately preceeding range in the map.

472

"""

473

result = bisect_right(range_map, key) - 1

474

if result + 1 < len(range_map):

475

# check the border condition, it may be in result + 1

476

if range_map[result + 1][0] == key[0]:

477

return result + 1

478

return result

479

480

def _parsed_byte_index(self, offset):

481

"""Return the index of the entry immediately before offset.

482

483

e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that

484

there is one unparsed byte (the 11th, addressed as[10]). then:

485

asking for 0 will return 0

486

asking for 10 will return 0

487

asking for 11 will return 1

488

asking for 12 will return 1

489

"""

490

key = (offset, 0)

491

return self._find_index(self._parsed_byte_map, key)

492

493

def _parsed_key_index(self, key):

494

"""Return the index of the entry immediately before key.

495

496

e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,

497

meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive

498

have been parsed, then:

499

asking for '' will return 0

500

asking for 'a' will return 0

501

asking for 'b' will return 1

502

asking for 'e' will return 1

503

"""

504

search_key = (key, None)

505

return self._find_index(self._parsed_key_map, search_key)

506

507

def _is_parsed(self, offset):

508

"""Returns True if offset has been parsed."""

509

index = self._parsed_byte_index(offset)

510

if index == len(self._parsed_byte_map):

511

return offset < self._parsed_byte_map[index - 1][1]

512

start, end = self._parsed_byte_map[index]

513

return offset >= start and offset < end

514

515

def _iter_entries_from_total_buffer(self, keys):

516

"""Iterate over keys when the entire index is parsed."""

517

keys = keys.intersection(self._keys)

518

if self.node_ref_lists:

519

for key in keys:

520

value, node_refs = self._nodes[key]

521

yield self, key, value, node_refs

522

else:

523

for key in keys:

524

yield self, key, self._nodes[key]

525

526

def iter_entries(self, keys):
    """Iterate over the entries for the given keys.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied.  No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    wanted = set(keys)
    if not wanted:
        return []
    if self._nodes is None:
        # Without a size there is nothing to bisect over.
        if self._size is None:
            self._buffer_all()
    if self._nodes is None:
        # About 20 keys fit in a minimum-read (4K), so when more than
        # 1/20th of the index is wanted it is likely (assuming a
        # homogenous key spread) that the entire index would be read
        # anyway; in that case buffer the whole thing.
        if len(wanted) * 20 > self.key_count():
            self._buffer_all()
    if self._nodes is None:
        probes = bisect_multi_bytes(
            self._lookup_keys_via_location, self._size, wanted)
        return (probe[1] for probe in probes)
    return self._iter_entries_from_total_buffer(wanted)

554

555

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.  Prefixes that match nothing contribute no entries.
    :raises errors.BadIndexKey: if a prefix has the wrong length or its
        first element is None.
    """
    keys = set(keys)
    if not keys:
        return
    # load data - also finds key lengths
    if self._nodes is None:
        self._buffer_all()
    if self._key_length == 1:
        for key in keys:
            # sanity check; the length is tested first so that an empty
            # tuple is reported as a bad key instead of an IndexError.
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            if key[0] is None:
                raise errors.BadIndexKey(key)
            try:
                node = self._nodes[key]
            except KeyError:
                # a non-existent lookup: skip it, exactly as the
                # multi-element branch below does.
                continue
            if self.node_ref_lists:
                value, node_refs = node
                yield self, key, value, node_refs
            else:
                yield self, key, node
        return
    for key in keys:
        # sanity check
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        if key[0] is None:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = self._nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            # Part of the key was None: walk everything below key_dict.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                item, value = key_dict.iteritems().next()
                if type(value) == dict:
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        # each value is the key:value:node refs tuple
                        # ready to yield.
                        yield (self, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict

631

632

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact.  The header is read first if
    the count is not known yet.
    """
    if self._key_count is not None:
        return self._key_count
    self._read_and_parse([_HEADER_READV])
    return self._key_count

640

641

def _lookup_keys_via_location(self, location_keys):
    """Public interface for implementing bisection.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    :param location_keys: A list of location(byte offset), key tuples.
    :return: A list of (location_key, result) tuples as expected by
        bzrlib.bisect_multi.bisect_multi_bytes.  result is the entry
        tuple when the key was found, False when it is known to be
        missing, and -1 or +1 when the key lies below or above the probed
        location.
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    # -----
    # This progresses in three parts: read data, parse it, then try to
    # answer the question from what is now in memory.
    #
    # Build the readv request: for each location ask for 800 bytes - much
    # more than any row seen so far.
    readv_ranges = []
    for location, key in location_keys:
        # can we answer from cache?
        if self._bisect_nodes and key in self._bisect_nodes:
            # We have the key parsed.
            continue
        index = self._parsed_key_index(key)
        if (self._parsed_key_map and
            self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            # the key's region has been parsed, so no lookup is needed
            # even if the key is not present.
            continue
        # - if we have examined this part of the file already - yes
        index = self._parsed_byte_index(location)
        if (self._parsed_byte_map and
            self._parsed_byte_map[index][0] <= location and
            self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            continue
        # never read past the end of the file
        length = min(800, self._size - location)
        # todo, trim out parsed locations.
        if length > 0:
            readv_ranges.append((location, length))
    # read the header if needed
    if self._bisect_nodes is None:
        readv_ranges.append(_HEADER_READV)
    self._read_and_parse(readv_ranges)
    result = []
    if self._nodes is not None:
        # _read_and_parse triggered a _buffer_all because we requested the
        # whole data range
        for location, key in location_keys:
            probe = (location, key)
            if key not in self._nodes: # not present
                result.append((probe, False))
            elif self.node_ref_lists:
                value, refs = self._nodes[key]
                result.append((probe, (self, key, value, refs)))
            else:
                result.append((probe, (self, key, self._nodes[key])))
        return result
    # generate results:
    #  - figure out <, >, missing, present
    #  - resolve present references so we can return them.
    # keys that we cannot answer until we resolve references
    pending_references = []
    pending_locations = set()
    for location, key in location_keys:
        probe = (location, key)
        # can we answer from cache?
        if key in self._bisect_nodes:
            # the key has been parsed, so no lookup is needed
            if not self.node_ref_lists:
                result.append((probe, (self, key, self._bisect_nodes[key])))
                continue
            # the references may not have been all parsed.
            value, refs = self._bisect_nodes[key]
            wanted_locations = [ref
                                for ref_list in refs
                                for ref in ref_list
                                if ref not in self._keys_by_offset]
            if wanted_locations:
                pending_locations.update(wanted_locations)
                pending_references.append(probe)
                continue
            result.append((probe, (self, key,
                value, self._resolve_references(refs))))
            continue
        # has the region the key should be in, been parsed?
        index = self._parsed_key_index(key)
        if (self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            result.append((probe, False))
            continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        index = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[index][0]:
            direction = -1
        else:
            direction = +1
        result.append((probe, direction))
    readv_ranges = []
    # lookup data to resolve references
    for location in pending_locations:
        length = min(800, self._size - location)
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        if length > 0:
            readv_ranges.append((location, length))
    self._read_and_parse(readv_ranges)
    if self._nodes is not None:
        # The _read_and_parse triggered a _buffer_all, grab the data and
        # return it
        for location, key in pending_references:
            value, refs = self._nodes[key]
            result.append(((location, key), (self, key, value, refs)))
        return result
    for location, key in pending_references:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result

782

783

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Sets self.node_ref_lists, self._key_length and self._key_count,
    records the header as parsed and initialises the bisection parsing
    state (_expected_elements, _keys_by_offset, _bisect_nodes).

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data (which may have length 0).
    :raises errors.BadIndexFormatSignature: if the signature is wrong.
    :raises errors.BadIndexOptions: if an option line has the wrong
        prefix or a non-integer value.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if not signature == expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    lines = bytes[len(expected_signature):].splitlines()
    option_specs = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for line_number, (prefix, attribute) in enumerate(option_specs):
        options_line = lines[line_number]
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            number = int(options_line[len(prefix):])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, number)
    # calculate the bytes we have processed: the signature, the three
    # option lines and the newline splitlines() removed from each of them.
    header_end = (len(signature) +
                  sum(len(line) for line in lines[:3]) + 3)
    self._parsed_bytes(0, None, header_end, None)
    # setup parsing state
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # keys with the value and node references
    self._bisect_nodes = {}
    return header_end, bytes[header_end:]

826

827

def _parse_region(self, offset, data):

828

"""Parse node data returned from a readv operation.

829

830

:param offset: The byte offset the data starts at.

831

:param data: The data to parse.

832

"""

833

# trim the data.

834

# end first:

835

end = offset + len(data)

836

high_parsed = offset

837

while True:

838

# Trivial test - if the current index's end is within the

839

# low-matching parsed range, we're done.

840

index = self._parsed_byte_index(high_parsed)

841

if end < self._parsed_byte_map[index][1]:

842

return

843

# print "[%d:%d]" % (offset, end), \

844

# self._parsed_byte_map[index:index + 2]

845

high_parsed, last_segment = self._parse_segment(

846

offset, data, end, index)

847

if last_segment:

848

return

849

850

def _parse_segment(self, offset, data, end, index):

851

"""Parse one segment of data.

852

853

:param offset: Where 'data' begins in the file.

854

:param data: Some data to parse a segment of.

855

:param end: Where data ends

856

:param index: The current index into the parsed bytes map.

857

:return: True if the parsed segment is the last possible one in the

858

range of data.

859

:return: high_parsed_byte, last_segment.

860

high_parsed_byte is the location of the highest parsed byte in this

861

segment, last_segment is True if the parsed segment is the last

862

possible one in the data block.

863

"""

864

# default is to use all data

865

trim_end = None

866

# accomodate overlap with data before this.

867

if offset < self._parsed_byte_map[index][1]:

868

# overlaps the lower parsed region

869

# skip the parsed data

870

trim_start = self._parsed_byte_map[index][1] - offset

871

# don't trim the start for \n

872

start_adjacent = True

873

elif offset == self._parsed_byte_map[index][1]:

874

# abuts the lower parsed region

875

# use all data

876

trim_start = None

877

# do not trim anything

878

start_adjacent = True

879

else:

880

# does not overlap the lower parsed region

881

# use all data

882

trim_start = None

883

# but trim the leading \n

884

start_adjacent = False

885

if end == self._size:

886

# lines up to the end of all data:

887

# use it all

888

trim_end = None

889

# do not strip to the last \n

890

end_adjacent = True

891

last_segment = True

892

elif index + 1 == len(self._parsed_byte_map):

893

# at the end of the parsed data

894

# use it all

895

trim_end = None

896

# but strip to the last \n

897

end_adjacent = False

898

last_segment = True

899

elif end == self._parsed_byte_map[index + 1][0]:

900

# buts up against the next parsed region

901

# use it all

902

trim_end = None

903

# do not strip to the last \n

904

end_adjacent = True

905

last_segment = True

906

elif end > self._parsed_byte_map[index + 1][0]:

907

# overlaps into the next parsed region

908

# only consider the unparsed data

909

trim_end = self._parsed_byte_map[index + 1][0] - offset

910

# do not strip to the last \n as we know its an entire record

911

end_adjacent = True

912

last_segment = end < self._parsed_byte_map[index + 1][1]

913

else:

914

# does not overlap into the next region

915

# use it all

916

trim_end = None

917

# but strip to the last \n

918

end_adjacent = False

919

last_segment = True

920

# now find bytes to discard if needed

921

if not start_adjacent:

922

# work around python bug in rfind

923

if trim_start is None:

924

trim_start = data.find('\n') + 1

925

else:

926

trim_start = data.find('\n', trim_start) + 1

927

if not (trim_start != 0):

928

raise AssertionError('no \n was present')

929

# print 'removing start', offset, trim_start, repr(data[:trim_start])

930

if not end_adjacent:

931

# work around python bug in rfind

932

if trim_end is None:

933

trim_end = data.rfind('\n') + 1

934

else:

935

trim_end = data.rfind('\n', None, trim_end) + 1

936

if not (trim_end != 0):

937

raise AssertionError('no \n was present')

938

# print 'removing end', offset, trim_end, repr(data[trim_end:])

939

# adjust offset and data to the parseable data.

940

trimmed_data = data[trim_start:trim_end]

941

if not (trimmed_data):

942

raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'

943

% (trim_start, trim_end, offset, offset + len(data)))

944

if trim_start:

945

offset += trim_start

946

# print "parsing", repr(trimmed_data)

947

# splitlines mangles the \r delimiters.. don't use it.

948

lines = trimmed_data.split('\n')

949

del lines[-1]

950

pos = offset

951

first_key, last_key, nodes, _ = self._parse_lines(lines, pos)

952

for key, value in nodes:

953

self._bisect_nodes[key] = value

954

self._parsed_bytes(offset, first_key,

955

offset + len(trimmed_data), last_key)

956

return offset + len(trimmed_data), last_segment

957

958

def _parse_lines(self, lines, pos):

959

key = None

960

first_key = None

961

trailers = 0

962

nodes = []

963

for line in lines:

964

if line == '':

965

# must be at the end

966

if self._size:

967

if not (self._size == pos + 1):

968

raise AssertionError("%s %s" % (self._size, pos))

969

trailers += 1

970

continue

971

elements = line.split('\0')

972

if len(elements) != self._expected_elements:

973

raise errors.BadIndexData(self)

974

# keys are tuples. Each element is a string that may occur many

975

# times, so we intern them to save space. AB, RC, 200807

976

key = tuple(intern(element) for element in elements[:self._key_length])

977

if first_key is None:

978

first_key = key

979

absent, references, value = elements[-3:]

980

ref_lists = []

981

for ref_string in references.split('\t'):

982

ref_lists.append(tuple([

983

int(ref) for ref in ref_string.split('\r') if ref

984

]))

985

ref_lists = tuple(ref_lists)

986

self._keys_by_offset[pos] = (key, absent, ref_lists, value)

987

pos += len(line) + 1 # +1 for the \n

988

if absent:

989

continue

990

if self.node_ref_lists:

991

node_value = (value, ref_lists)

992

else:

993

node_value = value

994

nodes.append((key, node_value))

995

# print "parsed ", key

996

return first_key, key, nodes, trailers

997

998

def _parsed_bytes(self, start, start_key, end, end_key):

999

"""Mark the bytes from start to end as parsed.

1000

1001

Calling self._parsed_bytes(1,2) will mark one byte (the one at offset

1002

1) as parsed.

1003

1004

:param start: The start of the parsed region.

1005

:param end: The end of the parsed region.

1006

"""

1007

index = self._parsed_byte_index(start)

1008

new_value = (start, end)

1009

new_key = (start_key, end_key)

1010

if index == -1:

1011

# first range parsed is always the beginning.

1012

self._parsed_byte_map.insert(index, new_value)

1013

self._parsed_key_map.insert(index, new_key)

1014

return

1015

# four cases:

1016

# new region

1017

# extend lower region

1018

# extend higher region

1019

# combine two regions

1020

if (index + 1 < len(self._parsed_byte_map) and

1021

self._parsed_byte_map[index][1] == start and

1022

self._parsed_byte_map[index + 1][0] == end):

1023

# combine two regions

1024

self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],

1025

self._parsed_byte_map[index + 1][1])

1026

self._parsed_key_map[index] = (self._parsed_key_map[index][0],

1027

self._parsed_key_map[index + 1][1])

1028

del self._parsed_byte_map[index + 1]

1029

del self._parsed_key_map[index + 1]

1030

elif self._parsed_byte_map[index][1] == start:

1031

# extend the lower entry

1032

self._parsed_byte_map[index] = (

1033

self._parsed_byte_map[index][0], end)

1034

self._parsed_key_map[index] = (

1035

self._parsed_key_map[index][0], end_key)

1036

elif (index + 1 < len(self._parsed_byte_map) and

1037

self._parsed_byte_map[index + 1][0] == end):

1038

# extend the higher entry

1039

self._parsed_byte_map[index + 1] = (

1040

start, self._parsed_byte_map[index + 1][1])

1041

self._parsed_key_map[index + 1] = (

1042

start_key, self._parsed_key_map[index + 1][1])

1043

else:

1044

# new entry

1045

self._parsed_byte_map.insert(index + 1, new_value)

1046

self._parsed_key_map.insert(index + 1, new_key)

1047

1048

def _read_and_parse(self, readv_ranges):

1049

"""Read the the ranges and parse the resulting data.

1050

1051

:param readv_ranges: A prepared readv range list.

1052

"""

1053

if not readv_ranges:

1054

return

1055

if self._nodes is None and self._bytes_read * 2 >= self._size:

1056

# We've already read more than 50% of the file and we are about to

1057

# request more data, just _buffer_all() and be done

1058

self._buffer_all()

1059

return

1060

1061

readv_data = self._transport.readv(self._name, readv_ranges, True,

1062

self._size)

1063

# parse

1064

for offset, data in readv_data:

1065

self._bytes_read += len(data)

1066

if offset == 0 and len(data) == self._size:

1067

# We read the whole range, most likely because the

1068

# Transport upcast our readv ranges into one long request

1069

# for enough total data to grab the whole index.

1070

self._buffer_all(StringIO(data))

1071

return

1072

if self._bisect_nodes is None:

1073

# this must be the start

1074

if not (offset == 0):

1075

raise AssertionError()

1076

offset, data = self._parse_header_from_bytes(data)

1077

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

1078

self._parse_region(offset, data)

1079

1080

def _signature(self):
    """Return the file signature constant used by this index type."""
    return _SIGNATURE

1083

1084

def validate(self):
    """Check that every entry in the index can be accessed."""
    # Walking all entries currently performs a complete validation, so
    # simply exhaust the iterator and discard what it yields.
    for _entry in self.iter_all_entries():
        continue

1089

1090

1091

class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries are answered by asking the first index, then the second, and so
    on, so the order of the indices can influence performance significantly.
    For example, an index on local disk should be listed before one that
    lives on a remote server.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data.
        """
        self._indices = indices

    def __repr__(self):
        child_reprs = [repr(child) for child in self._indices]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(child_reprs))

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents.

        This implementation thunks the graph.Graph.get_parents api across to
        GraphIndex.

        :param revision_ids: An iterable of graph keys for this graph.
        :return: A list of parent details for each key in revision_ids.
            Each parent details will be one of:
             * None when the key was missing
             * (NULL_REVISION,) when the key has no parents.
             * (parent_key, parent_key...) otherwise.
        """
        lookup = self.get_parent_map(revision_ids).get
        return [lookup(revision_id, None) for revision_id in revision_ids]

    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map"""
        wanted = set(keys)
        parent_map = {}
        if NULL_REVISION in wanted:
            # NULL_REVISION is answered directly, never looked up.
            wanted.discard(NULL_REVISION)
            parent_map[NULL_REVISION] = []
        for _index, key, _value, refs in self.iter_entries(wanted):
            # The first reference list holds the parents; a key without
            # parents is reported as a child of NULL_REVISION.
            parent_map[key] = refs[0] or (NULL_REVISION,)
        return parent_map

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :return: An iterable of the nodes yielded by the child indices,
            whose second element is the key. There is no defined order for
            the result iteration - it will be in the most efficient order
            for the index.
        """
        reported = set()
        for child in self._indices:
            for node in child.iter_all_entries():
                node_key = node[1]
                if node_key in reported:
                    continue
                yield node
                reported.add(node_key)

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of the nodes yielded by the child indices,
            whose second element is the key. There is no defined order for
            the result iteration - it will be in the most efficient order
            for the index.
        """
        remaining = set(keys)
        for child in self._indices:
            if not remaining:
                # Everything requested has been found already.
                return
            for node in child.iter_entries(remaining):
                remaining.remove(node[1])
                yield node

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        prefixes = set(keys)
        if not prefixes:
            return
        reported = set()
        for child in self._indices:
            for node in child.iter_entries_prefix(prefixes):
                node_key = node[1]
                if node_key not in reported:
                    reported.add(node_key)
                    yield node

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For CombinedGraphIndex this is approximated by the sum of the keys of
        the child indices. As child indices may have duplicate keys this can
        have a maximum error of the number of child indices * largest number of
        keys in any index.
        """
        total = 0
        for child in self._indices:
            total += child.key_count()
        return total

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for child in self._indices:
            child.validate()

1237

1238

1239

class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be immediately
    available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add
            when this index has reference lists, otherwise an iterable of
            (key, value) entries.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        :return: An iterable of (index, key, value, reference_lists), or of
            (index, key, value) when the index has no reference lists.
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index (in this case
            dictionary hash order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        # self._nodes maps key -> (absent, references, value); absent nodes
        # are never reported.
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists), or of
            (index, key, value) when the index has no reference lists.
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index (keys iteration order
            in this case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2], node[1]
        else:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        :raises errors.BadIndexKey: If a prefix starts with None or does
            not have the key length of this index.
        """
        # XXX: Too much duplication with the GraphIndex class; consider
        # finding a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            for key in keys:
                # sanity check
                if key[0] is None:
                    raise errors.BadIndexKey(key)
                if len(key) != self._key_length:
                    raise errors.BadIndexKey(key)
                try:
                    node = self._nodes[key]
                except KeyError:
                    # Not in this index: skip it, exactly as the
                    # multi-element branch below does for a failed lookup.
                    continue
                if node[0]:
                    # absent node
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            # find what it refers to:
            key_dict = nodes_by_key
            elements = list(key)
            # find the subdict to return
            try:
                while len(elements) and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existent lookup.
                continue
            if len(elements):
                # Part of the key was None: walk everything below the
                # sub-dictionary that the given elements selected.
                dicts = [key_dict]
                while dicts:
                    key_dict = dicts.pop(-1)
                    # can't be empty or would not exist
                    item, value = key_dict.iteritems().next()
                    if isinstance(value, dict):
                        # push keys
                        dicts.extend(key_dict.itervalues())
                    else:
                        # yield keys
                        for value in key_dict.itervalues():
                            yield (self, ) + value
            else:
                # The full key was given, so key_dict is the node itself.
                yield (self, ) + key_dict

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._keys)

    def validate(self):
        """In memory index's have no known corruption at the moment."""

1378

1379

1380

class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries made against the adapter are forwarded to the adapted index
    with the prefix added; queries for all items use iter_entries_prefix.
    Nodes coming back have the prefix removed from their keys and from
    their node references. An add_nodes_callback can also be supplied; it
    is called with nodes whose keys and references have had the prefix
    prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
        add_nodes_callback=None):
        """Construct an adapter against adapted with prefix."""
        self.adapted = adapted
        self.prefix = prefix
        self.prefix_len = len(prefix)
        # The prefix padded with None: the prefix-search key that selects
        # every entry below the prefix.
        self.prefix_key = prefix + (None,)*missing_key_length
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when there are no reference lists.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        try:
            # node_refs is a tuple of tuples of keys; rebuild it with the
            # prefix prepended to every referenced key.
            translated_nodes = [
                (self.prefix + key, value,
                 tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                       for ref_list in node_refs))
                for (key, value, node_refs) in nodes]
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            translated_nodes = [
                (self.prefix + key, value) for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \0 or \n.
        """
        single_node = (key, value, references)
        self.add_nodes((single_node, ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it."""
        for node in an_iter:
            # cross checks
            full_key = node[1]
            if full_key[:self.prefix_len] != self.prefix:
                raise errors.BadIndexData(self)
            ref_lists = node[3]
            for ref_list in ref_lists:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise errors.BadIndexData(self)
            stripped_refs = tuple(
                tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                for ref_list in ref_lists)
            yield node[0], full_key[self.prefix_len:], node[2], stripped_refs

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - it is whatever
            order the adapted index yields.
        """
        matches = self.adapted.iter_entries_prefix([self.prefix_key])
        return self._strip_prefix(matches)

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There
            is no defined order for the result iteration - it is whatever
            order the adapted index yields.
        """
        prefixed_keys = (self.prefix + key for key in keys)
        return self._strip_prefix(self.adapted.iter_entries(prefixed_keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        prefixed_keys = (self.prefix + key for key in keys)
        return self._strip_prefix(
            self.adapted.iter_entries_prefix(prefixed_keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        all_entries = list(self.iter_all_entries())
        return len(all_entries)

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()

Older »