/brz/remove-bazaar : revision 2916.2.13

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/index.py

Committer: Andrew Bennetts
Date: 2007-10-29 08:34:38 UTC
mto: (2535.4.22 streaming-smart-fetch)
mto: This revision was merged to the branch mainline in revision 2981.
Revision ID: andrew.bennetts@canonical.com-20071029083438-ke1vsv97dvgrvup5

Improve some docstrings.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Indexing facilities."""

# Public names of this module (what ``from bzrlib.index import *`` exports).
__all__ = [

'CombinedGraphIndex',

'GraphIndex',

'GraphIndexBuilder',

'GraphIndexPrefixAdapter',

'InMemoryGraphIndex',

]

from bisect import bisect_right

from cStringIO import StringIO

import re

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import trace

from bzrlib.bisect_multi import bisect_multi_bytes

from bzrlib.trace import mutter

""")

from bzrlib import debug, errors

# Prefixes of the option lines that follow the signature in a serialised
# index; GraphIndexBuilder.finish() appends str(value) and a newline to each.
_OPTION_KEY_ELEMENTS = "key_elements="

_OPTION_LEN = "len="

_OPTION_NODE_REFS = "node_ref_lists="

# First line of a serialised index.
_SIGNATURE = "Bazaar Graph Index 1\n"

# Matches any character that is not allowed inside a key element
# (whitespace or NUL); used by GraphIndexBuilder._check_key().
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')

# Matches the characters that are not allowed inside a node value
# (newline or NUL); used by GraphIndexBuilder.add_node().
_newline_null_re = re.compile('[\n\0]')

class GraphIndexBuilder(object):

"""A builder that can build a GraphIndex.

The resulting graph has the structure:

_SIGNATURE OPTIONS NODES NEWLINE

_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE

OPTIONS := 'node_ref_lists=' DIGITS NEWLINE

'key_elements=' DIGITS NEWLINE

'len=' DIGITS NEWLINE

NODES := NODE*

NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

KEY := Not-whitespace-utf8

ABSENT := 'a'

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

REFERENCE := DIGITS ; digits is the byte offset in the index of the

; referenced key.

VALUE := no-newline-no-null-bytes

"""

def __init__(self, reference_lists=0, key_elements=1):
    """Create a GraphIndex builder.

    :param reference_lists: The number of node references lists for each
        entry.
    :param key_elements: The number of bytestrings in each key.
    """
    # Number of reference lists every added node must supply.
    self.reference_lists = reference_lists
    # Keys of the nodes that were explicitly added via add_node(); keys
    # that are only referenced by other nodes are not recorded here.
    self._keys = set()
    # Maps key -> (absent_marker, reference_lists, value). The marker is
    # '' for added nodes and 'a' for keys that are only referenced.
    self._nodes = {}
    # Nested dicts indexed by successive key elements; only populated
    # when keys have more than one element.
    self._nodes_by_key = {}
    self._key_length = key_elements

def _check_key(self, key):
    """Raise BadIndexKey if key is not a valid key for this index.

    A valid key is a ``tuple`` (exactly that type, subclasses are
    rejected) holding ``self._key_length`` elements, each of which is
    non-empty and contains no whitespace or NUL characters.

    :param key: The candidate key.
    :raises errors.BadIndexKey: If the key as a whole, or any one of its
        elements, is invalid. The offending key or element is passed to
        the exception.
    """
    if type(key) is not tuple:
        raise errors.BadIndexKey(key)
    if self._key_length != len(key):
        raise errors.BadIndexKey(key)
    for element in key:
        if not element or _whitespace_re.search(element) is not None:
            raise errors.BadIndexKey(element)

def add_node(self, key, value, references=()):
    """Add a node to the index.

    :param key: The key. keys are non-empty tuples containing
        as many whitespace-free utf8 bytestrings as the key length
        defined for this index.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain \\0 or \\n.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :raises errors.BadIndexKey: If key, or any referenced key, is not a
        valid key for this index.
    :raises errors.BadIndexValue: If value contains \\0 or \\n, or if the
        number of reference lists differs from the number this builder
        was created with.
    :raises errors.BadIndexDuplicateKey: If a node was already added
        for key.
    """
    self._check_key(key)
    if _newline_null_re.search(value) is not None:
        raise errors.BadIndexValue(value)
    if len(references) != self.reference_lists:
        raise errors.BadIndexValue(references)
    node_refs = []
    for reference_list in references:
        # Materialise once so that a one-shot iterable is not already
        # exhausted by validation when it gets stored.
        reference_list = tuple(reference_list)
        for reference in reference_list:
            self._check_key(reference)
            if reference not in self._nodes:
                # Record referenced-but-not-yet-added keys as absent.
                self._nodes[reference] = ('a', (), '')
        node_refs.append(reference_list)
    node_refs = tuple(node_refs)
    # An absent placeholder may be replaced; a real node may not.
    if key in self._nodes and self._nodes[key][0] == '':
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = ('', node_refs, value)
    self._keys.add(key)
    if self._key_length > 1:
        key_dict = self._nodes_by_key
        if self.reference_lists:
            key_value = key, value, node_refs
        else:
            key_value = key, value
        # possibly should do this on-demand, but it seems likely it is
        # always wanted
        # For a key of (foo, bar, baz) create
        # _nodes_by_key[foo][bar][baz] = key_value
        for subkey in key[:-1]:
            key_dict = key_dict.setdefault(subkey, {})
        key_dict[key[-1]] = key_value

131

132

def finish(self):
    """Serialise the nodes added so far.

    The output is the signature, three option lines (reference list count,
    key element count, key count), one line per node in sorted key order,
    and a final empty line. A node line is the key elements joined by NUL,
    then NUL-separated absent marker, references and value. Reference lists
    are TAB separated; references within a list are CR separated,
    zero-padded decimal byte offsets of the referenced node's line.

    :return: A StringIO holding the serialised index.
    :raises BzrError: If reference lists are in use and the serialised
        length differs from the length predicted while computing offsets.
    """
    lines = [_SIGNATURE]
    lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
    lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
    lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
    prefix_length = sum(len(x) for x in lines)
    # references are byte offsets. To avoid having to do nasty
    # polynomial work to resolve offsets (references to later in the
    # file cannot be determined until all the inbetween references have
    # been calculated too) we pad the offsets with 0's to make them be
    # of consistent length. Using binary offsets would break the trivial
    # file parsing.
    # to calculate the width of zero's needed we do three passes:
    # one to gather all the non-reference data and the number of references.
    # one to pad all the data with reference-length and determine entry
    # addresses.
    # One to serialise.

    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    nodes = sorted(self._nodes.items())
    # if we do not prepass, we don't know how long it will be up front.
    expected_bytes = None
    # we only need to pre-pass if we have reference lists at all.
    if self.reference_lists:
        key_offset_info = []
        non_ref_bytes = prefix_length
        total_references = 0
        # TODO use simple multiplication for the constants in this loop.
        for key, (absent, references, value) in nodes:
            # record the offset known *so far* for this key:
            # the non reference bytes to date, and the total references to
            # date - saves reaccumulating on the second pass
            key_offset_info.append((key, non_ref_bytes, total_references))
            # key is variable length tuple, \x00 between elements
            non_ref_bytes += sum(len(element) for element in key)
            if self._key_length > 1:
                non_ref_bytes += self._key_length - 1
            # value is literal bytes, there are 3 null's, 1 NL.
            non_ref_bytes += len(value) + 3 + 1
            # one byte for absent if set.
            if absent:
                non_ref_bytes += 1
            elif self.reference_lists:
                # (ref_lists -1) tabs
                non_ref_bytes += self.reference_lists - 1
                # (ref-1 cr's per ref_list)
                for ref_list in references:
                    # how many references across the whole file?
                    total_references += len(ref_list)
                    # accrue reference separators
                    if ref_list:
                        non_ref_bytes += len(ref_list) - 1
        # how many digits are needed to represent the total byte count?
        digits = 1
        possible_total_bytes = non_ref_bytes + total_references * digits
        while 10 ** digits < possible_total_bytes:
            digits += 1
            possible_total_bytes = non_ref_bytes + total_references * digits
        expected_bytes = possible_total_bytes + 1  # terminating newline
        # resolve key addresses.
        key_addresses = {}
        for key, non_ref_bytes, total_references in key_offset_info:
            key_addresses[key] = non_ref_bytes + total_references * digits
        # serialise
        format_string = '%%0%sd' % digits
    for key, (absent, references, value) in nodes:
        # NOTE: format_string / key_addresses only exist when reference
        # lists are in use; this relies on nodes carrying no references
        # otherwise (add_node enforces the reference list count).
        flattened_references = []
        for ref_list in references:
            ref_addresses = []
            for reference in ref_list:
                ref_addresses.append(format_string % key_addresses[reference])
            flattened_references.append('\r'.join(ref_addresses))
        string_key = '\x00'.join(key)
        lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
            '\t'.join(flattened_references), value))
    lines.append('\n')
    # Join exactly once and reuse the text for both the length check and
    # the returned file object.
    serialised = ''.join(lines)
    if expected_bytes and len(serialised) != expected_bytes:
        raise errors.BzrError('Failed index creation. Internal error:'
            ' mismatched output length and expected length: %d %d' %
            (len(serialised), expected_bytes))
    return StringIO(serialised)

217

218

219

class GraphIndex(object):

220

"""An index for data with embedded graphs.

221

222

The index maps keys to a list of key reference lists, and a value.

223

Each node has the same number of key reference lists. Each key reference

224

list can be empty or an arbitrary length. The value is an opaque NULL

225

terminated string without any newlines. The storage of the index is

226

hidden in the interface: keys and key references are always tuples of

227

bytestrings, never the internal representation (e.g. dictionary offsets).

228

229

It is presumed that the index will not be mutated - it is static data.

230

231

Successive iter_all_entries calls will read the entire index each time.

232

Additionally, iter_entries calls will read the index linearly until the

233

desired keys are found. XXX: This must be fixed before the index is

234

suitable for production use. :XXX

235

"""

236

237

def __init__(self, transport, name, size):
    """Open an index called name on transport.

    No I/O is performed here; only bookkeeping attributes are initialised.

    :param transport: A bzrlib.transport.Transport.
    :param name: A path to provide to transport API calls.
    :param size: The size of the index in bytes. This is used for bisection
        logic to perform partial index reads. While the size could be
        obtained by statting the file this introduced an additional round
        trip as well as requiring stat'able transports, both of which are
        avoided by having it supplied. If size is None, then bisection
        support will be disabled and accessing the index will just stream
        all the data.
    """
    self._transport = transport
    self._name = name
    self._size = size
    # Becomes a dict of key:(value, reference-list-byte-locations) used by
    # the bisection interface to store parsed but not resolved keys.
    self._bisect_nodes = None
    # Becomes a dict of key:(value, reference-list-keys) which are ready to
    # be returned directly to callers.
    self._nodes = None
    # a sorted list of slice-addresses for the parsed bytes of the file.
    # e.g. (0,1) would mean that byte 0 is parsed.
    self._parsed_byte_map = []
    # a sorted list of keys matching each slice address for parsed bytes
    # e.g. (None, 'foo@bar') would mean that the first byte contained no
    # key, and the end byte of the slice is the end of the data for
    # 'foo@bar'
    self._parsed_key_map = []
    self._key_count = None
    self._keys_by_offset = None
    self._nodes_by_key = None

269

270

def _buffer_all(self):
    """Buffer all the index data.

    Reads the whole file through the transport, then populates
    self._keys_by_offset, self._nodes, self._nodes_by_key (only for
    multi-element keys) and self._keys.

    :raises BadIndexData: If the data does not contain exactly one empty
        trailer line.
    """
    if 'index' in debug.debug_flags:
        mutter('Reading entire index %s', self._transport.abspath(self._name))
    stream = self._transport.get(self._name)
    self._read_prefix(stream)
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # ready-to-return key:value or key:value, node_ref_lists
    self._nodes = {}
    self._nodes_by_key = {}
    pos = stream.tell()
    lines = stream.read().split('\n')
    # the text after the final newline is not a line.
    del lines[-1]
    _, _, _, trailers = self._parse_lines(lines, pos)
    for key, absent, references, value in self._keys_by_offset.itervalues():
        if absent:
            continue
        # resolve references:
        if self.node_ref_lists:
            node_value = (value, self._resolve_references(references))
        else:
            node_value = value
        self._nodes[key] = node_value
        if self._key_length > 1:
            if self.node_ref_lists:
                key_value = key, node_value[0], node_value[1]
            else:
                key_value = key, node_value
            # possibly should do this on-demand, but it seems likely it is
            # always wanted
            # For a key of (foo, bar, baz) create
            # _nodes_by_key[foo][bar][baz] = key_value
            key_dict = self._nodes_by_key
            for subkey in key[:-1]:
                key_dict = key_dict.setdefault(subkey, {})
            key_dict[key[-1]] = key_value
    # cache the keys for quick set intersections
    self._keys = set(self._nodes)
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)

319

320

def iter_all_entries(self):
    """Iterate over all keys within the index.

    The whole index is buffered first if that has not happened yet.

    :return: An iterable of (index, key, value) or (index, key, value,
        reference_lists). The former tuple is used when there are no
        reference lists in the index, making the API compatible with simple
        key:value index types. There is no defined order for the result
        iteration - it will be in the most efficient order for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    if self.node_ref_lists:
        for key, (value, node_ref_lists) in self._nodes.iteritems():
            yield self, key, value, node_ref_lists
    else:
        for key, value in self._nodes.iteritems():
            yield self, key, value

340

341

def _read_prefix(self, stream):
    """Read and check the signature and option lines from stream.

    Sets self.node_ref_lists, self._key_length and self._key_count from the
    three option lines, which must appear in that order.

    :param stream: A file-like object positioned at the start of the index.
    :raises BadIndexFormatSignature: If the data does not start with the
        expected signature.
    :raises BadIndexOptions: If an option line has the wrong prefix or a
        non-integer value.
    """
    expected_signature = self._signature()
    if stream.read(len(expected_signature)) != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)

    def read_int_option(prefix):
        # Consume one option line and return the integer after its prefix.
        options_line = stream.readline()
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            # [:-1] drops the trailing newline left by readline().
            return int(options_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = read_int_option(_OPTION_NODE_REFS)
    self._key_length = read_int_option(_OPTION_KEY_ELEMENTS)
    self._key_count = read_int_option(_OPTION_LEN)

366

367

def _resolve_references(self, references):
    """Return the resolved key references for references.

    References are resolved by looking up the location of the key in the
    _keys_by_offset map and substituting the key name, preserving ordering.

    :param references: An iterable of iterables of key locations. e.g.
        [[123, 456], [123]]
    :return: A tuple of tuples of keys.
    :raises KeyError: If a location is not present in _keys_by_offset.
    """
    keys_by_offset = self._keys_by_offset
    return tuple([
        tuple([keys_by_offset[ref][0] for ref in ref_list])
        for ref_list in references])

381

382

def _find_index(self, range_map, key):
    """Helper for the _parsed_*_index calls.

    Given a range map - [(start, end), ...], finds the index of the range
    in the map for key if it is in the map, and if it is not there, the
    immediately preceding range in the map.

    :param range_map: A sorted list of (start, end) tuples.
    :param key: A 2-tuple; its first element is what is being located.
    :return: The index into range_map, or -1 when key sorts before every
        range (including when range_map is empty).
    """
    result = bisect_right(range_map, key) - 1
    following = result + 1
    # check the border condition: a range that starts exactly at key[0] can
    # still sort after key, so bisect places key just before it.
    if following < len(range_map) and range_map[following][0] == key[0]:
        return following
    return result

395

396

def _parsed_byte_index(self, offset):
    """Return the index of the entry immediately before offset.

    e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
    there is one unparsed byte (the 11th, addressed as[10]). then:
    asking for 0 will return 0
    asking for 10 will return 0
    asking for 11 will return 1
    asking for 12 will return 1

    :param offset: A byte offset into the index file.
    :return: An index into self._parsed_byte_map, as computed by
        _find_index.
    """
    # (offset, 0) is the search tuple compared against the (start, end)
    # entries of the byte map.
    return self._find_index(self._parsed_byte_map, (offset, 0))

408

409

def _parsed_key_index(self, key):
    """Return the index of the entry immediately before key.

    e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
    meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
    have been parsed, then:
    asking for '' will return 0
    asking for 'a' will return 0
    asking for 'b' will return 1
    asking for 'e' will return 1

    :param key: The key to locate.
    :return: An index into self._parsed_key_map, as computed by
        _find_index.
    """
    # (key, None) is the search tuple compared against the
    # (first_key, last_key) entries of the key map.
    return self._find_index(self._parsed_key_map, (key, None))

422

423

def _is_parsed(self, offset):
    """Returns True if offset has been parsed.

    :param offset: A byte offset into the index file.
    :return: True when offset lies inside a half-open (start, end) range
        recorded in self._parsed_byte_map, False otherwise.
    """
    index = self._parsed_byte_index(offset)
    if index < 0:
        # No range starts at or before offset (e.g. nothing parsed yet).
        # Indexing the map with -1 would raise on an empty map, or silently
        # test the *last* range on a populated one.
        return False
    start, end = self._parsed_byte_map[index]
    return start <= offset < end

430

431

def _iter_entries_from_total_buffer(self, keys):
    """Iterate over keys when the entire index is parsed.

    :param keys: A set of keys; those not in self._keys are ignored.
    :return: A generator of (index, key, value, reference_lists) tuples when
        the index has reference lists, otherwise (index, key, value).
    """
    present = keys.intersection(self._keys)
    if self.node_ref_lists:
        for key in present:
            value, node_refs = self._nodes[key]
            yield self, key, value, node_refs
    else:
        for key in present:
            yield self, key, self._nodes[key]

441

442

def iter_entries(self, keys):
    """Iterate over keys within the index.

    When no size was supplied the whole index is buffered; once the index
    is buffered, answers come from memory. Otherwise the keys are located
    by bisection through _lookup_keys_via_location.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    # PERFORMANCE TODO: parse and bisect all remaining data at some
    # threshold of total-index processing/get calling layers that expect to
    # read the entire index to use the iter_all_entries method instead.
    keys = set(keys)
    if not keys:
        return []
    if self._size is None and self._nodes is None:
        self._buffer_all()
    if self._nodes is not None:
        return self._iter_entries_from_total_buffer(keys)
    # bisect_multi_bytes yields (key, result) pairs; only the result part
    # is passed on to the caller.
    return (result[1] for result in bisect_multi_bytes(
        self._lookup_keys_via_location, self._size, keys))

463

464

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned. Prefixes that match nothing are skipped.
    :raises BadIndexKey: If a prefix starts with None or has the wrong
        number of elements.
    """
    keys = set(keys)
    if not keys:
        return
    # load data - also finds key lengths
    if self._nodes is None:
        self._buffer_all()
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        if self._key_length == 1:
            # A one element prefix is a complete key: plain lookup. A key
            # that is not in the index is skipped, exactly as a
            # non-matching multi-element prefix is below.
            try:
                node = self._nodes[key]
            except KeyError:
                continue
            if self.node_ref_lists:
                value, node_refs = node
                yield self, key, value, node_refs
            else:
                yield self, key, node
            continue
        # find what it refers to:
        key_dict = self._nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            # Wildcard elements remain: walk every dict below this point.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                item, value = key_dict.iteritems().next()
                if type(value) == dict:
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        # each value is the key:value:node refs tuple
                        # ready to yield.
                        yield (self, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict

540

541

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact. If the count is not yet known the
    whole index is buffered to obtain it.
    """
    if self._key_count is None:
        # really this should just read the prefix
        self._buffer_all()
    return self._key_count

550

551

def _lookup_keys_via_location(self, location_keys):
    """Public interface for implementing bisection.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    :param location_keys: A list of location(byte offset), key tuples.
    :return: A list of (location_key, result) tuples as expected by
        bzrlib.bisect_multi.bisect_multi_bytes. In this implementation
        result is the entry tuple for a key that was found, False for a
        key whose region has been parsed without finding it, and -1 or +1
        when the key sorts below or above the data parsed at location.
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    # -----
    # this progresses in three parts:
    # read data
    # parse it
    # attempt to answer the question from the now in memory data.
    # build the readv request
    # for each location, ask for 800 bytes - much more than rows we've seen
    # anywhere.
    readv_ranges = []
    for location, key in location_keys:
        # can we answer from cache?
        if self._bisect_nodes and key in self._bisect_nodes:
            # We have the key parsed.
            continue
        index = self._parsed_key_index(key)
        if (len(self._parsed_key_map) and
            self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            # the key has been parsed, so no lookup is needed even if its
            # not present.
            continue
        # - if we have examined this part of the file already - yes
        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
            self._parsed_byte_map[index][0] <= location and
            self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            continue
        length = 800
        if location + length > self._size:
            length = self._size - location
        # todo, trim out parsed locations.
        if length > 0:
            readv_ranges.append((location, length))
    # read the header if needed
    if self._bisect_nodes is None:
        readv_ranges.append((0, 200))
    self._read_and_parse(readv_ranges)
    # generate results:
    #  - figure out <, >, missing, present
    #  - result present references so we can return them.
    result = []
    # keys that we cannot answer until we resolve references
    pending_references = []
    pending_locations = set()
    for location, key in location_keys:
        # can we answer from cache?
        if key in self._bisect_nodes:
            # the key has been parsed, so no lookup is needed
            if self.node_ref_lists:
                # the references may not have been all parsed.
                value, refs = self._bisect_nodes[key]
                wanted_locations = []
                for ref_list in refs:
                    for ref in ref_list:
                        if ref not in self._keys_by_offset:
                            wanted_locations.append(ref)
                if wanted_locations:
                    pending_locations.update(wanted_locations)
                    pending_references.append((location, key))
                    continue
                result.append(((location, key), (self, key,
                    value, self._resolve_references(refs))))
            else:
                result.append(((location, key),
                    (self, key, self._bisect_nodes[key])))
            continue
        else:
            # has the region the key should be in, been parsed?
            index = self._parsed_key_index(key)
            if (self._parsed_key_map[index][0] <= key and
                (self._parsed_key_map[index][1] >= key or
                 # end of the file has been parsed
                 self._parsed_byte_map[index][1] == self._size)):
                result.append(((location, key), False))
                continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        index = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[index][0]:
            direction = -1
        else:
            direction = +1
        result.append(((location, key), direction))
    readv_ranges = []
    # lookup data to resolve references
    for location in pending_locations:
        length = 800
        if location + length > self._size:
            length = self._size - location
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        if length > 0:
            readv_ranges.append((location, length))
    self._read_and_parse(readv_ranges)
    for location, key in pending_references:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result

672

673

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    Sets self.node_ref_lists, self._key_length and self._key_count from the
    option lines, records the header as parsed, and initialises the
    bisection parsing state (self._expected_elements,
    self._keys_by_offset, self._bisect_nodes).

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises BadIndexFormatSignature: If bytes does not start with the
        expected signature.
    :raises BadIndexOptions: If the three option lines are incomplete, out
        of order, or carry a non-integer value.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    # Only the three option lines are of interest; a bounded split avoids
    # chopping up the node data that follows them.
    lines = bytes[len(expected_signature):].split('\n', 3)
    if len(lines) < 4:
        # fewer than three newline-terminated option lines were supplied.
        raise errors.BadIndexOptions(self)

    def int_option(options_line, prefix):
        # Return the integer that follows prefix on options_line.
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            return int(options_line[len(prefix):])
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = int_option(lines[0], _OPTION_NODE_REFS)
    self._key_length = int_option(lines[1], _OPTION_KEY_ELEMENTS)
    self._key_count = int_option(lines[2], _OPTION_LEN)
    # calculate the bytes we have processed: the signature plus each option
    # line and its newline.
    header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
        len(lines[2]) + 3)
    self._parsed_bytes(0, None, header_end, None)
    # setup parsing state
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # keys with the value and node references
    self._bisect_nodes = {}
    return header_end, bytes[header_end:]

716

717

def _parse_region(self, offset, data):
    """Parse node data returned from a readv operation.

    The data is parsed one segment at a time via _parse_segment until
    either the rest of it falls inside an already parsed range or
    _parse_segment reports the last segment.

    :param offset: The byte offset the data starts at.
    :param data: The data to parse.
    """
    end = offset + len(data)
    high_parsed = offset
    while True:
        # Trivial test - if the current index's end is within the
        # low-matching parsed range, we're done.
        index = self._parsed_byte_index(high_parsed)
        if end < self._parsed_byte_map[index][1]:
            return
        high_parsed, last_segment = self._parse_segment(
            offset, data, end, index)
        if last_segment:
            return

739

740

def _parse_segment(self, offset, data, end, index):
    """Parse one segment of data.

    The data is trimmed so that bytes already covered by the neighbouring
    parsed ranges are skipped, and so that a partial line at an edge that
    does not touch a parsed range (or the end of the file) is dropped.
    Parsed nodes are stored in self._bisect_nodes and the parsed range is
    recorded with _parsed_bytes.

    :param offset: Where 'data' begins in the file.
    :param data: Some data to parse a segment of.
    :param end: Where data ends
    :param index: The current index into the parsed bytes map.
    :return: high_parsed_byte, last_segment.
        high_parsed_byte is the location of the highest parsed byte in this
        segment, last_segment is True if the parsed segment is the last
        possible one in the data block.
    """
    # default is to use all data
    trim_end = None
    # accommodate overlap with data before this.
    if offset < self._parsed_byte_map[index][1]:
        # overlaps the lower parsed region
        # skip the parsed data
        trim_start = self._parsed_byte_map[index][1] - offset
        # don't trim the start for \n
        start_adjacent = True
    elif offset == self._parsed_byte_map[index][1]:
        # abuts the lower parsed region
        # use all data
        trim_start = None
        # do not trim anything
        start_adjacent = True
    else:
        # does not overlap the lower parsed region
        # use all data
        trim_start = None
        # but trim the leading \n
        start_adjacent = False
    if end == self._size:
        # lines up to the end of all data:
        # use it all
        trim_end = None
        # do not strip to the last \n
        end_adjacent = True
        last_segment = True
    elif index + 1 == len(self._parsed_byte_map):
        # at the end of the parsed data
        # use it all
        trim_end = None
        # but strip to the last \n
        end_adjacent = False
        last_segment = True
    elif end == self._parsed_byte_map[index + 1][0]:
        # butts up against the next parsed region
        # use it all
        trim_end = None
        # do not strip to the last \n
        end_adjacent = True
        last_segment = True
    elif end > self._parsed_byte_map[index + 1][0]:
        # overlaps into the next parsed region
        # only consider the unparsed data
        trim_end = self._parsed_byte_map[index + 1][0] - offset
        # do not strip to the last \n as we know its an entire record
        end_adjacent = True
        last_segment = end < self._parsed_byte_map[index + 1][1]
    else:
        # does not overlap into the next region
        # use it all
        trim_end = None
        # but strip to the last \n
        end_adjacent = False
        last_segment = True
    # now find bytes to discard if needed
    if not start_adjacent:
        # work around python bug in rfind
        if trim_start is None:
            trim_start = data.find('\n') + 1
        else:
            trim_start = data.find('\n', trim_start) + 1
        assert trim_start != 0, 'no \n was present'
    if not end_adjacent:
        # work around python bug in rfind
        if trim_end is None:
            trim_end = data.rfind('\n') + 1
        else:
            trim_end = data.rfind('\n', None, trim_end) + 1
        assert trim_end != 0, 'no \n was present'
    # adjust offset and data to the parseable data.
    trimmed_data = data[trim_start:trim_end]
    # %s rather than %d: either trim value may legitimately still be None
    # here, and %d would turn the intended AssertionError into a TypeError.
    assert trimmed_data, 'read unneeded data [%s:%s] from [%d:%d]' % (
        trim_start, trim_end, offset, offset + len(data))
    if trim_start:
        offset += trim_start
    # splitlines mangles the \r delimiters.. don't use it.
    lines = trimmed_data.split('\n')
    del lines[-1]
    pos = offset
    first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
    for key, value in nodes:
        self._bisect_nodes[key] = value
    self._parsed_bytes(offset, first_key,
        offset + len(trimmed_data), last_key)
    return offset + len(trimmed_data), last_segment

844

845

def _parse_lines(self, lines, pos):
    """Parse node lines, recording each in self._keys_by_offset.

    Each non-empty line is split on NUL into self._expected_elements
    fields: the key elements followed by the absent marker, the references
    and the value. References are TAB separated lists of CR separated
    integers.

    :param lines: The lines to parse, without their trailing newlines. An
        empty string counts as a trailer line.
    :param pos: The byte offset in the file of the first line.
    :return: A tuple (first_key, last_key, nodes, trailers). first_key and
        last_key are the keys of the first and last node lines seen (None
        if there were none), nodes is a list of (key, node_value) for the
        lines that are not marked absent, and trailers is the number of
        empty lines seen. node_value is (value, ref_lists) when
        self.node_ref_lists is set, else just value.
    :raises BadIndexData: If a line has the wrong number of fields.
    """
    key = None
    first_key = None
    trailers = 0
    nodes = []
    for line in lines:
        if line == '':
            # must be at the end
            if self._size:
                assert self._size == pos + 1, "%s %s" % (self._size, pos)
            trailers += 1
            continue
        elements = line.split('\0')
        if len(elements) != self._expected_elements:
            raise errors.BadIndexData(self)
        # keys are tuples
        key = tuple(elements[:self._key_length])
        if first_key is None:
            first_key = key
        absent, references, value = elements[-3:]
        ref_lists = tuple([
            tuple([int(ref) for ref in ref_string.split('\r') if ref])
            for ref_string in references.split('\t')])
        self._keys_by_offset[pos] = (key, absent, ref_lists, value)
        pos += len(line) + 1  # +1 for the \n
        if absent:
            continue
        if self.node_ref_lists:
            node_value = (value, ref_lists)
        else:
            node_value = value
        nodes.append((key, node_value))
    return first_key, key, nodes, trailers

882

883

def _parsed_bytes(self, start, start_key, end, end_key):
    """Mark the bytes from start to end as parsed.

    Calling self._parsed_bytes(1, start_key, 2, end_key) will mark one byte
    (the one at offset 1) as parsed.

    self._parsed_byte_map and self._parsed_key_map are updated in step: the
    new range is merged with a neighbouring range that it touches exactly,
    or inserted as a new entry.

    :param start: The start of the parsed region.
    :param start_key: The first key in the parsed region.
    :param end: The end of the parsed region.
    :param end_key: The last key in the parsed region.
    """
    byte_map = self._parsed_byte_map
    key_map = self._parsed_key_map
    index = self._parsed_byte_index(start)
    new_value = (start, end)
    new_key = (start_key, end_key)
    if index == -1:
        # first range parsed is always the beginning.
        # Insert at 0 explicitly: list.insert(-1, x) only lands at the
        # front when the list is empty.
        byte_map.insert(0, new_value)
        key_map.insert(0, new_key)
        return
    # four cases:
    # new region
    # extend lower region
    # extend higher region
    # combine two regions
    touches_lower = byte_map[index][1] == start
    touches_higher = (index + 1 < len(byte_map) and
        byte_map[index + 1][0] == end)
    if touches_lower and touches_higher:
        # combine two regions
        byte_map[index] = (byte_map[index][0], byte_map[index + 1][1])
        key_map[index] = (key_map[index][0], key_map[index + 1][1])
        del byte_map[index + 1]
        del key_map[index + 1]
    elif touches_lower:
        # extend the lower entry
        byte_map[index] = (byte_map[index][0], end)
        key_map[index] = (key_map[index][0], end_key)
    elif touches_higher:
        # extend the higher entry
        byte_map[index + 1] = (start, byte_map[index + 1][1])
        key_map[index + 1] = (start_key, key_map[index + 1][1])
    else:
        # new entry
        byte_map.insert(index + 1, new_value)
        key_map.insert(index + 1, new_key)

932

933

def _read_and_parse(self, readv_ranges):
    """Read the given ranges and parse the resulting data.

    Each hunk returned by the transport is handed to self._parse_region.
    While self._bisect_nodes is still None the hunk is first passed through
    self._parse_header_from_bytes, which supplies the offset and data that
    are then parsed as a region.

    :param readv_ranges: A prepared readv range list.  Nothing is read or
        parsed when it is empty.
    :raises AssertionError: If self._bisect_nodes is None and a hunk does
        not start at offset 0.
    """
    if not readv_ranges:
        return
    readv_data = self._transport.readv(self._name, readv_ranges, True,
        self._size)
    # parse
    for offset, data in readv_data:
        if self._bisect_nodes is None:
            # this must be the start.  Raise explicitly rather than using
            # an assert statement so the check is not stripped by
            # 'python -O'.
            if offset != 0:
                raise AssertionError(
                    "expected the first read to start at offset 0, got %r"
                    % (offset,))
            offset, data = self._parse_header_from_bytes(data)
        self._parse_region(offset, data)

949

950

def _signature(self):
    """Return the file signature used by this index type.

    :return: The module-level _SIGNATURE value.
    """
    return _SIGNATURE

953

954

def validate(self):
    """Validate that everything in the index can be accessed."""
    # Walking every entry performs a complete validation at the moment, so
    # drain the iterator and throw the nodes away.
    entries = self.iter_all_entries()
    for _node in entries:
        continue

959

960

961

class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries are answered by asking the first index, then the second, and so
    on, so the order of the indices can influence performance significantly.
    For example, if one index is on local disk and a second on a remote
    server, the local disk index should come before the other in the list.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data.  The
            list object itself is kept (not copied) and is modified by
            insert_index.
        """
        self._indices = indices

    def __repr__(self):
        children = ', '.join([repr(child) for child in self._indices])
        return "%s(%s)" % (self.__class__.__name__, children)

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once (the first child wins).

        :return: An iterable of the nodes yielded by the child indices,
            passed through unchanged; the key is the second element of each
            node.  There is no defined order for the result iteration - it
            will be in the most efficient order for the index.
        """
        reported = set()
        for child in self._indices:
            for node in child.iter_all_entries():
                key = node[1]
                if key in reported:
                    continue
                yield node
                reported.add(key)

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.  A key found in one child is not
        asked of the children after it.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of the nodes yielded by the child indices,
            passed through unchanged.  There is no defined order for the
            result iteration - it will be in the most efficient order for
            the index.
        """
        remaining = set(keys)
        for child in self._indices:
            if not remaining:
                # Everything asked for has been found already.
                break
            for node in child.iter_entries(remaining):
                remaining.remove(node[1])
                yield node

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None)
        then only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key,
            but with the last N elements 'None' rather than a regular
            bytestring.  The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        prefixes = set(keys)
        if not prefixes:
            return
        reported = set()
        for child in self._indices:
            for node in child.iter_entries_prefix(prefixes):
                if node[1] not in reported:
                    reported.add(node[1])
                    yield node

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For CombinedGraphIndex this is approximated by the sum of the keys of
        the child indices. As child indices may have duplicate keys this can
        have a maximum error of the number of child indices * largest number
        of keys in any index.
        """
        total = 0
        for child in self._indices:
            total += child.key_count()
        return total

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for child in self._indices:
            child.validate()

1075

1076

1077

class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be
    immediately available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add
            when this index has reference lists, otherwise an iterable of
            (key, value) entries.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        Nodes flagged as absent are skipped.

        :return: An iterable of (index, key, value, reference_lists) when
            this index has reference lists, otherwise of (index, key, value).
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index (in this case
            dictionary hash order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Keys that are not in the index, or whose node is flagged as absent,
        are silently skipped.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists) when
            this index has reference lists, otherwise of (index, key, value).
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index (keys iteration order
            in this case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2], node[1]
        else:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None)
        then only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key,
            but with the last N elements 'None' rather than a regular
            bytestring.  The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        :raises errors.BadIndexKey: If a prefix starts with None or does not
            have the key length of this index.
        """
        # XXX: Too much duplication with the GraphIndex class; consider
        # finding a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            # With single-element keys a prefix is just a complete key.
            for key in keys:
                # sanity check
                if key[0] is None:
                    raise errors.BadIndexKey(key)
                if len(key) != self._key_length:
                    raise errors.BadIndexKey(key)
                try:
                    node = self._nodes[key]
                except KeyError:
                    # a non-existent lookup: skip it, exactly as the
                    # multi-element path below does, instead of leaking a
                    # KeyError to the caller.
                    continue
                if node[0]:
                    # absent node
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            # find what it refers to:
            key_dict = self._nodes_by_key
            elements = list(key)
            # find the subdict to return
            try:
                while len(elements) and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existent lookup.
                continue
            if len(elements):
                # Trailing None elements remain: walk everything stored
                # below the matched prefix.
                dicts = [key_dict]
                while dicts:
                    key_dict = dicts.pop(-1)
                    # can't be empty or would not exist
                    item, value = key_dict.iteritems().next()
                    if isinstance(value, dict):
                        # push keys
                        dicts.extend(key_dict.itervalues())
                    else:
                        # yield keys
                        for value in key_dict.itervalues():
                            yield (self, ) + value
            else:
                # Every element was given, so key_dict is the stored entry.
                yield (self, ) + key_dict

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._keys)

    def validate(self):
        """In memory indices have no known corruption at the moment."""

1215

1216

1217

class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
        add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are forwarded to.
        :param prefix: A tuple of key elements to prepend to every key.
        :param missing_key_length: The number of None elements appended to
            prefix to build the prefix key used by iter_all_entries.
        :param add_nodes_callback: Callable given the list of translated
            nodes by add_nodes.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,)*missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when there are no reference lists.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        translated_nodes = []
        try:
            # Add prefix_key to each reference node_refs is a tuple of tuples,
            # so split it apart, and add prefix_key to the internal reference
            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                        for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                    adjusted_references))
        except ValueError:
            # Unpacking three values failed, so the nodes are (key, value)
            # pairs without reference lists.
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            for (key, value) in nodes:
                translated_nodes.append((self.prefix + key, value))
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and yield the adjusted nodes.

        :param an_iter: An iterable of (index, key, value, reference_lists)
            nodes, or of (index, key, value) nodes when the adapted index
            has no reference lists.
        :return: A generator of nodes of the same shape with the prefix
            removed from the key and from every referenced key.
        :raises errors.BadIndexData: If a key or referenced key does not
            start with the prefix.
        """
        for node in an_iter:
            # cross checks
            if node[1][:self.prefix_len] != self.prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][self.prefix_len:]
            if len(node) < 4:
                # No reference lists on this node (add_nodes accepts such
                # nodes too), so there is nothing further to check or strip.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                    for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index.

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of the adapted index's nodes with the prefix
            stripped: (index, key, value, reference_lists), or
            (index, key, value) for nodes without reference lists. There is
            no defined order for the result iteration.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable as per iter_all_entries, restricted to the
            requested keys. There is no defined order for the result
            iteration.
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None)
        then only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key,
            but with the last N elements 'None' rather than a regular
            bytestring.  The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count while iterating rather than building a throwaway list.
        return sum(1 for node in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()

Older »