/brz/remove-bazaar : revision 4197

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Canonical.com Patch Queue Manager
Date: 2009-03-24 17:01:50 UTC
mfrom: (4178.3.7 lru_cache_linked_lst)
Revision ID: pqm@pqm.ubuntu.com-20090324170150-9wtdpv5w7192zdwy

(jam) Improvements to LRUCache structure, use a double-linked-list

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_guess_renames.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/check-newsbugs.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""B+Tree indices"""

import array

import bisect

from bisect import bisect_right

from copy import deepcopy

import math

import struct

import tempfile

import zlib

from bzrlib import (

chunk_writer,

debug,

errors,

index,

lru_cache,

osutils,

trace,

)

from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN

from bzrlib.transport import get_transport

_BTSIGNATURE = "B+Tree Graph Index 2\n"

_OPTION_ROW_LENGTHS = "row_lengths="

_LEAF_FLAG = "type=leaf\n"

_INTERNAL_FLAG = "type=internal\n"

_INTERNAL_OFFSET = "offset="

_RESERVED_HEADER_BYTES = 120

_PAGE_SIZE = 4096

# 4K per page: 4MB - 1000 entries

_NODE_CACHE_SIZE = 1000

class _BuilderRow(object):

"""The stored state accumulated while writing out a row in the index.

:ivar spool: A temporary file used to accumulate nodes for this row

in the tree.

:ivar nodes: The count of nodes emitted so far.

"""

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

skipped_bytes = padding

self.spool.writelines(byte_lines)

remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE

if remainder != 0:

raise AssertionError("incorrect node length: %d, %d"

% (self.spool.tell(), remainder))

self.nodes += 1

self.writer = None

class _InternalBuilderRow(_BuilderRow):
    """Row state used while writing out the internal (non-leaf) rows."""

    def finish_node(self, pad=True):
        """Finish the open internal node; internal nodes are always padded.

        :raises AssertionError: If pad is false.
        """
        if pad:
            _BuilderRow.finish_node(self)
            return
        raise AssertionError("Must pad internal nodes only.")

class _LeafBuilderRow(_BuilderRow):
    """The stored state accumulated while writing out leaf rows."""

100

class BTreeBuilder(index.GraphIndexBuilder):

101

"""A Builder for B+Tree based Graph indices.

102

103

The resulting graph has the structure:

104

105

_SIGNATURE OPTIONS NODES

106

_SIGNATURE := 'B+Tree Graph Index 2' NEWLINE

107

OPTIONS := REF_LISTS KEY_ELEMENTS LENGTH ROW_LENGTHS

108

REF_LISTS := 'node_ref_lists=' DIGITS NEWLINE

109

KEY_ELEMENTS := 'key_elements=' DIGITS NEWLINE

110

LENGTH := 'len=' DIGITS NEWLINE

111

ROW_LENGTHS := 'row_lengths=' DIGITS (COMMA DIGITS)* NEWLINE

112

NODES := NODE_COMPRESSED*

113

NODE_COMPRESSED:= COMPRESSED_BYTES{4096}

114

NODE_RAW := INTERNAL | LEAF

115

INTERNAL := INTERNAL_FLAG POINTERS

116

LEAF := LEAF_FLAG ROWS

117

KEY_ELEMENT := Not-whitespace-utf8

118

KEY := KEY_ELEMENT (NULL KEY_ELEMENT)*

119

ROWS := ROW*

120

ROW := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

121

ABSENT := 'a'

122

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

123

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

124

REFERENCE := KEY

125

VALUE := no-newline-no-null-bytes

126

"""

127

128

def __init__(self, reference_lists=0, key_elements=1, spill_at=100000):
    """See GraphIndexBuilder.__init__.

    :param spill_at: Optional cap on the number of nodes held in memory;
        add_node spills to disk once this many keys are held.
    """
    index.GraphIndexBuilder.__init__(
        self, reference_lists=reference_lists, key_elements=key_elements)
    # In-memory entries, stored as {key: (node_refs, value)}.
    self._nodes = {}
    # Built on demand by _get_nodes_by_key; None means "not built yet".
    self._nodes_by_key = None
    # Indices that have already been spilled to disk.
    self._backing_indices = []
    self._spill_at = spill_at
    self._optimize_for_size = False

143

144

def add_node(self, key, value, references=()):
    """Add a node to the index.

    Once the number of in-memory keys reaches the spill_at threshold the
    in-memory nodes are spilled to disk.

    :param key: The key. keys are non-empty tuples containing
        as many whitespace-free utf8 bytestrings as the key length
        defined for this index.
    :param value: The value to associate with the key. It may be any
        bytes as long as it does not contain \\0 or \\n.
    :param references: An iterable of iterables of keys. Each is a
        reference to another key.
    :raises errors.BadIndexDuplicateKey: If key is already held in memory.
    """
    # Only the checked references are needed; the second element returned
    # by the check (absent references) is ignored.
    checked = self._check_key_ref_value(key, references, value)
    node_refs = checked[0]
    if key in self._nodes:
        raise errors.BadIndexDuplicateKey(key, self)
    self._nodes[key] = (node_refs, value)
    self._keys.add(key)
    if self._nodes_by_key is not None:
        if self._key_length > 1:
            # Keep the already-built prefix lookup structure in step.
            self._update_nodes_by_key(key, value, node_refs)
    if len(self._keys) >= self._spill_at:
        self._spill_mem_keys_to_disk()

169

170

def _spill_mem_keys_to_disk(self):
    """Write the in-memory keys to disk to cap memory consumption.

    When combining is enabled, existing on-disk indices are merged with the
    in-memory nodes so sorted order is kept, following a powers-of-two
    scheme: the first spill writes a single index; the second merges memory
    with that index into a 2x index and drops the first; the third writes a
    new single index next to the 2x one; the fourth merges memory with both
    into a 4x index; and so on.

    When combining is disabled each spill simply appends a new backing
    index.
    """
    combining = self._combine_backing_indices
    if combining:
        spilled = self._spill_mem_keys_and_combine()
        new_backing_file, size, backing_pos = spilled
    else:
        new_backing_file, size = self._spill_mem_keys_without_combining()
    dir_path, base_name = osutils.split(new_backing_file.name)
    # Note: The transport here isn't strictly needed, because the new index
    # is given direct access to the file object just below.
    transport = get_transport(dir_path)
    new_backing = BTreeGraphIndex(transport, base_name, size)
    # GC will clean up the file
    new_backing._file = new_backing_file
    slots = self._backing_indices
    if not combining:
        slots.append(new_backing)
    else:
        if backing_pos == len(slots):
            slots.append(None)
        slots[backing_pos] = new_backing
        # Everything before backing_pos was merged into the new index.
        for merged_pos in range(backing_pos):
            slots[merged_pos] = None
    # Start over with an empty in-memory state.
    self._nodes_by_key = None
    self._nodes = {}
    self._keys = set()

206

207

def _spill_mem_keys_without_combining(self):
    """Write only the in-memory nodes to a new index, never optimizing.

    :return: Whatever _write_nodes returns for the in-memory nodes.
    """
    mem_nodes = self._iter_mem_nodes()
    return self._write_nodes(mem_nodes, allow_optimize=False)

209

210

def _spill_mem_keys_and_combine(self):
    """Merge memory nodes with the leading backing indices into one index.

    Backing indices are consumed from the start of the list up to (but not
    including) the first empty (None) slot.

    :return: (new_backing_file, size, backing_pos) where backing_pos is the
        position of that first empty slot, or the length of the list when
        there is none.
    """
    sources = [self._iter_mem_nodes()]
    backing_pos = 0
    for backing in self._backing_indices:
        if backing is None:
            break
        sources.append(backing.iter_all_entries())
        backing_pos += 1
    merged = self._iter_smallest(sources)
    new_backing_file, size = self._write_nodes(merged, allow_optimize=False)
    return new_backing_file, size, backing_pos

223

224

def add_nodes(self, nodes):
    """Add several nodes to the index.

    :param nodes: An iterable of entries to add. When the index has
        reference lists each entry is (key, value, node_refs); otherwise
        each entry is (key, value).
    """
    add = self.add_node
    if not self.reference_lists:
        for key, value in nodes:
            add(key, value)
        return
    for key, value, node_refs in nodes:
        add(key, value, node_refs)

235

236

def _iter_mem_nodes(self):
    """Iterate over the nodes held in memory, in sorted key order.

    Yields (self, key, value, references) when the index has reference
    lists, otherwise (self, key, value).
    """
    mem_nodes = self._nodes
    with_references = self.reference_lists
    for key in sorted(mem_nodes):
        references, value = mem_nodes[key]
        if with_references:
            yield self, key, value, references
        else:
            yield self, key, value

247

248

def _iter_smallest(self, iterators_to_combine):
    """Merge several node iterators, always yielding the smallest key next.

    A single iterator is passed through unchanged. With several iterators
    each yielded node has its first element replaced by self.

    :param iterators_to_combine: A list of iterators of node tuples whose
        element 1 is the key.
    :raises errors.BadIndexDuplicateKey: If the same key is selected twice
        in a row.
    """
    if len(iterators_to_combine) == 1:
        for node in iterators_to_combine[0]:
            yield node
        return
    # The pending (not yet yielded) node of each iterator; None once the
    # iterator is exhausted.
    heads = []
    for source in iterators_to_combine:
        try:
            head = source.next()
        except StopIteration:
            head = None
        heads.append(head)
    previous_key = None
    while True:
        # Decorate candidates with the key to allow 2.4's min to be used.
        live = [(node[1], (pos, node)) for pos, node in enumerate(heads)
                if node is not None]
        if not live:
            return
        key, (pos, node) = min(live)
        if previous_key == key:
            raise errors.BadIndexDuplicateKey(previous_key, self)
        previous_key = key
        # Yield, with self as the index
        yield (self,) + node[1:]
        try:
            heads[pos] = iterators_to_combine[pos].next()
        except StopIteration:
            heads[pos] = None

279

280

def _add_key(self, string_key, line, rows, allow_optimize=True):
    """Add a key to the current chunk.

    If the line does not fit in the open leaf node, that node is finished,
    the key is recorded as a division point in the rows above (adding a new
    root row when none of them can take it) and the key is then added to a
    fresh leaf node.

    :param string_key: The key to add.
    :param line: The fully serialised key and value.
    :param rows: The list of builder rows; rows[0] is the root row and
        rows[-1] is the leaf row. A new root row may be inserted at
        position 0.
    :param allow_optimize: If set to False, prevent setting the optimize
        flag when writing out. This is used by the _spill_mem_keys_to_disk
        functionality.
    """
    # Resolve the flag once so that every writer created here (internal,
    # leaf and new-root) honours allow_optimize in the same way.
    if allow_optimize:
        optimize_for_size = self._optimize_for_size
    else:
        optimize_for_size = False
    if rows[-1].writer is None:
        # opening a new leaf chunk;
        for pos, internal_row in enumerate(rows[:-1]):
            # flesh out any internal nodes that are needed to
            # preserve the height of the tree
            if internal_row.writer is None:
                length = _PAGE_SIZE
                if internal_row.nodes == 0:
                    length -= _RESERVED_HEADER_BYTES # padded
                internal_row.writer = chunk_writer.ChunkWriter(length, 0,
                    optimize_for_size=optimize_for_size)
                internal_row.writer.write(_INTERNAL_FLAG)
                internal_row.writer.write(_INTERNAL_OFFSET +
                    str(rows[pos + 1].nodes) + "\n")
        # add a new leaf
        length = _PAGE_SIZE
        if rows[-1].nodes == 0:
            length -= _RESERVED_HEADER_BYTES # padded
        rows[-1].writer = chunk_writer.ChunkWriter(length,
            optimize_for_size=optimize_for_size)
        rows[-1].writer.write(_LEAF_FLAG)
    if rows[-1].writer.write(line):
        # this key did not fit in the node:
        rows[-1].finish_node()
        key_line = string_key + "\n"
        needs_new_root = True
        for row in reversed(rows[:-1]):
            # Mark the start of the next node in the node above. If it
            # doesn't fit then propagate upwards until we find one that
            # it does fit into.
            if row.writer.write(key_line):
                row.finish_node()
            else:
                # We've found a node that can handle the pointer.
                needs_new_root = False
                break
        # If we reached the current root without being able to mark the
        # division point, then we need a new root:
        if needs_new_root:
            if 'index' in debug.debug_flags:
                trace.mutter('Inserting new global row.')
            root_row = _InternalBuilderRow()
            reserved_bytes = 0
            rows.insert(0, root_row)
            # This will be padded, hence the reserved header bytes are
            # subtracted from the page size.
            root_row.writer = chunk_writer.ChunkWriter(
                _PAGE_SIZE - _RESERVED_HEADER_BYTES,
                reserved_bytes,
                optimize_for_size=optimize_for_size)
            root_row.writer.write(_INTERNAL_FLAG)
            root_row.writer.write(_INTERNAL_OFFSET +
                str(rows[1].nodes - 1) + "\n")
            root_row.writer.write(key_line)
        # Retry: the leaf row has no open writer now, so a new leaf node is
        # opened and the key is written into it.
        self._add_key(string_key, line, rows, allow_optimize=allow_optimize)

348

349

def _write_nodes(self, node_iterator, allow_optimize=True):
    """Write node_iterator out as a B+Tree.

    :param node_iterator: An iterator of sorted nodes. Each node should
        match the output given by iter_all_entries.
    :param allow_optimize: If set to False, prevent setting the optimize
        flag when writing out. This is used by the _spill_mem_keys_to_disk
        functionality.
    :return: (file, size): a file handle for a named temporary file
        containing a B+Tree for the nodes, rewound to the start, and the
        number of bytes written to it.
    :raises AssertionError: If the header does not fit in the reserved
        space, or an internal row copies an unexpected amount of data.
    """
    # The index rows - rows[0] is the root, rows[1] is the layer under it
    # etc.
    rows = []
    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    key_count = 0
    # A stack with the number of nodes of each size. 0 is the root node
    # and must always be 1 (if there are any nodes in the tree).
    self.row_lengths = []
    # Loop over all nodes adding them to the bottom row
    # (rows[-1]). When we finish a chunk in a row,
    # propagate the key that didn't fit (comes after the chunk) to the
    # row above, transitively.
    for node in node_iterator:
        if key_count == 0:
            # First key triggers the first row
            rows.append(_LeafBuilderRow())
        key_count += 1
        string_key, line = _btree_serializer._flatten_node(node,
                                self.reference_lists)
        self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
    # Close the node still open in each row, bottom row first; only
    # non-leaf rows are asked to pad.
    for row in reversed(rows):
        pad = (type(row) != _LeafBuilderRow)
        row.finish_node(pad=pad)
    result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
    # Header: signature followed by one option per line.
    lines = [_BTSIGNATURE]
    lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
    lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
    lines.append(_OPTION_LEN + str(key_count) + '\n')
    row_lengths = [row.nodes for row in rows]
    lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
    result.writelines(lines)
    # Number of header bytes written so far.
    position = sum(map(len, lines))
    root_row = True
    if position > _RESERVED_HEADER_BYTES:
        raise AssertionError("Could not fit the header in the"
                             " reserved space: %d > %d"
                             % (position, _RESERVED_HEADER_BYTES))
    # write the rows out:
    for row in rows:
        reserved = _RESERVED_HEADER_BYTES # reserved space for first node
        row.spool.flush()
        row.spool.seek(0)
        # copy nodes to the finalised file.
        # Special case the first node as it may be prefixed
        node = row.spool.read(_PAGE_SIZE)
        # Skip the reserved area at the start of the spooled first node,
        # then pad with zeros for whatever part of the reserved space the
        # header did not use.
        result.write(node[reserved:])
        result.write("\x00" * (reserved - position))
        position = 0 # Only the root row actually has an offset
        copied_len = osutils.pumpfile(row.spool, result)
        if copied_len != (row.nodes - 1) * _PAGE_SIZE:
            # A short copy is tolerated for the leaf row only (its last
            # node is finished with pad=False above).
            if type(row) != _LeafBuilderRow:
                raise AssertionError("Incorrect amount of data copied"
                    " expected: %d, got: %d"
                    % ((row.nodes - 1) * _PAGE_SIZE,
                       copied_len))
    result.flush()
    size = result.tell()
    result.seek(0)
    return result, size

421

422

def finish(self):
    """Finalise the index.

    :return: A file handle for a temporary file containing the nodes added
        to the index.
    """
    index_file, _ = self._write_nodes(self.iter_all_entries())
    return index_file

429

430

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value, reference_lists), or
        (index, key, value) when the index has no reference lists. The
        in-memory nodes are produced in sorted key order; when backing
        indices exist their entries are merged in via _iter_smallest.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    # Doing serial rather than ordered would be faster; but this shouldn't
    # be getting called routinely anyway.
    sources = [self._iter_mem_nodes()]
    sources.extend(backing.iter_all_entries()
                   for backing in self._backing_indices
                   if backing is not None)
    if len(sources) != 1:
        return self._iter_smallest(sources)
    return sources[0]

449

450

def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable of (index, key, value, reference_lists), or
        (index, key, value) when the index has no reference lists. Keys
        held in memory are yielded first, then the remaining keys are
        looked up in each backing index in turn.
    """
    keys = set(keys)
    local_keys = keys.intersection(self._keys)
    with_references = self.reference_lists
    for key in local_keys:
        references, value = self._nodes[key][0], self._nodes[key][1]
        if with_references:
            yield self, key, value, references
        else:
            yield self, key, value
    # Find things that are in backing indices that have not been handled
    # yet.
    if not self._backing_indices:
        return # We won't find anything there either
    # Remove all of the keys that we found locally
    keys.difference_update(local_keys)
    for backing in self._backing_indices:
        if backing is None:
            continue
        if not keys:
            return
        for node in backing.iter_entries(keys):
            # A key found in one backing index is not searched for again.
            keys.remove(node[1])
            yield (self,) + node[1:]

482

483

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: If a prefix starts with None or has the
        wrong length. Note that this check only runs after the backing
        indices have been iterated.
    """
    # XXX: Too much duplication with the GraphIndex class; consider finding
    # a good place to pull out the actual common logic.
    keys = set(keys)
    if not keys:
        return
    # Matches from the spilled (backing) indices come first, re-labelled
    # with self as the index.
    for backing in self._backing_indices:
        if backing is None:
            continue
        for node in backing.iter_entries_prefix(keys):
            yield (self,) + node[1:]
    if self._key_length == 1:
        # With single-element keys a prefix is a complete key, so a direct
        # lookup in the in-memory nodes is enough.
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                node = self._nodes[key]
            except KeyError:
                continue
            if self.reference_lists:
                yield self, key, node[1], node[0]
            else:
                yield self, key, node[1]
        return
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = self._get_nodes_by_key()
        elements = list(key)
        # find the subdict to return
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            # Some trailing elements were None: walk every nested dict
            # below this point and yield all the entries found.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                item, value = key_dict.iteritems().next()
                if type(value) == dict:
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        yield (self, ) + value
        else:
            # The whole key was consumed, so key_dict is the entry itself.
            yield (self, ) + key_dict

558

559

def _get_nodes_by_key(self):
    """Return the in-memory nodes as nested dicts keyed by key element.

    The structure is built on first use and cached in _nodes_by_key. Each
    level is keyed by one key element; the innermost value is
    (key, value, references) when the index has reference lists, otherwise
    (key, value).
    """
    if self._nodes_by_key is not None:
        return self._nodes_by_key
    by_key = {}
    with_references = self.reference_lists
    for key, (references, value) in self._nodes.iteritems():
        level = by_key
        for subkey in key[:-1]:
            level = level.setdefault(subkey, {})
        if with_references:
            level[key[-1]] = key, value, references
        else:
            level[key[-1]] = key, value
    self._nodes_by_key = by_key
    return self._nodes_by_key

576

577

def key_count(self):
    """Return an estimate of the number of keys in this index.

    The result is the number of keys held in memory plus the key_count()
    of every backing index that is present.
    """
    total = len(self._keys)
    backing_total = 0
    for backing in self._backing_indices:
        if backing is not None:
            backing_total += backing.key_count()
    return total + backing_total

584

585

def validate(self):
    """In-memory indices have no known corruption at the moment."""

587

588

589

class _LeafNode(object):
    """A leaf node for a serialised B+Tree index.

    :ivar keys: A dict built from the items produced by the serializer's
        _parse_leaf_lines for this node.
    """

    def __init__(self, bytes, key_length, ref_list_length):
        """Parse bytes to create a leaf node object."""
        # splitlines mangles the \r delimiters.. don't use it.
        parsed = _btree_serializer._parse_leaf_lines(
            bytes, key_length, ref_list_length)
        self.keys = dict(parsed)

597

598

599

class _InternalNode(object):

600

"""An internal node for a serialised B+Tree index."""

601

602

def __init__(self, bytes):

603

"""Parse bytes to create an internal node object."""

604

# splitlines mangles the \r delimiters.. don't use it.

605

self.keys = self._parse_lines(bytes.split('\n'))

606

607

def _parse_lines(self, lines):

608

nodes = []

609

self.offset = int(lines[1][7:])

610

for line in lines[2:]:

611

if line == '':

612

break

613

nodes.append(tuple(line.split('\0')))

614

return nodes

615

616

617

class BTreeGraphIndex(object):

618

"""Access to nodes via the standard GraphIndex interface for B+Tree's.

619

620

Individual nodes are held in a LRU cache. This holds the root node in

621

memory except when very large walks are done.

622

"""

623

624

def __init__(self, transport, name, size):
    """Create a B+Tree index object on the index name.

    :param transport: The transport to read data for the index from.
    :param name: The file name of the index on transport.
    :param size: Optional size of the index in bytes. This allows
        compatibility with the GraphIndex API, as well as ensuring that
        the initial read (to read the root node header) can be done
        without over-reading even on empty indices, and on small indices
        allows single-IO to read the entire index.
    """
    self._transport = transport
    self._name = name
    self._size = size
    self._file = None
    # Needs self._transport, so it must come after the assignment above.
    self._recommended_pages = self._compute_recommended_pages()
    # Header-derived state, unknown until the root node has been read.
    self._root_node = None
    self._key_count = None
    self._row_lengths = None
    self._row_offsets = None # Start of each row, [-1] is the end
    # The leaf value cache is currently disabled (it used to be an
    # LRUCache of 100,000 leaf values).
    self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
    self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
    self._internal_node_cache = lru_cache.LRUCache()

648

649

def __eq__(self, other):

650

"""Equal when self and other were created with the same parameters."""

651

return (

652

type(self) == type(other) and

653

self._transport == other._transport and

654

self._name == other._name and

655

self._size == other._size)

656

657

def __ne__(self, other):

658

return not self.__eq__(other)

659

660

def _get_and_cache_nodes(self, nodes):
    """Read nodes and cache them.

    The supplied node positions are sorted and read via _read_nodes. The
    node at position 0 is kept as the root node; every other node goes
    into the internal node cache or the leaf node cache depending on
    whether its position is before the start of the last row.

    :param nodes: An iterable of node positions to read.
    :return: A dict of {node_pos: node}
    """
    found = {}
    # Looked up lazily, only once a non-root node has been read.
    start_of_leaves = None
    for node_pos, node in self._read_nodes(sorted(nodes)):
        found[node_pos] = node
        if node_pos == 0: # Special case
            self._root_node = node
            continue
        if start_of_leaves is None:
            start_of_leaves = self._row_offsets[-2]
        if node_pos < start_of_leaves:
            cache = self._internal_node_cache
        else:
            cache = self._leaf_node_cache
        cache.add(node_pos, node)
    return found

687

688

def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes; the result is the number of
    _PAGE_SIZE pages needed to cover that length, rounded up.
    """
    page_bytes = float(_PAGE_SIZE)
    read_bytes = self._transport.recommended_page_size()
    return int(math.ceil(read_bytes / page_bytes))

698

699

def _compute_total_pages_in_index(self):

700

"""How many pages are in the index.

701

702

If we have read the header we will use the value stored there.

703

Otherwise it will be computed based on the length of the index.

704

"""

705

if self._size is None:

706

raise AssertionError('_compute_total_pages_in_index should not be'

707

' called when self._size is None')

708

if self._root_node is not None:

709

# This is the number of pages as defined by the header

710

return self._row_offsets[-1]

711

# This is the number of pages as defined by the size of the index. They

712

# should be indentical.

713

total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))

714

return total_pages

715

716

def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. So given the entries
    that we already have cached and the new pages being downloaded figure
    out what other pages we might want to read.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download. The input is returned
        unchanged when no expansion is done.
    """
    if 'index' in debug.debug_flags:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if 'index' in debug.debug_flags:
            trace.mutter(' not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if 'index' in debug.debug_flags:
            trace.mutter(' not expanding without knowing index size')
        return offsets
    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    # If reading recommended_pages would read the rest of the index, just
    # do so.
    if total_pages - len(cached_offsets) <= self._recommended_pages:
        # Read whatever is left
        if cached_offsets:
            expanded = [x for x in xrange(total_pages)
                         if x not in cached_offsets]
        else:
            expanded = range(total_pages)
        if 'index' in debug.debug_flags:
            trace.mutter(' reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is None:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages. This is because the
        # likelihood of actually reading interesting pages is very low.
        # See doc/developers/btree_index_prefetch.txt for a discussion, and
        # a possible implementation when we are guessing that the second
        # layer index is small
        final_offsets = offsets
    else:
        tree_depth = len(self._row_lengths)
        if len(cached_offsets) < tree_depth and len(offsets) == 1:
            # We haven't read enough to justify expansion
            # If we are only going to read the root node, and 1 leaf node,
            # then it isn't worth expanding our request. Once we've read at
            # least 2 nodes, then we are probably doing a search, and we
            # start expanding our requests.
            if 'index' in debug.debug_flags:
                trace.mutter(' not expanding on first reads')
            return offsets
        final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                  total_pages)

    # _expand_to_neighbors returns a set, so impose an order here.
    final_offsets = sorted(final_offsets)
    if 'index' in debug.debug_flags:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets

784

785

def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):

786

"""Expand requests to neighbors until we have enough pages.

787

788

This is called from _expand_offsets after policy has determined that we

789

want to expand.

790

We only want to expand requests within a given layer. We cheat a little

791

bit and assume all requests will be in the same layer. This is true

792

given the current design, but if it changes this algorithm may perform

793

oddly.

794

795

:param offsets: requested offsets

796

:param cached_offsets: offsets for pages we currently have cached

797

:return: A set() of offsets after expansion

798

"""

799

final_offsets = set(offsets)

800

first = end = None

801

new_tips = set(final_offsets)

802

while len(final_offsets) < self._recommended_pages and new_tips:

803

next_tips = set()

804

for pos in new_tips:

805

if first is None:

806

first, end = self._find_layer_first_and_end(pos)

807

previous = pos - 1

808

if (previous > 0

809

and previous not in cached_offsets

810

and previous not in final_offsets

811

and previous >= first):

812

next_tips.add(previous)

813

after = pos + 1

814

if (after < total_pages

815

and after not in cached_offsets

816

and after not in final_offsets

817

and after < end):

818

next_tips.add(after)

819

# This would keep us from going bigger than

820

# recommended_pages by only expanding the first offsets.

821

# However, if we are making a 'wide' request, it is

822

# reasonable to expand all points equally.

823

# if len(final_offsets) > recommended_pages:

824

# break

825

final_offsets.update(next_tips)

826

new_tips = next_tips

827

return final_offsets

828

829

def external_references(self, ref_list_num):
    """Return the keys referenced in a reference list but not in the index.

    :param ref_list_num: Which reference list of each entry to inspect.
    :return: The set of keys found in that reference list of any entry,
        minus the keys of the entries themselves.
    :raises ValueError: If the index has no reference list with that
        number.
    """
    if self._root_node is None:
        self._get_root_node()
    if self.node_ref_lists < ref_list_num + 1:
        raise ValueError('No ref list %d, index has %d ref lists'
            % (ref_list_num, self.node_ref_lists))
    present = set()
    referenced = set()
    for entry in self.iter_all_entries():
        present.add(entry[1])
        referenced.update(entry[3][ref_list_num])
    return referenced.difference(present)

841

842

def _find_layer_first_and_end(self, offset):

843

"""Find the start/stop nodes for the layer corresponding to offset.

844

845

:return: (first, end)

846

first is the first node in this layer

847

end is the first node of the next layer

848

"""

849

first = end = 0

850

for roffset in self._row_offsets:

851

first = end

852

end = roffset

853

if offset < roffset:

854

break

855

return first, end

856

857

def _get_offsets_to_cached_pages(self):

858

"""Determine what nodes we already have cached."""

859

cached_offsets = set(self._internal_node_cache.keys())

860

cached_offsets.update(self._leaf_node_cache.keys())

861

if self._root_node is not None:

862

cached_offsets.add(0)

863

return cached_offsets

864

865

def _get_root_node(self):

866

if self._root_node is None:

867

# We may not have a root node yet

868

self._get_internal_nodes([0])

869

return self._root_node

870

871

def _get_nodes(self, cache, node_indexes):

872

found = {}

873

needed = []

874

for idx in node_indexes:

875

if idx == 0 and self._root_node is not None:

876

found[0] = self._root_node

877

continue

878

try:

879

found[idx] = cache[idx]

880

except KeyError:

881

needed.append(idx)

882

if not needed:

883

return found

884

needed = self._expand_offsets(needed)

885

found.update(self._get_and_cache_nodes(needed))

886

return found

887

888

def _get_internal_nodes(self, node_indexes):

889

"""Get a node, from cache or disk.

890

891

After getting it, the node will be cached.

892

"""

893

return self._get_nodes(self._internal_node_cache, node_indexes)

894

895

def _cache_leaf_values(self, nodes):

896

"""Cache directly from key => value, skipping the btree."""

897

if self._leaf_value_cache is not None:

898

for node in nodes.itervalues():

899

for key, value in node.keys.iteritems():

900

if key in self._leaf_value_cache:

901

# Don't add the rest of the keys, we've seen this node

902

# before.

903

break

904

self._leaf_value_cache[key] = value

905

906

def _get_leaf_nodes(self, node_indexes):

907

"""Get a bunch of nodes, from cache or disk."""

908

found = self._get_nodes(self._leaf_node_cache, node_indexes)

909

self._cache_leaf_values(found)

910

return found

911

912

def iter_all_entries(self):
    """Iterate over all keys within the index.

    :return: An iterable of (index, key, value) or (index, key, value,
        reference_lists). The former tuple is used when there are no
        reference lists in the index, making the API compatible with simple
        key:value index types. Within each leaf node the entries are
        produced in sorted key order, and leaf nodes are visited in the
        order ``_read_nodes`` yields them.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if not self.key_count():
        return
    if self._row_offsets[-1] == 1:
        # There is only the root node, and we read that via key_count()
        nodes = [(0, self._root_node)]
    else:
        # The last row of the tree holds the leaves. A leaf range of
        # exactly [0] can only occur in the single-node case handled above,
        # so no further special-casing is needed here.
        start_of_leaves = self._row_offsets[-2]
        end_of_leaves = self._row_offsets[-1]
        nodes = self._read_nodes(range(start_of_leaves, end_of_leaves))
    with_refs = self.node_ref_lists
    # We iterate strictly in-order so that we can use this function
    # for spilling index builds to disk.
    for _, node in nodes:
        for key, (value, refs) in sorted(node.keys.items()):
            if with_refs:
                yield (self, key, value, refs)
            else:
                yield (self, key, value)

954

955

@staticmethod
def _multi_bisect_right(in_keys, fixed_keys):
    """Find the positions where each 'in_key' would fit in fixed_keys.

    This is equivalent to doing "bisect_right" on each in_key into
    fixed_keys, then grouping the in_keys that share a position. A key
    equal to an element of fixed_keys is placed to the right of it.

    :param in_keys: A sorted list of keys to match with fixed_keys
    :param fixed_keys: A sorted list of keys to match against
    :return: A list of (integer position, [key list]) tuples, in
        increasing position order. Only positions that receive at least
        one key are present. An empty in_keys gives an empty list.
    """
    if not in_keys:
        return []
    if not fixed_keys:
        # no pointers in the fixed_keys list, which means everything must
        # fall to the left.
        return [(0, in_keys)]

    # TODO: Iterating both lists will generally take M + N steps
    #       Bisecting each key will generally take M * log2 N steps.
    #       If we had an efficient way to compare, we could pick the method
    #       based on which has the fewer number of steps.
    #       There is also the argument that bisect_right is a compiled
    #       function, so there is even more to be gained.
    # iter_steps = len(in_keys) + len(fixed_keys)
    # bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
    if len(in_keys) == 1: # Bisect will always be faster for M = 1
        return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
    # elif bisect_steps < iter_steps:
    #     offsets = {}
    #     for key in in_keys:
    #         offsets.setdefault(bisect_right(fixed_keys, key),
    #                            []).append(key)
    #     return [(o, offsets[o]) for o in sorted(offsets)]

    # Walk both sorted sequences in lock-step. Both are known to be
    # non-empty here, so the initial .next() calls cannot raise.
    in_keys_iter = iter(in_keys)
    fixed_keys_iter = enumerate(fixed_keys)
    cur_in_key = in_keys_iter.next()
    cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()

    # Private exceptions used to leave the nested loops below and to tell
    # apart which of the two sequences ran out first.
    class InputDone(Exception): pass
    class FixedDone(Exception): pass

    output = []
    cur_out = []

    # TODO: Another possibility is that rather than iterating on each side,
    #       we could use a combination of bisecting and iterating. For
    #       example, while cur_in_key < fixed_key, bisect to find its
    #       point, then iterate all matching keys, then bisect (restricted
    #       to only the remainder) for the next one, etc.
    try:
        while True:
            if cur_in_key < cur_fixed_key:
                # Start a new group at the current fixed offset; cur_keys
                # is appended to in place after being added to output.
                cur_keys = []
                cur_out = (cur_fixed_offset, cur_keys)
                output.append(cur_out)
                while cur_in_key < cur_fixed_key:
                    cur_keys.append(cur_in_key)
                    try:
                        cur_in_key = in_keys_iter.next()
                    except StopIteration:
                        raise InputDone
                # At this point cur_in_key must be >= cur_fixed_key
            # step the cur_fixed_key until we pass the cur key, or walk off
            # the end
            while cur_in_key >= cur_fixed_key:
                try:
                    cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()
                except StopIteration:
                    raise FixedDone
    except InputDone:
        # We consumed all of the input, nothing more to do
        pass
    except FixedDone:
        # There was some input left, but we consumed all of fixed, so we
        # have to add one more for the tail
        cur_keys = [cur_in_key]
        cur_keys.extend(in_keys_iter)
        cur_out = (len(fixed_keys), cur_keys)
        output.append(cur_out)
    return output

1036

1037

def iter_entries(self, keys):
    """Iterate over keys within the index.

    Keys whose values are already in the leaf value cache (when that cache
    is enabled) are answered from it; only the remaining keys are looked up
    by descending the tree, so each matching key is yielded exactly once.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.
    """
    keys = frozenset(keys)
    if not keys:
        return

    if not self.key_count():
        return

    needed_keys = []
    if self._leaf_value_cache is None:
        needed_keys = keys
    else:
        for key in keys:
            value = self._leaf_value_cache.get(key, None)
            if value is not None:
                # The value for this key is already cached, so answer it
                # directly and do not look it up in the tree again.
                value, refs = value
                if self.node_ref_lists:
                    yield (self, key, value, refs)
                else:
                    yield (self, key, value)
            else:
                needed_keys.append(key)

    if not needed_keys:
        return
    # 6 seconds spent in miss_torture using the sorted() line.
    # Even with out of order disk IO it seems faster not to sort it when
    # large queries are being made.
    # However, now that we are doing multi-way bisecting, we need the keys
    # in sorted order anyway. We could change the multi-way code to not
    # require sorted order. (For example, it bisects for the first node,
    # does an in-order search until a key comes before the current point,
    # which it then bisects for, etc.)
    needed_keys = sorted(needed_keys)

    # Each element is (node offset, sorted keys that must live under it).
    nodes_and_keys = [(0, needed_keys)]

    # Descend one row of internal nodes at a time, splitting the wanted
    # keys between the children of each node visited.
    for next_row_start in self._row_offsets[1:-1]:
        node_indexes = [idx for idx, s_keys in nodes_and_keys]
        nodes = self._get_internal_nodes(node_indexes)

        next_nodes_and_keys = []
        for node_index, sub_keys in nodes_and_keys:
            node = nodes[node_index]
            positions = self._multi_bisect_right(sub_keys, node.keys)
            node_offset = next_row_start + node.offset
            next_nodes_and_keys.extend([(node_offset + pos, s_keys)
                                        for pos, s_keys in positions])
        nodes_and_keys = next_nodes_and_keys
    # We should now be at the _LeafNodes
    node_indexes = [idx for idx, s_keys in nodes_and_keys]

    # TODO: We may *not* want to always read all the nodes in one
    #       big go. Consider setting a max size on this.

    nodes = self._get_leaf_nodes(node_indexes)
    for node_index, sub_keys in nodes_and_keys:
        if not sub_keys:
            continue
        node = nodes[node_index]
        for next_sub_key in sub_keys:
            if next_sub_key in node.keys:
                value, refs = node.keys[next_sub_key]
                if self.node_ref_lists:
                    yield (self, next_sub_key, value, refs)
                else:
                    yield (self, next_sub_key, value)

1117

1118

def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    :raises errors.BadIndexKey: If a prefix starts with None or does not
        have the same length as the keys of this index.
    """
    keys = sorted(set(keys))
    if not keys:
        return
    # Load if needed to check key lengths
    if self._key_count is None:
        self._get_root_node()
    # TODO: only access nodes that can satisfy the prefixes we are looking
    # for. For now, to meet API usage (as this function is not used by
    # current bzrlib) just suck the entire index and iterate in memory.
    #
    # Two in-memory layouts are built from the full index:
    #  - nodes: a flat dict, used when keys have a single element
    #  - nodes_by_key: nested dicts, one level per key element, used
    #    otherwise
    nodes = {}
    if self.node_ref_lists:
        if self._key_length == 1:
            for _1, key, value, refs in self.iter_all_entries():
                nodes[key] = value, refs
        else:
            nodes_by_key = {}
            for _1, key, value, refs in self.iter_all_entries():
                key_value = key, value, refs
                # For a key of (foo, bar, baz) create
                # _nodes_by_key[foo][bar][baz] = key_value
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key_value
    else:
        if self._key_length == 1:
            for _1, key, value in self.iter_all_entries():
                nodes[key] = value
        else:
            nodes_by_key = {}
            for _1, key, value in self.iter_all_entries():
                key_value = key, value
                # For a key of (foo, bar, baz) create
                # _nodes_by_key[foo][bar][baz] = key_value
                key_dict = nodes_by_key
                for subkey in key[:-1]:
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key_value
    if self._key_length == 1:
        # With single-element keys a prefix is a complete key, so this is
        # a plain lookup in the flat dict.
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            try:
                if self.node_ref_lists:
                    value, node_refs = nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, nodes[key]
            except KeyError:
                # The key is not in the index; skip it.
                pass
        return
    for key in keys:
        # sanity check
        if key[0] is None:
            raise errors.BadIndexKey(key)
        if len(key) != self._key_length:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a nonexistent lookup.
            continue
        if len(elements):
            # Some trailing elements were None: walk every nested dict
            # below the matched prefix and yield all the entries found.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                # Peek at one value to learn whether this level holds
                # further dicts or the final entries.
                item, value = key_dict.iteritems().next()
                if type(value) == dict:
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        # each value is the key:value:node refs tuple
                        # ready to yield.
                        yield (self, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict

1229

1230

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For BTreeGraphIndex the estimate is exact as it is contained in the
    header. If the count is not known yet, the root node is requested
    first.
    """
    count_unknown = self._key_count is None
    if count_unknown:
        self._get_root_node()
    return self._key_count

1239

1240

def _compute_row_offsets(self):

1241

"""Fill out the _row_offsets attribute based on _row_lengths."""

1242

offsets = []

1243

row_offset = 0

1244

for row in self._row_lengths:

1245

offsets.append(row_offset)

1246

row_offset += row

1247

offsets.append(row_offset)

1248

self._row_offsets = offsets

1249

1250

def _parse_header_from_bytes(self, bytes):
    """Parse the header from a region of bytes.

    The header is the signature followed by four option lines, read in
    this order: node reference lists, key elements, key count and row
    lengths. The parsed values are stored on ``node_ref_lists``,
    ``_key_length``, ``_key_count`` and ``_row_lengths``, after which the
    row offsets are recomputed.

    :param bytes: The data to parse.
    :return: An offset, data tuple such as readv yields, for the unparsed
        data. (which may be of length 0).
    :raises errors.BadIndexFormatSignature: If the data does not start
        with this index type's signature.
    :raises errors.BadIndexOptions: If an option line has the wrong prefix
        or a value that is not an integer.
    """
    expected_signature = self._signature()
    signature = bytes[0:len(expected_signature)]
    if signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
    lines = bytes[len(expected_signature):].splitlines()

    def option_text(line_number, prefix):
        # Return what follows 'prefix' on the given header line.
        line = lines[line_number]
        if not line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        return line[len(prefix):]

    def int_option(line_number, prefix):
        # Return the integer value of the given header line.
        text = option_text(line_number, prefix)
        try:
            return int(text)
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = int_option(0, _OPTION_NODE_REFS)
    self._key_length = int_option(1, _OPTION_KEY_ELEMENTS)
    self._key_count = int_option(2, _OPTION_LEN)
    row_lengths_text = option_text(3, _OPTION_ROW_LENGTHS)
    try:
        # Empty fields (e.g. from a trailing comma) are ignored.
        self._row_lengths = [int(length)
                             for length in row_lengths_text.split(',')
                             if len(length)]
    except ValueError:
        raise errors.BadIndexOptions(self)
    self._compute_row_offsets()

    # calculate the bytes we have processed: the signature, the four
    # option lines, and one line terminator for each of those lines.
    header_end = len(signature) + sum(map(len, lines[0:4])) + 4
    return header_end, bytes[header_end:]

1296

1297

def _read_nodes(self, nodes):
    """Read some nodes from disk, parse them, and yield them.

    This performs a readv to get the node data into memory, and parses each
    node, then yields it to the caller. The nodes are requested in the
    supplied order. If possible doing sort() on the list before requesting
    a read may improve performance.

    This is a generator: nothing is read until it is iterated. When the
    size of the index is not yet known and node 0 is requested, the whole
    file is fetched and every page in it is yielded, not only the nodes
    that were asked for.

    :param nodes: The nodes to read. 0 - first node, 1 - second node etc.
    :return: An iterator of (node index, node) tuples, where node is a
        _LeafNode or an _InternalNode.
    :raises AssertionError: If a requested node starts beyond the end of
        the file, or if a page has an unrecognised node type.
    """
    # may be the byte string of the whole file
    bytes = None
    # list of (offset, length) regions of the file that should, eventually
    # be read in to data_ranges, either from 'bytes' or from the transport
    ranges = []
    for index in nodes:
        offset = index * _PAGE_SIZE
        size = _PAGE_SIZE
        if index == 0:
            # Root node - special case
            if self._size:
                size = min(_PAGE_SIZE, self._size)
            else:
                # The only case where we don't know the size, is for very
                # small indexes. So we read the whole thing
                bytes = self._transport.get_bytes(self._name)
                self._size = len(bytes)
                # the whole thing should be parsed out of 'bytes'
                ranges.append((0, len(bytes)))
                # Any remaining requested nodes are covered by 'bytes'.
                break
        else:
            if offset > self._size:
                raise AssertionError('tried to read past the end'
                                     ' of the file %s > %s'
                                     % (offset, self._size))
            # The last page of the file may be shorter than _PAGE_SIZE.
            size = min(size, self._size - offset)
        ranges.append((offset, size))
    if not ranges:
        return
    elif bytes is not None:
        # already have the whole file
        data_ranges = [(start, bytes[start:start+_PAGE_SIZE])
                       for start in xrange(0, len(bytes), _PAGE_SIZE)]
    elif self._file is None:
        data_ranges = self._transport.readv(self._name, ranges)
    else:
        # A local file object is available, so read from it directly.
        data_ranges = []
        for offset, size in ranges:
            self._file.seek(offset)
            data_ranges.append((offset, self._file.read(size)))
    for offset, data in data_ranges:
        if offset == 0:
            # extract the header
            # After this, offset is the header length and data is what
            # follows the header within the first page.
            offset, data = self._parse_header_from_bytes(data)
            if len(data) == 0:
                continue
        bytes = zlib.decompress(data)
        if bytes.startswith(_LEAF_FLAG):
            node = _LeafNode(bytes, self._key_length, self.node_ref_lists)
        elif bytes.startswith(_INTERNAL_FLAG):
            node = _InternalNode(bytes)
        else:
            raise AssertionError("Unknown node type for %r" % bytes)
        # NOTE(review): this relies on '/' being floor division on ints
        # (Python 2 semantics) to turn a byte offset into a page number.
        yield offset / _PAGE_SIZE, node

1362

1363

def _signature(self):
    """Return the signature that files of this index type begin with."""
    signature = _BTSIGNATURE
    return signature

1366

1367

def validate(self):
    """Validate that everything in the index can be accessed.

    The root node is loaded, then every node after the first row is read
    and parsed; the parsed nodes themselves are discarded.
    """
    self._get_root_node()
    # With a single row there is nothing beyond the root, so start at 1
    # and we shouldn't be reading anything anyway.
    first_node = self._row_offsets[1] if len(self._row_lengths) > 1 else 1
    end_node = self._row_offsets[-1]
    # just read and parse every node.
    for _ in self._read_nodes(range(first_node, end_node)):
        pass

1379

1380

1381

try:

1382

from bzrlib import _btree_serializer_c as _btree_serializer

1383

except ImportError:

1384

from bzrlib import _btree_serializer_py as _btree_serializer

Older »