/brz/remove-bazaar : revision 4132.2.3

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

Committer: Ian Clatworthy
Date: 2009-03-13 06:22:58 UTC
mto: (4159.1.1 ianc-integration)
mto: This revision was merged to the branch mainline in revision 4161.
Revision ID: ian.clatworthy@canonical.com-20090313062258-tmiy9u7oq2yhsvwg

add test as suggested by poolie's review

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2-windows.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/check-newsbugs.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/dirstate.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""DirState objects record the state of a directory and its bzr metadata.

Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and

lines by NL. The field delimiters are omitted in the grammar, line delimiters

are not - this is done for clarity of reading. All string data is in utf8.

MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";

NL = "\n";

NULL = "\0";

WHOLE_NUMBER = {digit}, digit;

BOOLEAN = "y" | "n";

REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,

ghost_details, entries;

header line = "#bazaar dirstate flat format 3", NL;

full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;

row count = "num_entries: ", WHOLE_NUMBER, NL;

parent_details = WHOLE NUMBER, {REVISION_ID}* NL;

ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entries = {entry};

entry = entry_key, current_entry_details, {parent_entry_details};

entry_key = dirname, basename, fileid;

current_entry_details = common_entry_details, working_entry_details;

parent_entry_details = common_entry_details, history_entry_details;

common_entry_details = MINIKIND, fingerprint, size, executable

working_entry_details = packed_stat

history_entry_details = REVISION_ID;

executable = BOOLEAN;

size = WHOLE_NUMBER;

fingerprint = a nonempty utf8 sequence with meaning defined by minikind.

Given this definition, the following is useful to know:

entry (aka row) - all the data for a given key.

entry[0]: The key (dirname, basename, fileid)

entry[0][0]: dirname

entry[0][1]: basename

entry[0][2]: fileid

entry[1]: The tree(s) data for this path and id combination.

entry[1][0]: The current tree

entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate:

entry[1][0][0]: minikind

entry[1][0][1]: fingerprint

entry[1][0][2]: size

entry[1][0][3]: executable

entry[1][0][4]: packed_stat

OR (for non tree-0)

entry[1][1][4]: revision_id

There may be multiple rows at the root, one per id present in the root, so the

in memory root row is now:

self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are

entries[0][0]: ''

entries[0][1]: ''

entries[0][2]: file_id

entries[1][0]: The tree data for the current tree for this fileid at /

etc.

Kinds:

'r' is a relocated entry: This path is not present in this tree with this id,

but the id can be found at another location. The fingerprint is used to

point to the target location.

'a' is an absent entry: In that tree the id is not present at this path.

'd' is a directory entry: This path in this tree is a directory with the

current file id. There is no fingerprint for directories.

'f' is a file entry: As for directory, but it's a file. The fingerprint is the

sha1 value of the file's canonical form, i.e. after any read filters have

been applied to the convenience form stored in the working tree.

'l' is a symlink entry: As for directory, but a symlink. The fingerprint is the

link target.

't' is a reference to a nested subtree; the fingerprint is the referenced

revision.

Ordering:

The entries on disk and in memory are ordered according to the following keys:

directory, as a list of components

filename

file-id

100

101

--- Format 1 had the following different definition: ---

102

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,

103

WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,

104

{PARENT ROW}

105

PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,

106

basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,

107

SHA1

108

109

PARENT ROW's are emitted for every parent that is not in the ghosts details

110

line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then

111

each row will have a PARENT ROW for foo and baz, but not for bar.

112

113

114

In any tree, a kind of 'moved' indicates that the fingerprint field

115

(which we treat as opaque data specific to the 'kind' anyway) has the

116

details for the id of this row in that tree.

117

118

I'm strongly tempted to add a id->path index as well, but I think that

119

where we need id->path mapping; we also usually read the whole file, so

120

I'm going to skip that for the moment, as we have the ability to locate

121

via bisect any path in any tree, and if we lookup things by path, we can

122

accumulate an id->path mapping as we go, which will tend to match what we

123

looked for.

124

125

I plan to implement this asap, so please speak up now to alter/tweak the

126

design - and once we stabilise on this, I'll update the wiki page for

127

it.

128

129

The rationale for all this is that we want fast operations for the

130

common case (diff/status/commit/merge on all files) and extremely fast

131

operations for the less common but still occurs a lot status/diff/commit

132

on specific files). Operations on specific files involve a scan for all

133

the children of a path, *in every involved tree*, which the current

134

format did not accommodate.

135

----

136

137

Design priorities:

138

1) Fast end to end use for bzr's top 5 use cases. (commit/diff/status/merge/???)

139

2) fall back current object model as needed.

140

3) scale usably to the largest trees known today - say 50K entries. (mozilla

141

is an example of this)

142

143

144

Locking:

145

Eventually reuse dirstate objects across locks IFF the dirstate file has not

146

been modified, but will require that we flush/ignore cached stat-hit data

147

because we won't want to restat all files on disk just because a lock was

148

acquired, yet we cannot trust the data after the previous lock was released.

149

150

Memory representation:

151

vector of all directories, and vector of the children ?

152

i.e.

153

root_entrie = (direntry for root, [parent_direntries_for_root]),

154

dirblocks = [

155

('', ['data for achild', 'data for bchild', 'data for cchild'])

156

('dir', ['achild', 'cchild', 'echild'])

157

]

158

- single bisect to find N subtrees from a path spec

159

- in-order for serialisation - this is 'dirblock' grouping.

160

- insertion of a file '/a' affects only the '/' child-vector, that is, to

161

insert 10K elements from scratch does not generate O(N^2) memmoves of a

162

single vector, rather each individual, which tends to be limited to a

163

manageable number. Will scale badly on trees with 10K entries in a

164

single directory. compare with Inventory.InventoryDirectory which has

165

a dictionary for the children. No bisect capability, can only probe for

166

exact matches, or grab all elements and sort.

167

- What's the risk of error here? Once we have the base format being processed

168

we should have a net win regardless of optimality. So we are going to

169

go with what seems reasonable.

170

open questions:

171

172

Maybe we should do a test profile of the core structure - 10K simulated

173

searches/lookups/etc?

174

175

Objects for each row?

176

The lifetime of Dirstate objects is current per lock, but see above for

177

possible extensions. The lifetime of a row from a dirstate is expected to be

178

very short in the optimistic case: which we are optimising for. For instance,

179

subtree status will determine from analysis of the disk data what rows need to

180

be examined at all, and will be able to determine from a single row whether

181

that file has altered or not, so we are aiming to process tens of thousands of

182

entries each second within the dirstate context, before exposing anything to

183

the larger codebase. This suggests we want the time for a single file

184

comparison to be < 0.1 milliseconds. That would give us 10000 paths per second

185

processed, and to scale to 100 thousand we'll need another order of magnitude to do

186

that. Now, as the lifetime for all unchanged entries is the time to parse, stat

187

the file on disk, and then immediately discard, the overhead of object creation

188

becomes a significant cost.

189

190

Figures: Creating a tuple from 3 elements was profiled at 0.0625

191

microseconds, whereas creating a object which is subclassed from tuple was

192

0.500 microseconds, and creating an object with 3 elements and slots was 3

193

microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get

194

down to 10 microseconds for the total processing - having 33% of that be object

195

creation is a huge overhead. There is a potential cost in using tuples within

196

each row which is that the conditional code to do comparisons may be slower

197

than method invocation, but method invocation is known to be slow due to stack

198

frame creation, so avoiding methods in these tight inner loops is unfortunately

199

desirable. We can consider a pyrex version of this with objects in future if

200

desired.

201

202

"""

203

204

import bisect

205

import binascii

206

import errno

207

import os

208

from stat import S_IEXEC

209

import stat

210

import struct

211

import sys

212

import time

213

import zlib

214

215

from bzrlib import (

216

cache_utf8,

217

debug,

218

errors,

219

inventory,

220

lock,

221

osutils,

222

trace,

223

)

224

225

226

# This is the Windows equivalent of ENOTDIR

227

# It is defined in pywin32.winerror, but we don't want a strong dependency for

228

# just an error code.

229

ERROR_PATH_NOT_FOUND = 3

230

ERROR_DIRECTORY = 267

231

232

233

if not getattr(struct, '_compile', None):
    # Cannot pre-compile the dirstate pack_stat
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
        """Convert stat values into a packed representation.

        :param st: A stat result exposing st_size, st_mtime, st_ctime,
            st_dev, st_ino and st_mode.
        :param _encode: Encoder applied to the packed bytes (default
            binascii.b2a_base64).
        :param _pack: Packing callable taking the format followed by the
            six values (default struct.pack).
        :return: The base64 text of the six values packed as big-endian
            unsigned 32 bit integers, without the trailing newline.
        """
        # Every variable-width field is truncated to its low 32 bits so that
        # large files, wide device/inode numbers or out-of-range timestamps
        # cannot overflow the '>L' slots. Values that already fit produce
        # exactly the same output as before.
        return _encode(_pack('>LLLLLL',
            st.st_size & 0xFFFFFFFF,
            int(st.st_mtime) & 0xFFFFFFFF,
            int(st.st_ctime) & 0xFFFFFFFF,
            st.st_dev & 0xFFFFFFFF,
            st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
else:
    # compile the struct compiler we need, so as to only do it once
    from _struct import Struct
    _compiled_pack = Struct('>LLLLLL').pack
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
        """Convert stat values into a packed representation.

        :param st: A stat result exposing st_size, st_mtime, st_ctime,
            st_dev, st_ino and st_mode.
        :param _encode: Encoder applied to the packed bytes (default
            binascii.b2a_base64).
        :param _pack: Pre-compiled packing callable taking the six values.
        :return: The base64 text of the six values packed as big-endian
            unsigned 32 bit integers, without the trailing newline.
        """
        # jam 20060614 it isn't really worth removing more entries if we
        # are going to leave it in packed form.
        # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
        # With all entries, filesize is 5.9M and read time is maybe 280ms
        # well within the noise margin

        # Every variable-width field is truncated to its low 32 bits so that
        # large files, wide device/inode numbers or out-of-range timestamps
        # cannot overflow the '>L' slots.
        # base64 encoding always adds a final newline, so strip it off
        return _encode(_pack(
            st.st_size & 0xFFFFFFFF,
            int(st.st_mtime) & 0xFFFFFFFF,
            int(st.st_ctime) & 0xFFFFFFFF,
            st.st_dev & 0xFFFFFFFF,
            st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
        # Alternatives that were measured and rejected:
        # This is 0.060s / 1.520s faster by not encoding as much information
        # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
        # This is not strictly faster than _encode(_pack())[:-1]
        # return '%X.%X.%X.%X.%X.%X' % (
        #      st.st_size, int(st.st_mtime), int(st.st_ctime),
        #      st.st_dev, st.st_ino, st.st_mode)
        # Similar to the _encode(_pack('>LL'))
        # return '%X.%X' % (int(st.st_mtime), st.st_mode)

264

265

266

class Sha1Provider(object):
    """Abstract source of file sha1 values.

    Both methods are placeholders: each raises NotImplementedError carrying
    the bound method that was invoked, so subclasses must override them.
    """

    def sha1(self, abspath):
        """Compute the sha1 of the file found at the absolute path abspath."""
        unimplemented = self.sha1
        raise NotImplementedError(unimplemented)

    def stat_and_sha1(self, abspath):
        """Compute both the stat value and the sha1 of the file at abspath."""
        unimplemented = self.stat_and_sha1
        raise NotImplementedError(unimplemented)

276

277

278

class DefaultSha1Provider(Sha1Provider):
    """A Sha1Provider that reads directly from the filesystem."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path.

        Delegates to osutils.sha_file_by_name.
        """
        return osutils.sha_file_by_name(abspath)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path.

        The stat is taken from the already-open file descriptor (os.fstat)
        and the sha1 is computed from that same open file object.

        :return: A (statvalue, sha1) tuple.
        """
        # open() is the portable spelling of the legacy file() builtin.
        file_obj = open(abspath, 'rb')
        try:
            statvalue = os.fstat(file_obj.fileno())
            sha1 = osutils.sha_file(file_obj)
        finally:
            # Always release the handle, even if hashing fails.
            file_obj.close()
        return statvalue, sha1

294

295

296

class DirState(object):

297

"""Record directory and metadata state for fast access.

298

299

A dirstate is a specialised data structure for managing local working

300

tree state information. It's not yet well defined whether it is platform

301

specific, and if it is how we detect/parameterize that.

302

303

Dirstates use the usual lock_write, lock_read and unlock mechanisms.

304

Unlike most bzr disk formats, DirStates must be locked for reading, using

305

lock_read. (This is an os file lock internally.) This is necessary

306

because the file can be rewritten in place.

307

308

DirStates must be explicitly written with save() to commit changes; just

309

unlocking them does not write the changes to disk.

310

"""

311

312

_kind_to_minikind = {

313

'absent': 'a',

314

'file': 'f',

315

'directory': 'd',

316

'relocated': 'r',

317

'symlink': 'l',

318

'tree-reference': 't',

319

}

320

_minikind_to_kind = {

321

'a': 'absent',

322

'f': 'file',

323

'd': 'directory',

324

'l':'symlink',

325

'r': 'relocated',

326

't': 'tree-reference',

327

}

328

_stat_to_minikind = {

329

stat.S_IFDIR:'d',

330

stat.S_IFREG:'f',

331

stat.S_IFLNK:'l',

332

}

333

_to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain

334

# of using int conversion rather than a dict here. AND BLAME ANDREW IF

335

# it is faster.

336

337

# TODO: jam 20070221 Figure out what to do if we have a record that exceeds

338

# the BISECT_PAGE_SIZE. For now, we just have to make it large enough

339

# that we are sure a single record will always fit.

340

BISECT_PAGE_SIZE = 4096

341

342

NOT_IN_MEMORY = 0

343

IN_MEMORY_UNMODIFIED = 1

344

IN_MEMORY_MODIFIED = 2

345

346

# A pack_stat (the x's) that is just noise and will never match the output

347

# of base64 encode.

348

NULLSTAT = 'x' * 32

349

NULL_PARENT_DETAILS = ('a', '', 0, False, '')

350

351

HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'

352

HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'

353

354

def __init__(self, path, sha1_provider):
    """Create a DirState object.

    :param path: The path at which the dirstate file on disk should live.
    :param sha1_provider: an object meeting the Sha1Provider interface.
    """
    # Location of the dirstate file plus the file/lock handles, none of
    # which are set yet.
    self._filename = path
    self._state_file = None
    self._lock_token = None
    self._lock_state = None

    # _header_state tracks the dirstate metadata and _dirblock_state the
    # per-row data. Each is one of:
    #   NOT_IN_MEMORY        - no data is in memory
    #   IN_MEMORY_UNMODIFIED - what we have in memory is the same as is
    #                          on disk
    #   IN_MEMORY_MODIFIED   - we have a modified version of what is on
    #                          disk
    # In future we will add more granularity, for instance _dirblock_state
    # will probably support partially-in-memory as a separate variable,
    # allowing for partially-in-memory unmodified and partially-in-memory
    # modified states.
    self._header_state = DirState.NOT_IN_MEMORY
    self._dirblock_state = DirState.NOT_IN_MEMORY
    # If true, an error has been detected while updating the dirstate, and
    # for safety we're not going to commit to disk.
    self._changes_aborted = False

    # In-memory content, initially empty.
    self._dirblocks = []
    self._ghosts = []
    self._parents = []

    # Lookup aids, all unset or empty at construction time.
    self._id_index = None
    # a map from packed_stat to sha's.
    self._packed_stat_index = None
    self._end_of_header = None
    self._cutoff_time = None
    self._split_path_cache = {}
    self._bisect_page_size = DirState.BISECT_PAGE_SIZE

    # Select the sha1 callable: the muttering wrapper when the 'hashcache'
    # debug flag is set, otherwise the provider's sha1 directly.
    self._sha1_provider = sha1_provider
    if 'hashcache' not in debug.debug_flags:
        self._sha1_file = self._sha1_provider.sha1
    else:
        self._sha1_file = self._sha1_file_and_mutter

    # These two attributes provide a simple cache for lookups into the
    # dirstate in-memory vectors. By probing respectively for the last
    # block, and for the next entry, we save nearly 2 bisections per path
    # during commit.
    self._last_block_index = None
    self._last_entry_index = None

401

402

def __repr__(self):
    """Return 'ClassName(<repr of the dirstate file path>)'."""
    class_name = self.__class__.__name__
    return "%s(%r)" % (class_name, self._filename)

405

406

def add(self, path, file_id, kind, stat, fingerprint):
    """Add a path to be tracked.

    :param path: The path within the dirstate - '' is the root, 'foo' is the
        path foo within the root, 'foo/bar' is the path bar within foo
        within the root.
    :param file_id: The file id of the path being added.
    :param kind: The kind of the path, as a string like 'file',
        'directory', etc.
    :param stat: The output of os.lstat for the path, or None (in which
        case size 0 and DirState.NULLSTAT are recorded).
    :param fingerprint: The sha value of the file's canonical form (i.e.
        after any read filters have been applied),
        or the target of a symlink,
        or the referenced revision id for tree-references,
        or '' for directories.
    :raises errors.InvalidNormalization: the basename is not normalized
        and the normalized form cannot be accessed.
    :raises errors.InvalidEntryName: the basename is '.' or '..'.
    :raises errors.DuplicateFileId: file_id is already present (not
        absent) in the current tree.
    :raises errors.NotVersionedError: the containing directory has no
        entry in the current tree.
    :raises errors.BzrError: kind is not file, directory, symlink or
        tree-reference.
    """
    # Outline: find the block the path belongs in, find the location in
    # that block, check it is not already there, then add it.
    #------- copied from inventory.ensure_normalized_name - keep synced.
    # --- normalized_filename wants a unicode basename only, so get one.
    dirname, basename = osutils.split(path)
    # we dont import normalized_filename directly because we want to be
    # able to change the implementation at runtime for tests.
    norm_name, can_access = osutils.normalized_filename(basename)
    if norm_name != basename:
        if not can_access:
            raise errors.InvalidNormalization(path)
        basename = norm_name
    # you should never have files called . or ..; just add the directory
    # in the parent, or according to the special treatment for the root
    if basename in ('.', '..'):
        raise errors.InvalidEntryName(path)
    # now that we've normalised, we need the correct utf8 path and
    # dirname and basename elements. This single encode and split should be
    # faster than three separate encodes.
    utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
    dirname, basename = osutils.split(utf8path)
    # uses __class__ for speed; the check is needed for safety
    if file_id.__class__ is not str:
        raise AssertionError(
            "must be a utf8 file_id not %s" % (type(file_id), ))

    # Make sure the file_id does not exist in this tree
    rename_from = None
    file_id_entry = self._get_entry(0, fileid_utf8=file_id,
                                    include_deleted=True)
    if file_id_entry != (None, None):
        existing_key = file_id_entry[0]
        existing_minikind = file_id_entry[1][0][0]
        if existing_minikind != 'a':
            # The id is live somewhere in the current tree: refuse.
            path = osutils.pathjoin(existing_key[0], existing_key[1])
            kind = DirState._minikind_to_kind[existing_minikind]
            info = '%s:%s' % (kind, path)
            raise errors.DuplicateFileId(file_id, info)
        if existing_key != (dirname, basename, file_id):
            # set the old name's current operation to rename
            self.update_minimal(existing_key,
                'r',
                path_utf8='',
                packed_stat='',
                fingerprint=utf8path
            )
            rename_from = existing_key[0:2]

    first_key = (dirname, basename, '')
    block_index, present = self._find_block_index_from_key(first_key)
    if present:
        # check the path is not in the tree
        block = self._dirblocks[block_index][1]
        entry_index, _ = self._find_entry_index(first_key, block)
        while (entry_index < len(block) and
               block[entry_index][0][0:2] == first_key[0:2]):
            if block[entry_index][1][0][0] not in 'ar':
                # this path is in the dirstate in the current tree.
                raise Exception("adding already added path!")
            entry_index += 1
    else:
        # The block where we want to put the file is not present. But it
        # might be because the directory was empty, or not loaded yet. Look
        # for a parent entry, if not found, raise NotVersionedError
        parent_dir, parent_base = osutils.split(dirname)
        parent_block_idx, parent_entry_idx, _, parent_present = \
            self._get_block_entry_index(parent_dir, parent_base, 0)
        if not parent_present:
            raise errors.NotVersionedError(path, str(self))
        self._ensure_block(parent_block_idx, parent_entry_idx, dirname)
    block = self._dirblocks[block_index][1]
    entry_key = (dirname, basename, file_id)

    if stat is None:
        size = 0
        packed_stat = DirState.NULLSTAT
    else:
        size = stat.st_size
        packed_stat = pack_stat(stat)
    parent_info = self._empty_parent_info()
    minikind = DirState._kind_to_minikind[kind]
    if rename_from is not None:
        if rename_from[0]:
            old_path_utf8 = '%s/%s' % rename_from
        else:
            old_path_utf8 = rename_from[1]
        # Record in the first parent slot where this id was relocated from.
        parent_info[0] = ('r', old_path_utf8, 0, False, '')

    # Details for the current tree; only the fingerprint and size slots
    # vary between kinds.
    if kind in ('file', 'symlink'):
        current_details = (minikind, fingerprint, size, False, packed_stat)
    elif kind == 'directory':
        current_details = (minikind, '', 0, False, packed_stat)
    elif kind == 'tree-reference':
        current_details = (minikind, fingerprint, 0, False, packed_stat)
    else:
        raise errors.BzrError('unknown kind %r' % kind)
    entry_data = entry_key, [current_details] + parent_info

    entry_index, present = self._find_entry_index(entry_key, block)
    if not present:
        block.insert(entry_index, entry_data)
    else:
        # An entry with this exact key exists; it may only be replaced if
        # it is currently absent in tree 0.
        if block[entry_index][1][0][0] != 'a':
            raise AssertionError(" %r(%r) already added" % (basename, file_id))
        block[entry_index][1][0] = entry_data[1][0]

    if kind == 'directory':
        # insert a new dirblock
        self._ensure_block(block_index, entry_index, utf8path)
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    # NOTE(review): a truthiness test also skips an *empty* index, not only
    # an unbuilt (None) one - confirm that is intended.
    if self._id_index:
        self._id_index.setdefault(entry_key[2], set()).add(entry_key)

540

541

def _bisect(self, paths):
    """Bisect through the disk structure for specific rows.

    :param paths: A list of paths to find
    :return: A dict mapping path => entries for found entries. Missing
             entries will not be in the map.
             The list is not sorted, and entries will be populated
             based on when they were read.
    :raises AssertionError: if the dirblocks are already in memory.
    :raises errors.BzrError: if more than 30 * len(paths) page reads are
        attempted.
    """
    self._requires_lock()
    # We need the file pointer to be right after the initial header block
    self._read_header_if_needed()
    # If _dirblock_state was in memory, we should just return info from
    # there, this function is only meant to handle when we want to read
    # part of the disk.
    if self._dirblock_state != DirState.NOT_IN_MEMORY:
        raise AssertionError("bad dirblock state %r" % self._dirblock_state)

    # The disk representation is generally info + '\0\n\0' at the end. But
    # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
    # Because it means we can sync on the '\n'
    state_file = self._state_file
    file_size = os.fstat(state_file.fileno()).st_size
    # We end up with 2 extra fields, we should have a trailing '\n' to
    # ensure that we read the whole record, and we should have a precursor
    # '' which ensures that we start after the previous '\n'
    entry_field_count = self._fields_per_entry() + 1

    low = self._end_of_header
    high = file_size - 1 # Ignore the final '\0'
    # Map from (dir, name) => entry
    found = {}

    # Avoid infinite seeking
    max_count = 30*len(paths)
    count = 0
    # pending is a list of places to look.
    # each entry is a tuple of low, high, dir_names
    #   low -> the first byte offset to read (inclusive)
    #   high -> the last byte offset (inclusive)
    #   dir_names -> The list of (dir, name) pairs that should be found in
    #                the [low, high] range
    pending = [(low, high, paths)]

    page_size = self._bisect_page_size

    fields_to_entry = self._get_fields_to_entry()

    while pending:
        low, high, cur_files = pending.pop()

        if not cur_files or low >= high:
            # Nothing to find
            continue

        count += 1
        if count > max_count:
            raise errors.BzrError('Too many seeks, most likely a bug.')

        # Explicit floor division: the offset handed to seek() must stay an
        # integer regardless of the division semantics in effect.
        mid = max(low, (low+high-page_size)//2)

        state_file.seek(mid)
        # limit the read size, so we don't end up reading data that we have
        # already read.
        read_size = min(page_size, (high-mid)+1)
        block = state_file.read(read_size)

        start = mid
        entries = block.split('\n')

        if len(entries) < 2:
            # We didn't find a '\n', so we cannot have found any records.
            # So put this range back and try again. But we know we have to
            # increase the page size, because a single read did not contain
            # a record break (so records must be larger than page_size)
            page_size *= 2
            pending.append((low, high, cur_files))
            continue

        # Check the first and last entries, in case they are partial, or if
        # we don't care about the rest of this page
        first_entry_num = 0
        first_fields = entries[0].split('\0')
        if len(first_fields) < entry_field_count:
            # We didn't get the complete first entry
            # so move start, and grab the next, which
            # should be a full entry
            start += len(entries[0])+1
            first_fields = entries[1].split('\0')
            first_entry_num = 1

        if len(first_fields) <= 2:
            # We didn't even get a filename here... what do we do?
            # Try a large page size and repeat this query
            page_size *= 2
            pending.append((low, high, cur_files))
            continue

        # Find what entries we are looking for, which occur before and
        # after this first record.
        after = start
        if first_fields[1]:
            first_path = first_fields[1] + '/' + first_fields[2]
        else:
            first_path = first_fields[2]
        first_loc = _bisect_path_left(cur_files, first_path)

        # These exist before the current location
        pre = cur_files[:first_loc]
        # These occur after the current location, which may be in the
        # data we read, or might be after the last entry
        post = cur_files[first_loc:]

        if post and len(first_fields) >= entry_field_count:
            # We have files after the first entry

            # Parse the last entry
            last_entry_num = len(entries)-1
            last_fields = entries[last_entry_num].split('\0')
            if len(last_fields) < entry_field_count:
                # The very last hunk was not complete,
                # read the previous hunk
                after = mid + len(block) - len(entries[-1])
                last_entry_num -= 1
                last_fields = entries[last_entry_num].split('\0')
            else:
                after = mid + len(block)

            if last_fields[1]:
                last_path = last_fields[1] + '/' + last_fields[2]
            else:
                last_path = last_fields[2]
            last_loc = _bisect_path_right(post, last_path)

            middle_files = post[:last_loc]
            post = post[last_loc:]

            if middle_files:
                # We have files that should occur in this block
                # (>= first, <= last)
                # Either we will find them here, or we can mark them as
                # missing.

                if middle_files[0] == first_path:
                    # We might need to go before this location
                    pre.append(first_path)
                if middle_files[-1] == last_path:
                    post.insert(0, last_path)

                # Find out what paths we have. A separate name is used so
                # the 'paths' argument is not rebound mid-function.
                fields_by_path = {first_path: [first_fields]}
                # last_path might == first_path so we need to be
                # careful if we should append rather than overwrite
                if last_entry_num != first_entry_num:
                    fields_by_path.setdefault(last_path, []).append(
                        last_fields)
                # Records strictly between the first and last parsed ones.
                for raw_entry in entries[first_entry_num+1:last_entry_num]:
                    # TODO: jam 20070223 We are already splitting here, so
                    #       shouldn't we just split the whole thing rather
                    #       than doing the split again in add_one_record?
                    fields = raw_entry.split('\0')
                    if fields[1]:
                        path = fields[1] + '/' + fields[2]
                    else:
                        path = fields[2]
                    fields_by_path.setdefault(path, []).append(fields)

                for path in middle_files:
                    for fields in fields_by_path.get(path, []):
                        # offset by 1 because of the opening '\0'
                        # consider changing fields_to_entry to avoid the
                        # extra list slice
                        entry = fields_to_entry(fields[1:])
                        found.setdefault(path, []).append(entry)

        # Now we have split up everything into pre, middle, and post, and
        # we have handled everything that fell in 'middle'.
        # We add 'post' first, so that we prefer to seek towards the
        # beginning, so that we will tend to go as early as we need, and
        # then only seek forward after that.
        if post:
            pending.append((after, high, post))
        if pre:
            pending.append((low, start-1, pre))

    # Consider that we may want to return the directory entries in sorted
    # order. For now, we just return them in whatever order we found them,
    # and leave it up to the caller if they care if it is ordered or not.
    return found

729

730

def _bisect_dirblocks(self, dir_list):
    """Bisect through the disk structure to find entries in given dirs.

    _bisect_dirblocks is meant to find the contents of directories, which
    differs from _bisect, which only finds individual entries.

    :param dir_list: A sorted list of directory names ['', 'dir', 'foo'].
    :return: A map from dir => entries_for_dir
    :raises AssertionError: if the dirblocks are not in the NOT_IN_MEMORY
        state.
    :raises errors.BzrError: if more than 30 * len(dir_list) ranges had to
        be examined.
    """
    # TODO: jam 20070223 A lot of the bisecting logic could be shared
    #       between this and _bisect. It would require parameterizing the
    #       inner loop with a function, though. We should evaluate the
    #       performance difference.
    self._requires_lock()
    # We need the file pointer to be right after the initial header block
    self._read_header_if_needed()
    # If _dirblock_state was in memory, we should just return info from
    # there, this function is only meant to handle when we want to read
    # part of the disk.
    if self._dirblock_state != DirState.NOT_IN_MEMORY:
        raise AssertionError("bad dirblock state %r" % self._dirblock_state)
    # The disk representation is generally info + '\0\n\0' at the end. But
    # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
    # Because it means we can sync on the '\n'
    state_file = self._state_file
    file_size = os.fstat(state_file.fileno()).st_size
    # We end up with 2 extra fields, we should have a trailing '\n' to
    # ensure that we read the whole record, and we should have a precursor
    # '' which ensures that we start after the previous '\n'
    entry_field_count = self._fields_per_entry() + 1

    low = self._end_of_header
    high = file_size - 1 # Ignore the final '\0'
    # Map from dir => entry
    found = {}

    # Avoid infinite seeking
    max_count = 30*len(dir_list)
    count = 0
    # pending is a list of places to look.
    # each entry is a tuple of low, high, dir_names
    #   low -> the first byte offset to read (inclusive)
    #   high -> the last byte offset (inclusive)
    #   dirs -> The list of directories that should be found in
    #           the [low, high] range
    pending = [(low, high, dir_list)]

    page_size = self._bisect_page_size

    fields_to_entry = self._get_fields_to_entry()

    while pending:
        low, high, cur_dirs = pending.pop()

        if not cur_dirs or low >= high:
            # Nothing to find
            continue

        count += 1
        if count > max_count:
            raise errors.BzrError('Too many seeks, most likely a bug.')

        # Read the page that ends at the midpoint of [low, high], but never
        # start before low.
        mid = max(low, (low+high-page_size)/2)

        state_file.seek(mid)
        # limit the read size, so we don't end up reading data that we have
        # already read.
        read_size = min(page_size, (high-mid)+1)
        block = state_file.read(read_size)

        start = mid
        entries = block.split('\n')

        if len(entries) < 2:
            # We didn't find a '\n', so we cannot have found any records.
            # So put this range back and try again. But we know we have to
            # increase the page size, because a single read did not contain
            # a record break (so records must be larger than page_size)
            page_size *= 2
            pending.append((low, high, cur_dirs))
            continue

        # Check the first and last entries, in case they are partial, or if
        # we don't care about the rest of this page
        first_entry_num = 0
        first_fields = entries[0].split('\0')
        if len(first_fields) < entry_field_count:
            # We didn't get the complete first entry
            # so move start, and grab the next, which
            # should be a full entry
            start += len(entries[0])+1
            first_fields = entries[1].split('\0')
            first_entry_num = 1

        if len(first_fields) <= 1:
            # We didn't even get a dirname here... what do we do?
            # Try a large page size and repeat this query
            page_size *= 2
            pending.append((low, high, cur_dirs))
            continue
        else:
            # Find what entries we are looking for, which occur before and
            # after this first record.
            after = start
            first_dir = first_fields[1]
            first_loc = bisect.bisect_left(cur_dirs, first_dir)

            # These exist before the current location
            pre = cur_dirs[:first_loc]
            # These occur after the current location, which may be in the
            # data we read, or might be after the last entry
            post = cur_dirs[first_loc:]

        if post and len(first_fields) >= entry_field_count:
            # We have records to look at after the first entry

            # Parse the last entry
            last_entry_num = len(entries)-1
            last_fields = entries[last_entry_num].split('\0')
            if len(last_fields) < entry_field_count:
                # The very last hunk was not complete,
                # read the previous hunk
                after = mid + len(block) - len(entries[-1])
                last_entry_num -= 1
                last_fields = entries[last_entry_num].split('\0')
            else:
                after = mid + len(block)

            last_dir = last_fields[1]
            last_loc = bisect.bisect_right(post, last_dir)

            # Despite the name (shared with _bisect), these are the
            # directory names that sort within [first_dir, last_dir].
            middle_files = post[:last_loc]
            post = post[last_loc:]

            if middle_files:
                # We have files that should occur in this block
                # (>= first, <= last)
                # Either we will find them here, or we can mark them as
                # missing.

                if middle_files[0] == first_dir:
                    # We might need to go before this location
                    pre.append(first_dir)
                if middle_files[-1] == last_dir:
                    # Likewise, more rows for last_dir may follow this page
                    post.insert(0, last_dir)

                # Find out what paths we have
                paths = {first_dir:[first_fields]}
                # last_dir might == first_dir so we need to be
                # careful if we should append rather than overwrite
                if last_entry_num != first_entry_num:
                    paths.setdefault(last_dir, []).append(last_fields)
                for num in xrange(first_entry_num+1, last_entry_num):
                    # TODO: jam 20070223 We are already splitting here, so
                    #       shouldn't we just split the whole thing rather
                    #       than doing the split again in add_one_record?
                    fields = entries[num].split('\0')
                    paths.setdefault(fields[1], []).append(fields)

                for cur_dir in middle_files:
                    for fields in paths.get(cur_dir, []):
                        # offset by 1 because of the opening '\0'
                        # consider changing fields_to_entry to avoid the
                        # extra list slice
                        entry = fields_to_entry(fields[1:])
                        found.setdefault(cur_dir, []).append(entry)

        # Now we have split up everything into pre, middle, and post, and
        # we have handled everything that fell in 'middle'.
        # We add 'post' first, so that we prefer to seek towards the
        # beginning, so that we will tend to go as early as we need, and
        # then only seek forward after that.
        if post:
            pending.append((after, high, post))
        if pre:
            pending.append((low, start-1, pre))

    return found

908

909

def _bisect_recursive(self, paths):

910

"""Bisect for entries for all paths and their children.

911

912

This will use bisect to find all records for the supplied paths. It

913

will then continue to bisect for any records which are marked as

914

directories. (and renames?)

915

916

:param paths: A sorted list of (dir, name) pairs

917

eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]

918

:return: A dictionary mapping (dir, name, file_id) => [tree_info]

919

"""

920

# Map from (dir, name, file_id) => [tree_info]

921

found = {}

922

923

found_dir_names = set()

924

925

# Directories that have been read

926

processed_dirs = set()

927

# Get the ball rolling with the first bisect for all entries.

928

newly_found = self._bisect(paths)

929

930

while newly_found:

931

# Directories that need to be read

932

pending_dirs = set()

933

paths_to_search = set()

934

for entry_list in newly_found.itervalues():

935

for dir_name_id, trees_info in entry_list:

936

found[dir_name_id] = trees_info

937

found_dir_names.add(dir_name_id[:2])

938

is_dir = False

939

for tree_info in trees_info:

940

minikind = tree_info[0]

941

if minikind == 'd':

942

if is_dir:

943

# We already processed this one as a directory,

944

# we don't need to do the extra work again.

945

continue

946

subdir, name, file_id = dir_name_id

947

path = osutils.pathjoin(subdir, name)

948

is_dir = True

949

if path not in processed_dirs:

950

pending_dirs.add(path)

951

elif minikind == 'r':

952

# Rename, we need to directly search the target

953

# which is contained in the fingerprint column

954

dir_name = osutils.split(tree_info[1])

955

if dir_name[0] in pending_dirs:

956

# This entry will be found in the dir search

957

continue

958

if dir_name not in found_dir_names:

959

paths_to_search.add(tree_info[1])

960

# Now we have a list of paths to look for directly, and

961

# directory blocks that need to be read.

962

# newly_found is mixing the keys between (dir, name) and path

963

# entries, but that is okay, because we only really care about the

964

# targets.

965

newly_found = self._bisect(sorted(paths_to_search))

966

newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))

967

processed_dirs.update(pending_dirs)

968

return found

969

970

def _discard_merge_parents(self):

971

"""Discard any parents trees beyond the first.

972

973

Note that if this fails the dirstate is corrupted.

974

975

After this function returns the dirstate contains 2 trees, neither of

976

which are ghosted.

977

"""

978

self._read_header_if_needed()

979

parents = self.get_parent_ids()

980

if len(parents) < 1:

981

return

982

# only require all dirblocks if we are doing a full-pass removal.

983

self._read_dirblocks_if_needed()

984

dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])

985

def iter_entries_removable():

986

for block in self._dirblocks:

987

deleted_positions = []

988

for pos, entry in enumerate(block[1]):

989

yield entry

990

if (entry[1][0][0], entry[1][1][0]) in dead_patterns:

991

deleted_positions.append(pos)

992

if deleted_positions:

993

if len(deleted_positions) == len(block[1]):

994

del block[1][:]

995

else:

996

for pos in reversed(deleted_positions):

997

del block[1][pos]

998

# if the first parent is a ghost:

999

if parents[0] in self.get_ghosts():

1000

empty_parent = [DirState.NULL_PARENT_DETAILS]

1001

for entry in iter_entries_removable():

1002

entry[1][1:] = empty_parent

1003

else:

1004

for entry in iter_entries_removable():

1005

del entry[1][2:]

1006

1007

self._ghosts = []

1008

self._parents = [parents[0]]

1009

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1010

self._header_state = DirState.IN_MEMORY_MODIFIED

1011

1012

def _empty_parent_info(self):
    """Return one NULL_PARENT_DETAILS per non-ghost parent."""
    present_parent_count = len(self._parents) - len(self._ghosts)
    return [DirState.NULL_PARENT_DETAILS] * present_parent_count

1015

1016

def _ensure_block(self, parent_block_index, parent_row_index, dirname):

1017

"""Ensure a block for dirname exists.

1018

1019

This function exists to let callers which know that there is a

1020

directory dirname ensure that the block for it exists. This block can

1021

fail to exist because of demand loading, or because a directory had no

1022

children. In either case it is not an error. It is however an error to

1023

call this if there is no parent entry for the directory, and thus the

1024

function requires the coordinates of such an entry to be provided.

1025

1026

The root row is special cased and can be indicated with a parent block

1027

and row index of -1

1028

1029

:param parent_block_index: The index of the block in which dirname's row

1030

exists.

1031

:param parent_row_index: The index in the parent block where the row

1032

exists.

1033

:param dirname: The utf8 dirname to ensure there is a block for.

1034

:return: The index for the block.

1035

"""

1036

if dirname == '' and parent_row_index == 0 and parent_block_index == 0:

1037

# This is the signature of the root row, and the

1038

# contents-of-root row is always index 1

1039

return 1

1040

# the basename of the directory must be the end of its full name.

1041

if not (parent_block_index == -1 and

1042

parent_block_index == -1 and dirname == ''):

1043

if not dirname.endswith(

1044

self._dirblocks[parent_block_index][1][parent_row_index][0][1]):

1045

raise AssertionError("bad dirname %r" % dirname)

1046

block_index, present = self._find_block_index_from_key((dirname, '', ''))

1047

if not present:

1048

## In future, when doing partial parsing, this should load and

1049

# populate the entire block.

1050

self._dirblocks.insert(block_index, (dirname, []))

1051

return block_index

1052

1053

def _entries_to_current_state(self, new_entries):

1054

"""Load new_entries into self.dirblocks.

1055

1056

Process new_entries into the current state object, making them the active

1057

state. The entries are grouped together by directory to form dirblocks.

1058

1059

:param new_entries: A sorted list of entries. This function does not sort

1060

to prevent unneeded overhead when callers have a sorted list already.

1061

:return: Nothing.

1062

"""

1063

if new_entries[0][0][0:2] != ('', ''):

1064

raise AssertionError(

1065

"Missing root row %r" % (new_entries[0][0],))

1066

# The two blocks here are deliberate: the root block and the

1067

# contents-of-root block.

1068

self._dirblocks = [('', []), ('', [])]

1069

current_block = self._dirblocks[0][1]

1070

current_dirname = ''

1071

root_key = ('', '')

1072

append_entry = current_block.append

1073

for entry in new_entries:

1074

if entry[0][0] != current_dirname:

1075

# new block - different dirname

1076

current_block = []

1077

current_dirname = entry[0][0]

1078

self._dirblocks.append((current_dirname, current_block))

1079

append_entry = current_block.append

1080

# append the entry to the current block

1081

append_entry(entry)

1082

self._split_root_dirblock_into_contents()

1083

1084

def _split_root_dirblock_into_contents(self):

1085

"""Split the root dirblocks into root and contents-of-root.

1086

1087

After parsing by path, we end up with root entries and contents-of-root

1088

entries in the same block. This loop splits them out again.

1089

"""

1090

# The above loop leaves the "root block" entries mixed with the

1091

# "contents-of-root block". But we don't want an if check on

1092

# all entries, so instead we just fix it up here.

1093

if self._dirblocks[1] != ('', []):

1094

raise ValueError("bad dirblock start %r" % (self._dirblocks[1],))

1095

root_block = []

1096

contents_of_root_block = []

1097

for entry in self._dirblocks[0][1]:

1098

if not entry[0][1]: # This is a root entry

1099

root_block.append(entry)

1100

else:

1101

contents_of_root_block.append(entry)

1102

self._dirblocks[0] = ('', root_block)

1103

self._dirblocks[1] = ('', contents_of_root_block)

1104

1105

def _entries_for_path(self, path):

1106

"""Return a list with all the entries that match path for all ids."""

1107

dirname, basename = os.path.split(path)

1108

key = (dirname, basename, '')

1109

block_index, present = self._find_block_index_from_key(key)

1110

if not present:

1111

# the block which should contain path is absent.

1112

return []

1113

result = []

1114

block = self._dirblocks[block_index][1]

1115

entry_index, _ = self._find_entry_index(key, block)

1116

# we may need to look at multiple entries at this path: walk while the specific_files match.

1117

while (entry_index < len(block) and

1118

block[entry_index][0][0:2] == key[0:2]):

1119

result.append(block[entry_index])

1120

entry_index += 1

1121

return result

1122

1123

def _entry_to_line(self, entry):
    """Serialize entry to a NULL delimited line ready for _get_output_lines.

    :param entry: An entry_tuple as defined in the module docstring.
    :return: The key fields followed by every tree's fields, joined
        with '\\0'.
    """
    key_width = 3
    tree_width = 5
    fields = list(entry[0])
    for tree_index, details in enumerate(entry[1]):
        # details is (minikind, fingerprint, size, executable,
        # tree_specific_string)
        fields.extend(details)
        # Position of this tree's first field within the flattened row.
        base = key_width + tree_index * tree_width
        fields[base + 0] = details[0]                        # minikind
        fields[base + 2] = str(details[2])                   # size
        fields[base + 3] = DirState._to_yesno[details[3]]    # executable
    return '\0'.join(fields)

1141

1142

def _fields_per_entry(self):

1143

"""How many null separated fields should be in each entry row.

1144

1145

Each line now has an extra '\n' field which is not used

1146

so we just skip over it

1147

entry size:

1148

3 fields for the key

1149

+ number of fields per tree_data (5) * tree count

1150

+ newline

1151

"""

1152

tree_count = 1 + self._num_present_parents()

1153

return 3 + 5 * tree_count + 1

1154

1155

def _find_block(self, key, add_if_missing=False):

1156

"""Return the block that key should be present in.

1157

1158

:param key: A dirstate entry key.

1159

:return: The block tuple.

1160

"""

1161

block_index, present = self._find_block_index_from_key(key)

1162

if not present:

1163

if not add_if_missing:

1164

# check to see if key is versioned itself - we might want to

1165

# add it anyway, because dirs with no entries dont get a

1166

# dirblock at parse time.

1167

# This is an uncommon branch to take: most dirs have children,

1168

# and most code works with versioned paths.

1169

parent_base, parent_name = osutils.split(key[0])

1170

if not self._get_block_entry_index(parent_base, parent_name, 0)[3]:

1171

# some parent path has not been added - its an error to add

1172

# this child

1173

raise errors.NotVersionedError(key[0:2], str(self))

1174

self._dirblocks.insert(block_index, (key[0], []))

1175

return self._dirblocks[block_index]

1176

1177

def _find_block_index_from_key(self, key):

1178

"""Find the dirblock index for a key.

1179

1180

:return: The block index, True if the block for the key is present.

1181

"""

1182

if key[0:2] == ('', ''):

1183

return 0, True

1184

try:

1185

if (self._last_block_index is not None and

1186

self._dirblocks[self._last_block_index][0] == key[0]):

1187

return self._last_block_index, True

1188

except IndexError:

1189

pass

1190

block_index = bisect_dirblock(self._dirblocks, key[0], 1,

1191

cache=self._split_path_cache)

1192

# _right returns one-past-where-key is so we have to subtract

1193

# one to use it. we use _right here because there are two

1194

# '' blocks - the root, and the contents of root

1195

# we always have a minimum of 2 in self._dirblocks: root and

1196

# root-contents, and for '', we get 2 back, so this is

1197

# simple and correct:

1198

present = (block_index < len(self._dirblocks) and

1199

self._dirblocks[block_index][0] == key[0])

1200

self._last_block_index = block_index

1201

# Reset the entry index cache to the beginning of the block.

1202

self._last_entry_index = -1

1203

return block_index, present

1204

1205

def _find_entry_index(self, key, block):

1206

"""Find the entry index for a key in a block.

1207

1208

:return: The entry index, True if the entry for the key is present.

1209

"""

1210

len_block = len(block)

1211

try:

1212

if self._last_entry_index is not None:

1213

# mini-bisect here.

1214

entry_index = self._last_entry_index + 1

1215

# A hit is when the key is after the last slot, and before or

1216

# equal to the next slot.

1217

if ((entry_index > 0 and block[entry_index - 1][0] < key) and

1218

key <= block[entry_index][0]):

1219

self._last_entry_index = entry_index

1220

present = (block[entry_index][0] == key)

1221

return entry_index, present

1222

except IndexError:

1223

pass

1224

entry_index = bisect.bisect_left(block, (key, []))

1225

present = (entry_index < len_block and

1226

block[entry_index][0] == key)

1227

self._last_entry_index = entry_index

1228

return entry_index, present

1229

1230

@staticmethod
def from_tree(tree, dir_state_filename, sha1_provider=None):
    """Create a dirstate from a bzr Tree.

    :param tree: The tree which should provide parent information and
        inventory ids.
    :param dir_state_filename: Passed to DirState.initialize as the file
        to create the dirstate in.
    :param sha1_provider: an object meeting the Sha1Provider interface.
        If None, a DefaultSha1Provider is used.
    :return: a DirState object which is currently locked for writing.
        (it was locked by DirState.initialize)
    """
    result = DirState.initialize(dir_state_filename,
                                 sha1_provider=sha1_provider)
    try:
        tree.lock_read()
        # Bound before the inner try so the finally clause can always
        # iterate it, even if get_parent_ids() raises.
        parent_trees = []
        try:
            parent_ids = tree.get_parent_ids()
            for parent_id in parent_ids:
                parent_tree = tree.branch.repository.revision_tree(parent_id)
                # Lock first, record second: only trees that were really
                # locked may be unlocked by the cleanup below.
                parent_tree.lock_read()
                parent_trees.append((parent_id, parent_tree))
            result.set_parent_trees(parent_trees, [])
            result.set_state_from_inventory(tree.inventory)
        finally:
            for revid, parent_tree in parent_trees:
                parent_tree.unlock()
            tree.unlock()
    except:
        # Deliberately bare: whatever went wrong, the caller won't have a
        # chance to unlock this, so make sure we cleanup ourselves before
        # re-raising.
        result.unlock()
        raise
    return result

1265

1266

def update_by_delta(self, delta):
    """Apply an inventory delta to the dirstate for tree 0

    Every delta item becomes a removal of its old path and/or an insertion
    at its new path. When an item has both paths (a move), its child
    entries in tree 0 are moved too, unless the delta already mentions
    them.

    :param delta: An inventory delta.  See Inventory.apply_delta for
        details.
    :raises AssertionError: if a file id occurs more than once in delta.
    """
    self._read_dirblocks_if_needed()
    # file_id => (key, minikind, executable, fingerprint, path_utf8)
    insertions = {}
    # file_id => utf8 path to remove
    removals = {}
    for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
        if (file_id in insertions) or (file_id in removals):
            raise AssertionError("repeated file id in delta %r" % (file_id,))
        if old_path is not None:
            old_path = old_path.encode('utf-8')
            removals[file_id] = old_path
        if new_path is not None:
            new_path = new_path.encode('utf-8')
            dirname, basename = osutils.split(new_path)
            key = (dirname, basename, file_id)
            minikind = DirState._kind_to_minikind[inv_entry.kind]
            if minikind == 't':
                fingerprint = inv_entry.reference_revision
            else:
                fingerprint = ''
            insertions[file_id] = (key, minikind, inv_entry.executable,
                                   fingerprint, new_path)
        # Transform moves into delete+add pairs
        if None not in (old_path, new_path):
            for child in self._iter_child_entries(0, old_path):
                child_dirname, child_basename, child_id = child[0]
                if child_id in insertions or child_id in removals:
                    # The delta already says what happens to this child.
                    continue
                minikind = child[1][0][0]
                # NOTE(review): index 4 is passed on as the fingerprint
                # here; confirm this is the intended column of the tree 0
                # details.
                fingerprint = child[1][0][4]
                executable = child[1][0][3]
                old_child_path = osutils.pathjoin(child_dirname,
                                                  child_basename)
                removals[child_id] = old_child_path
                # Keep whatever follows old_path in the child's dirname
                # and graft it onto new_path.
                child_suffix = child_dirname[len(old_path):]
                new_child_dirname = (new_path + child_suffix)
                key = (new_child_dirname, child_basename, child_id)
                # Use osutils.pathjoin, as for old_child_path above, not
                # os.path.join, which would insert the platform separator.
                new_child_path = osutils.pathjoin(new_child_dirname,
                                                  child_basename)
                insertions[child_id] = (key, minikind, executable,
                                        fingerprint, new_child_path)
    self._apply_removals(removals.values())
    self._apply_insertions(insertions.values())

1314

1315

def _apply_removals(self, removals):

1316

for path in sorted(removals, reverse=True):

1317

dirname, basename = osutils.split(path)

1318

block_i, entry_i, d_present, f_present = \

1319

self._get_block_entry_index(dirname, basename, 0)

1320

entry = self._dirblocks[block_i][1][entry_i]

1321

self._make_absent(entry)

1322

# See if we have a malformed delta: deleting a directory must not

1323

# leave crud behind. This increases the number of bisects needed

1324

# substantially, but deletion or renames of large numbers of paths

1325

# is rare enough it shouldn't be an issue (famous last words?) RBC

1326

# 20080730.

1327

block_i, entry_i, d_present, f_present = \

1328

self._get_block_entry_index(path, '', 0)

1329

if d_present:

1330

# The dir block is still present in the dirstate; this could

1331

# be due to it being in a parent tree, or a corrupt delta.

1332

for child_entry in self._dirblocks[block_i][1]:

1333

if child_entry[1][0][0] not in ('r', 'a'):

1334

raise errors.InconsistentDelta(path, entry[0][2],

1335

"The file id was deleted but its children were "

1336

"not deleted.")

1337

1338

def _apply_insertions(self, adds):

1339

for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):

1340

self.update_minimal(key, minikind, executable, fingerprint,

1341

path_utf8=path_utf8)

1342

1343

def update_basis_by_delta(self, delta, new_revid):
    """Update the parents of this tree after a commit.

    This gives the tree one parent, with revision id new_revid. The
    inventory delta is applied to the current basis tree to generate the
    inventory for the parent new_revid, and all other parent trees are
    discarded.

    Note that an exception during the operation of this method will leave
    the dirstate in a corrupt state where it should not be saved.

    Finally, we expect all changes to be synchronising the basis tree with
    the working tree.

    :param new_revid: The new revision id for the trees parent.
    :param delta: An inventory delta (see apply_inventory_delta) describing
        the changes from the current left most parent revision to new_revid.
    :raises NotImplementedError: if ghosts remain after the merge parents
        have been discarded.
    """
    self._read_dirblocks_if_needed()
    self._discard_merge_parents()
    if self._ghosts != []:
        raise NotImplementedError(self.update_basis_by_delta)
    if len(self._parents) == 0:
        # setup a blank tree, the most simple way.
        empty_parent = DirState.NULL_PARENT_DETAILS
        for entry in self._iter_entries():
            entry[1].append(empty_parent)
        self._parents.append(new_revid)

    self._parents[0] = new_revid

    delta = sorted(delta, reverse=True)
    adds = []
    changes = []
    deletes = []
    # The paths this function accepts are unicode and must be encoded as we
    # go.
    encode = cache_utf8.encode
    inv_to_entry = self._inv_entry_to_details
    # delta is now (deletes, changes), (adds) in reverse lexographical
    # order.
    # deletes in reverse lexographic order are safe to process in situ.
    # renames are not, as a rename from any path could go to a path
    # lexographically lower, so we transform renames into delete, add pairs,
    # expanding them recursively as needed.
    # At the same time, to reduce interface friction we convert the input
    # inventory entries to dirstate.
    root_only = ('', '')
    for old_path, new_path, file_id, inv_entry in delta:
        if old_path is None:
            adds.append((None, encode(new_path), file_id,
                inv_to_entry(inv_entry), True))
        elif new_path is None:
            deletes.append((encode(old_path), None, file_id, None, True))
        elif (old_path, new_path) != root_only:
            # Renames:
            # Because renames must preserve their children we must have
            # processed all relocations and removes before hand. The sort
            # order ensures we've examined the child paths, but we also
            # have to execute the removals, or the split to an add/delete
            # pair will result in the deleted item being reinserted, or
            # renamed items being reinserted twice - and possibly at the
            # wrong place. Splitting into a delete/add pair also simplifies
            # the handling of entries with ('f', ...), ('r' ...) because
            # the target of the 'r' is old_path here, and we add that to
            # deletes, meaning that the add handler does not need to check
            # for 'r' items on every pass.
            self._update_basis_apply_deletes(deletes)
            deletes = []
            # Encode both paths once. Child paths read from the dirstate
            # are utf8, so every slice below must use the utf8 length of
            # the old path, not the unicode length.
            old_path_utf8 = encode(old_path)
            new_path_utf8 = encode(new_path)
            old_prefix_len = len(old_path_utf8)
            # Split into an add/delete pair recursively.
            adds.append((None, new_path_utf8, file_id,
                inv_to_entry(inv_entry), False))
            # Expunge deletes that we've seen so that deleted/renamed
            # children of a rename directory are handled correctly.
            new_deletes = reversed(list(self._iter_child_entries(1,
                old_path_utf8)))
            # Remove the current contents of the tree at orig_path, and
            # reinsert at the correct new path.
            for entry in new_deletes:
                if entry[0][0]:
                    source_path = entry[0][0] + '/' + entry[0][1]
                else:
                    source_path = entry[0][1]
                if new_path_utf8:
                    target_path = new_path_utf8 + source_path[old_prefix_len:]
                else:
                    if old_path_utf8 == '':
                        raise AssertionError("cannot rename directory to"
                            " itself")
                    # Also drop the '/' that follows the old prefix.
                    target_path = source_path[old_prefix_len + 1:]
                adds.append((None, target_path, entry[0][2], entry[1][1], False))
                deletes.append(
                    (source_path, target_path, entry[0][2], None, False))
            deletes.append(
                (old_path_utf8, new_path_utf8, file_id, None, False))
        else:
            # changes to just the root should not require remove/insertion
            # of everything.
            changes.append((encode(old_path), encode(new_path), file_id,
                inv_to_entry(inv_entry)))

    # Finish expunging deletes/first half of renames.
    self._update_basis_apply_deletes(deletes)
    # Reinstate second half of renames and new paths.
    self._update_basis_apply_adds(adds)
    # Apply in-situ changes.
    self._update_basis_apply_changes(changes)

    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    self._header_state = DirState.IN_MEMORY_MODIFIED
    self._id_index = None
    return

1456

1457

def _update_basis_apply_adds(self, adds):

1458

"""Apply a sequence of adds to tree 1 during update_basis_by_delta.

1459

1460

They may be adds, or renames that have been split into add/delete

1461

pairs.

1462

1463

:param adds: A sequence of adds. Each add is a tuple:

1464

(None, new_path_utf8, file_id, (entry_details), real_add). real_add

1465

is False when the add is the second half of a remove-and-reinsert

1466

pair created to handle renames and deletes.

1467

"""

1468

# Adds are accumulated partly from renames, so can be in any input

1469

# order - sort it.

1470

adds.sort()

1471

# adds is now in lexographic order, which places all parents before

1472

# their children, so we can process it linearly.

1473

absent = 'ar'

1474

for old_path, new_path, file_id, new_details, real_add in adds:

1475

# the entry for this file_id must be in tree 0.

1476

entry = self._get_entry(0, file_id, new_path)

1477

if entry[0] is None or entry[0][2] != file_id:

1478

self._changes_aborted = True

1479

raise errors.InconsistentDelta(new_path, file_id,

1480

'working tree does not contain new entry')

1481

if real_add and entry[1][1][0] not in absent:

1482

self._changes_aborted = True

1483

raise errors.InconsistentDelta(new_path, file_id,

1484

'The entry was considered to be a genuinely new record,'

1485

' but there was already an old record for it.')

1486

# We don't need to update the target of an 'r' because the handling

1487

# of renames turns all 'r' situations into a delete at the original

1488

# location.

1489

entry[1][1] = new_details

1490

1491

def _update_basis_apply_changes(self, changes):

1492

"""Apply a sequence of changes to tree 1 during update_basis_by_delta.

1493

1494

:param adds: A sequence of changes. Each change is a tuple:

1495

(path_utf8, path_utf8, file_id, (entry_details))

1496

"""

1497

absent = 'ar'

1498

for old_path, new_path, file_id, new_details in changes:

1499

# the entry for this file_id must be in tree 0.

1500

entry = self._get_entry(0, file_id, new_path)

1501

if entry[0] is None or entry[0][2] != file_id:

1502

self._changes_aborted = True

1503

raise errors.InconsistentDelta(new_path, file_id,

1504

'working tree does not contain new entry')

1505

if (entry[1][0][0] in absent or

1506

entry[1][1][0] in absent):

1507

self._changes_aborted = True

1508

raise errors.InconsistentDelta(new_path, file_id,

1509

'changed considered absent')

1510

entry[1][1] = new_details

1511

1512

def _update_basis_apply_deletes(self, deletes):
    """Remove or blank out tree 1 records during update_basis_by_delta.

    An item is either a genuine delete, or the removal half of a rename
    that was split into an add/delete pair.

    :param deletes: A sequence of deletes. Each delete is a tuple:
        (old_path_utf8, new_path_utf8, file_id, None, real_delete).
        real_delete is True when the desired outcome is an actual deletion
        rather than the rename handling logic temporarily deleting a path
        during the replacement of a parent.
    :raises AssertionError: if real_delete disagrees with new_path being
        None.
    :raises errors.InconsistentDelta: if the row is missing from tree 1,
        has a different file_id, or its tree 0 state contradicts the kind
        of delete requested.
    """
    blank_details = DirState.NULL_PARENT_DETAILS
    for old_path, new_path, file_id, _, real_delete in deletes:
        if real_delete != (new_path is None):
            raise AssertionError("bad delete delta")
        # The row being removed must already exist for tree 1.
        dirname, basename = osutils.split(old_path)
        location = self._get_block_entry_index(dirname, basename, 1)
        block_index, entry_index, dir_present, file_present = location
        if not file_present:
            self._changes_aborted = True
            raise errors.InconsistentDelta(old_path, file_id,
                'basis tree does not contain removed entry')
        block_entries = self._dirblocks[block_index][1]
        entry = block_entries[entry_index]
        if entry[0][2] != file_id:
            self._changes_aborted = True
            raise errors.InconsistentDelta(old_path, file_id,
                'mismatched file_id in tree 1')
        working_minikind = entry[1][0][0]
        if real_delete:
            if working_minikind != 'a':
                self._changes_aborted = True
                raise errors.InconsistentDelta(old_path, file_id,
                        'This was marked as a real delete, but the WT state'
                        ' claims that it still exists and is versioned.')
            # Absent in tree 0 and now gone from tree 1: drop the whole row.
            del block_entries[entry_index]
        elif working_minikind == 'a':
            # We were asked to rename an entry that tree 0 marks as
            # deleted. This could be because the parent directory is being
            # renamed while the WT current state has the file deleted.
            self._changes_aborted = True
            raise errors.InconsistentDelta(old_path, file_id,
                'The entry was considered a rename, but the source path'
                ' is marked as absent.')
        elif working_minikind == 'r':
            # implement the rename
            del block_entries[entry_index]
        else:
            # it is being resurrected here, so blank it out temporarily.
            entry[1][1] = blank_details

1564

1565

def _observed_sha1(self, entry, sha1, stat_value,
                   _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
    """Record an observed sha1 in the tree 0 details of a file entry.

    The details are only rewritten when the stat maps to minikind 'f' and
    both st_mtime and st_ctime are older than the cutoff time.

    :param entry: The entry the sha1 is for.
    :param sha1: The observed sha1.
    :param stat_value: The os.lstat for the file.
    :return: None.
    """
    try:
        # 0xF000 is the same value as octal 0170000 (the S_IFMT mask).
        minikind = _stat_to_minikind[stat_value.st_mode & 0xF000]
    except KeyError:
        # Unhandled kind
        return None
    packed_stat = _pack_stat(stat_value)
    if minikind != 'f':
        return
    if self._cutoff_time is None:
        self._sha_cutoff_time()
    cutoff = self._cutoff_time
    if stat_value.st_mtime < cutoff and stat_value.st_ctime < cutoff:
        working_details = entry[1][0]
        # Keep the recorded size and executable flag; replace fingerprint
        # and packed stat.
        entry[1][0] = ('f', sha1, working_details[2], working_details[3],
            packed_stat)
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1587

1588

def _sha_cutoff_time(self):

1589

"""Return cutoff time.

1590

1591

Files modified more recently than this time are at risk of being

1592

undetectably modified and so can't be cached.

1593

"""

1594

# Cache the cutoff time as long as we hold a lock.

1595

# time.time() isn't super expensive (approx 3.38us), but

1596

# when you call it 50,000 times it adds up.

1597

# For comparison, os.lstat() costs 7.2us if it is hot.

1598

self._cutoff_time = int(time.time()) - 3

1599

return self._cutoff_time

1600

1601

def _lstat(self, abspath, entry):

1602

"""Return the os.lstat value for this path."""

1603

return os.lstat(abspath)

1604

1605

def _sha1_file_and_mutter(self, abspath):
    """Log the path via trace.mutter, then return its sha1.

    When -Dhashcache is turned on, this is monkey-patched in to log file
    reads.

    :param abspath: Path passed on to the sha1 provider.
    :return: Whatever self._sha1_provider.sha1(abspath) returns.
    """
    message = "dirstate sha1 " + abspath
    trace.mutter(message)
    return self._sha1_provider.sha1(abspath)

1610

1611

def _is_executable(self, mode, old_executable):

1612

"""Is this file executable?"""

1613

return bool(S_IEXEC & mode)

1614

1615

def _is_executable_win32(self, mode, old_executable):

1616

"""On win32 the executable bit is stored in the dirstate."""

1617

return old_executable

1618

1619

if sys.platform == 'win32':

1620

_is_executable = _is_executable_win32

1621

1622

def _read_link(self, abspath, old_link):
    """Read the target of a symlink

    :param abspath: Path of the symlink; a unicode path is encoded to the
        filesystem encoding before os.readlink is called.
    :param old_link: Not used by this implementation.
    :return: The link target. It is re-encoded to UTF-8 unless the
        filesystem encoding is UTF-8 or one of the ASCII names checked
        below.
    """
    # TODO: jam 200700301 On Win32, this could just return the value
    #       already in memory. However, this really needs to be done at a
    #       higher level, because there either won't be anything on disk,
    #       or the thing on disk will be a file.
    fs_encoding = osutils._fs_enc
    if isinstance(abspath, unicode):
        # abspath is defined as the path to pass to lstat. readlink is
        # buggy in python < 2.6 (it doesn't encode unicode path into FS
        # encoding), so we need to encode ourselves knowing that unicode
        # paths are produced by UnicodeDirReader on purpose.
        abspath = abspath.encode(fs_encoding)
    target = os.readlink(abspath)
    if fs_encoding in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
        # No re-encoding is done for these encodings.
        return target
    # Change encoding if needed
    return target.decode(fs_encoding).encode('UTF-8')

1640

1641

def get_ghosts(self):
    """Return a list of the parent tree revision ids that are ghosts.

    The header is read first if needed. The list returned is the stored
    self._ghosts object itself, not a copy.
    """
    self._read_header_if_needed()
    ghosts = self._ghosts
    return ghosts

1645

1646

def get_lines(self):
    """Serialise the entire dirstate to a sequence of lines.

    When neither the header nor the dirblocks have been modified in
    memory, the lines are read straight back from the state file.
    """
    unmodified = DirState.IN_MEMORY_UNMODIFIED
    if (self._header_state == unmodified and
        self._dirblock_state == unmodified):
        # read whats on disk.
        self._state_file.seek(0)
        return self._state_file.readlines()
    # Parents line first, then ghosts line, then one line per entry.
    lines = [self._get_parents_line(self.get_parent_ids())]
    lines.append(self._get_ghosts_line(self._ghosts))
    entry_to_line = self._entry_to_line
    lines.extend([entry_to_line(entry) for entry in self._iter_entries()])
    return self._get_output_lines(lines)

1659

1660

def _get_ghosts_line(self, ghost_ids):

1661

"""Create a line for the state file for ghost information."""

1662

return '\0'.join([str(len(ghost_ids))] + ghost_ids)

1663

1664

def _get_parents_line(self, parent_ids):

1665

"""Create a line for the state file for parents information."""

1666

return '\0'.join([str(len(parent_ids))] + parent_ids)

1667

1668

def _get_fields_to_entry(self):

1669

"""Get a function which converts entry fields into a entry record.

1670

1671

This handles size and executable, as well as parent records.

1672

1673

:return: A function which takes a list of fields, and returns an

1674

appropriate record for storing in memory.

1675

"""

1676

# This is intentionally unrolled for performance

1677

num_present_parents = self._num_present_parents()

1678

if num_present_parents == 0:

1679

def fields_to_entry_0_parents(fields, _int=int):

1680

path_name_file_id_key = (fields[0], fields[1], fields[2])

1681

return (path_name_file_id_key, [

1682

( # Current tree

1683

fields[3], # minikind

1684

fields[4], # fingerprint

1685

_int(fields[5]), # size

1686

fields[6] == 'y', # executable

1687

fields[7], # packed_stat or revision_id

1688

)])

1689

return fields_to_entry_0_parents

1690

elif num_present_parents == 1:

1691

def fields_to_entry_1_parent(fields, _int=int):

1692

path_name_file_id_key = (fields[0], fields[1], fields[2])

1693

return (path_name_file_id_key, [

1694

( # Current tree

1695

fields[3], # minikind

1696

fields[4], # fingerprint

1697

_int(fields[5]), # size

1698

fields[6] == 'y', # executable

1699

fields[7], # packed_stat or revision_id

1700

1701

( # Parent 1

1702

fields[8], # minikind

1703

fields[9], # fingerprint

1704

_int(fields[10]), # size

1705

fields[11] == 'y', # executable

1706

fields[12], # packed_stat or revision_id

1707

1708

])

1709

return fields_to_entry_1_parent

1710

elif num_present_parents == 2:

1711

def fields_to_entry_2_parents(fields, _int=int):

1712

path_name_file_id_key = (fields[0], fields[1], fields[2])

1713

return (path_name_file_id_key, [

1714

( # Current tree

1715

fields[3], # minikind

1716

fields[4], # fingerprint

1717

_int(fields[5]), # size

1718

fields[6] == 'y', # executable

1719

fields[7], # packed_stat or revision_id

1720

1721

( # Parent 1

1722

fields[8], # minikind

1723

fields[9], # fingerprint

1724

_int(fields[10]), # size

1725

fields[11] == 'y', # executable

1726

fields[12], # packed_stat or revision_id

1727

1728

( # Parent 2

1729

fields[13], # minikind

1730

fields[14], # fingerprint

1731

_int(fields[15]), # size

1732

fields[16] == 'y', # executable

1733

fields[17], # packed_stat or revision_id

1734

1735

])

1736

return fields_to_entry_2_parents

1737

else:

1738

def fields_to_entry_n_parents(fields, _int=int):

1739

path_name_file_id_key = (fields[0], fields[1], fields[2])

1740

trees = [(fields[cur], # minikind

1741

fields[cur+1], # fingerprint

1742

_int(fields[cur+2]), # size

1743

fields[cur+3] == 'y', # executable

1744

fields[cur+4], # stat or revision_id

1745

) for cur in xrange(3, len(fields)-1, 5)]

1746

return path_name_file_id_key, trees

1747

return fields_to_entry_n_parents

1748

1749

def get_parent_ids(self):
    """Return a list of the parent tree ids for the directory state.

    The header is read first if needed. The result is a new list, so
    changing it does not alter self._parents.
    """
    self._read_header_if_needed()
    parent_ids = list(self._parents)
    return parent_ids

1753

1754

def _get_block_entry_index(self, dirname, basename, tree_index):

1755

"""Get the coordinates for a path in the state structure.

1756

1757

:param dirname: The utf8 dirname to lookup.

1758

:param basename: The utf8 basename to lookup.

1759

:param tree_index: The index of the tree for which this lookup should

1760

be attempted.

1761

:return: A tuple describing where the path is located, or should be

1762

inserted. The tuple contains four fields: the block index, the row

1763

index, the directory is present (boolean), the entire path is

1764

present (boolean). There is no guarantee that either

1765

coordinate is currently reachable unless the found field for it is

1766

True. For instance, a directory not present in the searched tree

1767

may be returned with a value one greater than the current highest

1768

block offset. The directory present field will always be True when

1769

the path present field is True. The directory present field does

1770

NOT indicate that the directory is present in the searched tree,

1771

rather it indicates that there are at least some files in some

1772

tree present there.

1773

"""

1774

self._read_dirblocks_if_needed()

1775

key = dirname, basename, ''

1776

block_index, present = self._find_block_index_from_key(key)

1777

if not present:

1778

# no such directory - return the dir index and 0 for the row.

1779

return block_index, 0, False, False

1780

block = self._dirblocks[block_index][1] # access the entries only

1781

entry_index, present = self._find_entry_index(key, block)

1782

# linear search through entries at this path to find the one

1783

# requested.

1784

while entry_index < len(block) and block[entry_index][0][1] == basename:

1785

if block[entry_index][1][tree_index][0] not in 'ar':

1786

# neither absent or relocated

1787

return block_index, entry_index, True, True

1788

entry_index += 1

1789

return block_index, entry_index, True, False

1790

1791

def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None, include_deleted=False):

1792

"""Get the dirstate entry for path in tree tree_index.

1793

1794

If either file_id or path is supplied, it is used as the key to lookup.

1795

If both are supplied, the fastest lookup is used, and an error is

1796

raised if they do not both point at the same row.

1797

1798

:param tree_index: The index of the tree we wish to locate this path

1799

in. If the path is present in that tree, the entry containing its

1800

details is returned, otherwise (None, None) is returned

1801

0 is the working tree, higher indexes are successive parent

1802

trees.

1803

:param fileid_utf8: A utf8 file_id to look up.

1804

:param path_utf8: An utf8 path to be looked up.

1805

:param include_deleted: If True, and performing a lookup via

1806

fileid_utf8 rather than path_utf8, return an entry for deleted

1807

(absent) paths.

1808

:return: The dirstate entry tuple for path, or (None, None)

1809

"""

1810

self._read_dirblocks_if_needed()

1811

if path_utf8 is not None:

1812

if type(path_utf8) is not str:

1813

raise AssertionError('path_utf8 is not a str: %s %s'

1814

% (type(path_utf8), path_utf8))

1815

# path lookups are faster

1816

dirname, basename = osutils.split(path_utf8)

1817

block_index, entry_index, dir_present, file_present = \

1818

self._get_block_entry_index(dirname, basename, tree_index)

1819

if not file_present:

1820

return None, None

1821

entry = self._dirblocks[block_index][1][entry_index]

1822

if not (entry[0][2] and entry[1][tree_index][0] not in ('a', 'r')):

1823

raise AssertionError('unversioned entry?')

1824

if fileid_utf8:

1825

if entry[0][2] != fileid_utf8:

1826

self._changes_aborted = True

1827

raise errors.BzrError('integrity error ? : mismatching'

1828

' tree_index, file_id and path')

1829

return entry

1830

else:

1831

possible_keys = self._get_id_index().get(fileid_utf8, None)

1832

if not possible_keys:

1833

return None, None

1834

for key in possible_keys:

1835

block_index, present = \

1836

self._find_block_index_from_key(key)

1837

# strange, probably indicates an out of date

1838

# id index - for now, allow this.

1839

if not present:

1840

continue

1841

# WARNING: DO not change this code to use _get_block_entry_index

1842

# as that function is not suitable: it does not use the key

1843

# to lookup, and thus the wrong coordinates are returned.

1844

block = self._dirblocks[block_index][1]

1845

entry_index, present = self._find_entry_index(key, block)

1846

if present:

1847

entry = self._dirblocks[block_index][1][entry_index]

1848

if entry[1][tree_index][0] in 'fdlt':

1849

# this is the result we are looking for: the

1850

# real home of this file_id in this tree.

1851

return entry

1852

if entry[1][tree_index][0] == 'a':

1853

# there is no home for this entry in this tree

1854

if include_deleted:

1855

return entry

1856

return None, None

1857

if entry[1][tree_index][0] != 'r':

1858

raise AssertionError(

1859

"entry %r has invalid minikind %r for tree %r" \

1860

% (entry,

1861

entry[1][tree_index][0],

1862

tree_index))

1863

real_path = entry[1][tree_index][1]

1864

return self._get_entry(tree_index, fileid_utf8=fileid_utf8,

1865

path_utf8=real_path)

1866

return None, None

1867

1868

@classmethod
def initialize(cls, path, sha1_provider=None):
    """Create a new dirstate on path.

    The new dirstate will be an empty tree - that is it has no parents,
    and only a root node - which has id ROOT_ID.

    :param path: The name of the file for the dirstate.
    :param sha1_provider: an object meeting the Sha1Provider interface.
        If None, a DefaultSha1Provider is used.
    :return: A write-locked DirState object.
    """
    # Construct the object, write-lock it, hand it the stock empty
    # dirstate data via _set_data() - a root with ROOT_ID, no children,
    # and no parents - and finally save() so that the data persists.
    provider = sha1_provider
    if provider is None:
        provider = DefaultSha1Provider()
    result = cls(path, provider)
    # a new root directory, with a NULLSTAT.
    root_entry = (('', '', inventory.ROOT_ID), [
        ('d', '', 0, False, DirState.NULLSTAT),
        ])
    # root dir and root dir contents with no children.
    empty_tree_dirblocks = [('', [root_entry]), ('', [])]
    result.lock_write()
    try:
        result._set_data([], empty_tree_dirblocks)
        result.save()
    except:
        # Whatever went wrong, release the lock before re-raising.
        result.unlock()
        raise
    return result

1903

1904

@staticmethod
def _inv_entry_to_details(inv_entry):
    """Convert an inventory entry (from a revision tree) to state details.

    :param inv_entry: An inventory entry whose sha1 and link targets can be
        relied upon, and which has a revision set.
    :return: A details tuple - the details for a single tree at a path +
        id: (minikind, fingerprint, size, executable, revision).
    :raises Exception: if the kind is not directory, symlink, file or
        tree-reference.
    """
    kind = inv_entry.kind
    minikind = DirState._kind_to_minikind[kind]
    tree_data = inv_entry.revision
    # Only files carry a size and an executable flag; every other kind
    # keeps these defaults.
    size = 0
    executable = False
    if kind == 'directory':
        fingerprint = ''
    elif kind == 'symlink':
        # The fingerprint of a symlink is its utf8 target, if it has one.
        if inv_entry.symlink_target is None:
            fingerprint = ''
        else:
            fingerprint = inv_entry.symlink_target.encode('utf8')
    elif kind == 'file':
        fingerprint = inv_entry.text_sha1 or ''
        size = inv_entry.text_size or 0
        executable = inv_entry.executable
    elif kind == 'tree-reference':
        fingerprint = inv_entry.reference_revision or ''
    else:
        raise Exception("can't pack %s" % inv_entry)
    return (minikind, fingerprint, size, executable, tree_data)

1938

1939

def _iter_child_entries(self, tree_index, path_utf8):

1940

"""Iterate over all the entries that are children of path_utf.

1941

1942

This only returns entries that are present (not in 'a', 'r') in

1943

tree_index. tree_index data is not refreshed, so if tree 0 is used,

1944

results may differ from that obtained if paths were statted to

1945

determine what ones were directories.

1946

1947

Asking for the children of a non-directory will return an empty

1948

iterator.

1949

"""

1950

pending_dirs = []

1951

next_pending_dirs = [path_utf8]

1952

absent = 'ar'

1953

while next_pending_dirs:

1954

pending_dirs = next_pending_dirs

1955

next_pending_dirs = []

1956

for path in pending_dirs:

1957

block_index, present = self._find_block_index_from_key(

1958

(path, '', ''))

1959

if block_index == 0:

1960

block_index = 1

1961

if len(self._dirblocks) == 1:

1962

# asked for the children of the root with no other

1963

# contents.

1964

return

1965

if not present:

1966

# children of a non-directory asked for.

1967

continue

1968

block = self._dirblocks[block_index]

1969

for entry in block[1]:

1970

kind = entry[1][tree_index][0]

1971

if kind not in absent:

1972

yield entry

1973

if kind == 'd':

1974

if entry[0][0]:

1975

path = entry[0][0] + '/' + entry[0][1]

1976

else:

1977

path = entry[0][1]

1978

next_pending_dirs.append(path)

1979

1980

def _iter_entries(self):

1981

"""Iterate over all the entries in the dirstate.

1982

1983

Each yelt item is an entry in the standard format described in the

1984

docstring of bzrlib.dirstate.

1985

"""

1986

self._read_dirblocks_if_needed()

1987

for directory in self._dirblocks:

1988

for entry in directory[1]:

1989

yield entry

1990

1991

def _get_id_index(self):

1992

"""Get an id index of self._dirblocks."""

1993

if self._id_index is None:

1994

id_index = {}

1995

for key, tree_details in self._iter_entries():

1996

id_index.setdefault(key[2], set()).add(key)

1997

self._id_index = id_index

1998

return self._id_index

1999

2000

def _get_output_lines(self, lines):
    """Format lines for final output.

    :param lines: A sequence of lines containing the parents list and the
        path lines. An empty string is appended to it (the list passed in
        is modified).
    :return: A list of four strings: the format header, the crc32 line,
        the num_entries line and the joined inventory text.
    """
    format_header = DirState.HEADER_FORMAT_3
    lines.append('') # a final newline
    inventory_text = '\0\n\0'.join(lines)
    crc_line = 'crc32: %s\n' % (zlib.crc32(inventory_text),)
    # -3, 1 for num parents, 1 for ghosts, 1 for final newline
    num_entries = len(lines)-3
    entries_line = 'num_entries: %s\n' % (num_entries,)
    return [format_header, crc_line, entries_line, inventory_text]

2015

2016

def _make_deleted_row(self, fileid_utf8, parents):
    """Return a deleted row for fileid_utf8.

    :return: A pair: a fixed placeholder tuple carrying fileid_utf8, and
        the parents value passed in.
    """
    placeholder = ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0,
                   DirState.NULLSTAT, '')
    return placeholder, parents

2020

2021

def _num_present_parents(self):

2022

"""The number of parent entries in each record row."""

2023

return len(self._parents) - len(self._ghosts)

2024

2025

@staticmethod
def on_file(path, sha1_provider=None):
    """Construct a DirState on the file at path path.

    :param path: The path at which the dirstate file on disk should live.
    :param sha1_provider: an object meeting the Sha1Provider interface.
        If None, a DefaultSha1Provider is used.
    :return: An unlocked DirState object, associated with the given path.
    """
    provider = sha1_provider
    if provider is None:
        provider = DefaultSha1Provider()
    return DirState(path, provider)

2038

2039

def _read_dirblocks_if_needed(self):
    """Read in all the dirblocks from the file if they are not in memory.

    This populates self._dirblocks, and sets self._dirblock_state to
    IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block
    loading.
    """
    # The header is always checked first.
    self._read_header_if_needed()
    dirblocks_missing = (self._dirblock_state == DirState.NOT_IN_MEMORY)
    if dirblocks_missing:
        _read_dirblocks(self)

2049

2050

def _read_header(self):
    """This reads in the metadata header, and the parent ids.

    After reading in, the file should be positioned at the null
    just before the start of the first record in the file.

    Nothing is returned. The results are stored on self: _parents and
    _ghosts here, the crc and entry count by _read_prelude(), and the file
    offset reached in _end_of_header.
    """
    self._read_prelude()
    parent_fields = self._state_file.readline().split('\0')
    # The count is parsed (a malformed value raises) but not used further.
    int(parent_fields[0])
    # The last field is dropped as well as the count.
    self._parents = parent_fields[1:-1]
    ghost_fields = self._state_file.readline().split('\0')
    # On the ghost line the count is at index 1, not 0.
    # NOTE(review): presumably field 0 is empty because the previous line
    # ends with a NUL separator - confirm against the writer.
    int(ghost_fields[1])
    self._ghosts = ghost_fields[2:-1]
    self._header_state = DirState.IN_MEMORY_UNMODIFIED
    self._end_of_header = self._state_file.tell()

2069

2070

def _read_header_if_needed(self):
    """Read the header of the dirstate file if needed.

    :raises errors.ObjectNotLocked: if no lock token is held.
    """
    # inline this as it will be called a lot
    if not self._lock_token:
        raise errors.ObjectNotLocked(self)
    header_missing = (self._header_state == DirState.NOT_IN_MEMORY)
    if header_missing:
        self._read_header()

2077

2078

def _read_prelude(self):
    """Read in the prelude header of the dirstate file.

    This only reads in the stuff that is not connected to the crc
    checksum. The position will be correct to read in the rest of
    the file and check the checksum after this point.
    The next entry in the file should be the number of parents,
    and their ids. Followed by a newline.

    Sets self.crc_expected and self._num_entries.

    :raises errors.BzrError: if the format line is not the expected one,
        or the crc32 or num_entries line is missing.
    """
    state_file = self._state_file
    header = state_file.readline()
    if header != DirState.HEADER_FORMAT_3:
        raise errors.BzrError(
            'invalid header line: %r' % (header,))

    crc_prefix = 'crc32: '
    crc_line = state_file.readline()
    if not crc_line.startswith(crc_prefix):
        raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
    # [:-1] drops the line's final character before parsing.
    self.crc_expected = int(crc_line[len(crc_prefix):-1])

    entries_prefix = 'num_entries: '
    num_entries_line = state_file.readline()
    if not num_entries_line.startswith(entries_prefix):
        raise errors.BzrError('missing num_entries line')
    self._num_entries = int(num_entries_line[len(entries_prefix):-1])

2099

2100

def sha1_from_stat(self, path, stat_result, _pack_stat=pack_stat):
    """Find a sha1 given a stat lookup.

    :param path: Not used by this implementation.
    :param stat_result: The stat value; it is packed and used as the key
        into the packed_stat index.
    :return: The sha1 recorded for that packed stat, or None if there is
        none.
    """
    packed_stat = _pack_stat(stat_result)
    return self._get_packed_stat_index().get(packed_stat, None)

2103

2104

def _get_packed_stat_index(self):

2105

"""Get a packed_stat index of self._dirblocks."""

2106

if self._packed_stat_index is None:

2107

index = {}

2108

for key, tree_details in self._iter_entries():

2109

if tree_details[0][0] == 'f':

2110

index[tree_details[0][4]] = tree_details[0][1]

2111

self._packed_stat_index = index

2112

return self._packed_stat_index

2113

2114

def save(self):
    """Save any pending changes created during this session.

    We reuse the existing file, because that prevents race conditions with
    file creation, and use oslocks on it to prevent concurrent modification
    and reads - because dirstate's incremental data aggregation is not
    compatible with reading a modified file, and replacing a file in use by
    another process is impossible on Windows.

    A dirstate in read only mode should be smart enough though to validate
    that the file has not changed, and otherwise discard its cache and
    start over, to allow for fine grained read lock duration, so 'status'
    wont block 'commit' - for example.

    Nothing is written when _changes_aborted is set, when neither the
    header nor the dirblocks are modified in memory, or when a write lock
    is needed but cannot be obtained.
    """
    if self._changes_aborted:
        # Should this be a warning? For now, I'm expecting that places that
        # mark it inconsistent will warn, making a warning here redundant.
        trace.mutter('Not saving DirState because '
                '_changes_aborted is set.')
        return
    modified = DirState.IN_MEMORY_MODIFIED
    if (self._header_state != modified and
        self._dirblock_state != modified):
        # Nothing has changed in memory.
        return

    upgraded_lock = False
    if self._lock_state != 'w':
        upgraded_lock, new_lock = self._lock_token.temporary_write_lock()
        # Switch over to the new lock, as the old one may be closed.
        # TODO: jam 20070315 We should validate the disk file has
        #       not changed contents. Since temporary_write_lock may
        #       not be an atomic operation.
        self._lock_token = new_lock
        self._state_file = new_lock.f
        if not upgraded_lock:
            # We couldn't grab a write lock, so we switch back to a read one
            return
    try:
        # Overwrite from the start, then truncate in case the new content
        # is shorter than the old.
        self._state_file.seek(0)
        self._state_file.writelines(self.get_lines())
        self._state_file.truncate()
        self._state_file.flush()
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
    finally:
        if upgraded_lock:
            self._lock_token = self._lock_token.restore_read_lock()
            self._state_file = self._lock_token.f
            # TODO: jam 20070315 We should validate the disk file has
            #       not changed contents. Since restore_read_lock may
            #       not be an atomic operation.

2163

2164

def _set_data(self, parent_ids, dirblocks):
    """Set the full dirstate data in memory.

    This is an internal function used to completely replace the objects
    in memory state. It puts the dirstate into state 'full-dirty': both
    the header and the dirblocks are marked as modified in memory, and the
    id and packed_stat indexes are discarded.

    :param parent_ids: A list of parent tree revision ids; a copy of it
        is stored.
    :param dirblocks: A list containing one tuple for each directory in the
        tree. Each tuple contains the directory path and a list of entries
        found in that directory. The list is stored as is, not copied.
    """
    # our memory copy is now authoritative.
    self._dirblocks = dirblocks
    modified = DirState.IN_MEMORY_MODIFIED
    self._header_state = modified
    self._dirblock_state = modified
    self._parents = list(parent_ids)
    # Indexes built from the old data no longer apply.
    self._id_index = None
    self._packed_stat_index = None

2182

2183

def set_path_id(self, path, new_id):
    """Change the id of path to new_id in the current working tree.

    :param path: The path inside the tree to set - '' is the root, 'foo'
        is the path foo in the root. Only '' is supported at present.
    :param new_id: The new id to assign to the path. This must be a utf8
        file id (not unicode, and not None).
    :raises NotImplementedError: if path is not empty.
    """
    self._read_dirblocks_if_needed()
    if len(path):
        # TODO: logic not written
        raise NotImplementedError(self.set_path_id)
    # TODO: check new id is unique
    old_entry = self._get_entry(0, path_utf8=path)
    old_key = old_entry[0]
    if old_key[2] == new_id:
        # Nothing to change.
        return
    # mark the old path absent, and insert a new root path that reuses the
    # old root's packed stat.
    self._make_absent(old_entry)
    self.update_minimal(('', '', new_id), 'd',
        path_utf8='', packed_stat=old_entry[1][0][4])
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    if self._id_index is not None:
        # NOTE(review): this files the OLD key (which carries the old file
        # id) under new_id - confirm that is intended rather than indexing
        # the new ('', '', new_id) key.
        self._id_index.setdefault(new_id, set()).add(old_key)

2207

2208

def set_parent_trees(self, trees, ghosts):
    """Set the parent trees for the dirstate.

    The entry table is regenerated from scratch: rows that are present in
    the current tree (column 0) are kept, the details of every non-ghost
    parent tree are stitched into the following columns, and the result is
    sorted and handed to _entries_to_current_state.

    :param trees: A list of revision_id, tree tuples. tree must be provided
        even if the revision_id refers to a ghost: supply an empty tree in
        this case.
    :param ghosts: A list of the revision_ids that are ghosts at the time
        of setting.
    """
    # TODO: generate a list of parent indexes to preserve to save
    # processing specific parent trees. In the common case one tree will
    # be preserved - the left most parent.
    # TODO: if the parent tree is a dirstate, we might want to walk them
    # all by path in parallel for 'optimal' common-case performance.
    self._read_dirblocks_if_needed()
    # TODO future sketch: Examine the existing parents to generate a change
    # map and then walk the new parent trees only, mapping them into the
    # dirstate. Walk the dirstate at the same time to remove unreferenced
    # entries.
    #
    # For now: loop over all entries in the dirstate, cherry picking
    # entries from the parent trees, if they are not ghost trees. After we
    # finish walking the dirstate, all entries not in the dirstate are
    # deletes, so they get appended as per the design discussions.
    #
    # During the update we need to answer two questions:
    #  - which other keys contain a given file id, in order to create
    #    cross-path links. We don't simply use the inventories of the other
    #    trees because that leads either to double touching or to
    #    accessing missing keys;
    #  - which other keys contain a given path.
    #
    # We could use parallel iterators, but because file id data may be
    # scattered throughout we would not save any index overhead: everything
    # has to be looked at anyway. The common cases are adding a new parent
    # (merge) and replacing completely (commit); commit is more common, so
    # optimise merge later.
    absent = DirState.NULL_PARENT_DETAILS
    # key -> list of per-tree details, including the root row.
    rows = {}
    # file_id -> set of keys that mention that id.
    keys_by_id = {}

    # ---- start generation of full tree mapping data
    # Ghost parents contribute no column.
    real_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
    real_tree_count = len(real_trees)

    # one: the current tree
    for row in self._iter_entries():
        working_details = row[1][0]
        if working_details[0] in 'ar': # absent, relocated
            # not in the current tree
            continue
        key = row[0]
        rows[key] = [working_details] + [absent] * real_tree_count
        keys_by_id[key[2]] = set([key])

    # now the parent trees:
    for column, tree in enumerate(real_trees):
        # column 0 belongs to the current tree, so parents start at 1.
        column = column + 1
        # Padding for the trees that have not been processed yet
        # (columns column+1 .. real_tree_count).
        padding = [absent] * (real_tree_count - column)
        for path, inv_entry in tree.inventory.iter_entries_by_dir():
            # For each item we first repoint any rows holding this id at
            # other paths, then create or update the row at the right
            # path, and only then record the id -> key mapping. Adding the
            # mapping last means every mapping seen so far is valid and
            # already has correct relocation records.
            file_id = inv_entry.file_id
            path_utf8 = path.encode('utf8')
            dirname, basename = osutils.split(path_utf8)
            new_key = (dirname, basename, file_id)
            known_keys = keys_by_id.setdefault(file_id, set())
            # tree index consistency (the vertical axis of the matrix):
            # every other row for this id must point at the real path in
            # this column.
            # TODO:PROFILING: It might be faster to just update rather
            # than checking if we need to, and then overwrite the one we
            # are located at.
            pointer_here = ('r', path_utf8, 0, False, '')
            for other_key in known_keys:
                if other_key != new_key:
                    rows[other_key][column] = pointer_here
            # by path consistency: insert into an existing row (trivial),
            # or add a new one with pointers for the earlier columns.
            if new_key in known_keys:
                rows[new_key][column] = self._inv_entry_to_details(inv_entry)
                continue
            # New row (the horizontal axis): columns 0 .. column-1 have to
            # say where this id lives in those trees.
            if not len(known_keys):
                # First occurrence of file_id: it is nowhere else.
                new_row = [absent] * column
            else:
                # grab any one entry, use it to find the right path.
                # TODO: optimise this to reduce memory use in highly
                # fragmented situations by reusing the relocation
                # records.
                a_key = iter(known_keys).next()
                new_row = []
                for earlier in rows[a_key][:column]:
                    if earlier[0] in ('r', 'a'):
                        # a pointer or a missing statement: use it as is.
                        new_row.append(earlier)
                    else:
                        # a_key is the real location: point at it.
                        real_path = ('/'.join(a_key[0:2])).strip('/')
                        new_row.append(('r', real_path, 0, False, ''))
            new_row.append(self._inv_entry_to_details(inv_entry))
            new_row.extend(padding)
            rows[new_key] = new_row
            known_keys.add(new_key)
    # --- end generation of full tree mappings

    # sort and output all the entries
    self._entries_to_current_state(self._sort_entries(rows.items()))
    self._parents = [rev_id for rev_id, tree in trees]
    self._ghosts = list(ghosts)
    self._header_state = DirState.IN_MEMORY_MODIFIED
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    self._id_index = keys_by_id

2347

2348

def _sort_entries(self, entry_list):

2349

"""Given a list of entries, sort them into the right order.

2350

2351

This is done when constructing a new dirstate from trees - normally we

2352

try to keep everything in sorted blocks all the time, but sometimes

2353

it's easier to sort after the fact.

2354

"""

2355

def _key(entry):

2356

# sort by: directory parts, file name, file id

2357

return entry[0][0].split('/'), entry[0][1], entry[0][2]

2358

return sorted(entry_list, key=_key)

2359

2360

def set_state_from_inventory(self, new_inv):
    """Set new_inv as the current state.

    This API is called by tree transform, and will usually occur with
    existing parent trees.

    The existing rows and the inventory are walked together in dirblock
    order; rows only in the inventory are inserted, rows only in the
    dirstate are marked absent, and rows in both are updated when the
    executable bit or the kind differs.

    :param new_inv: The inventory object to set current state from.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(1,
            "set_state_from_inventory called; please mutate the tree instead")
    self._read_dirblocks_if_needed()

    def next_or_none(iterator):
        # Step an iterator, using None to signal exhaustion.
        try:
            return iterator.next()
        except StopIteration:
            return None

    # You might think we could just synthesize a new dirstate directly
    # since we're processing it in the right order. However, there may be
    # any number of parent trees and relocation pointers, and we don't
    # want to duplicate that handling here.
    new_iterator = new_inv.iter_entries_by_dir()
    # The dirstate is modified while we walk it, so iterate over a
    # (shallow) snapshot of the current rows.
    old_iterator = iter(list(self._iter_entries()))
    # both must have roots so this is safe:
    current_new = new_iterator.next()
    current_old = old_iterator.next()
    while current_new or current_old:
        # skip entries in old that are not really there
        if current_old and current_old[1][0][0] in 'ar':
            # relocated or absent
            current_old = next_or_none(old_iterator)
            continue
        if current_new:
            # convert new into dirblock style
            inv_entry = current_new[1]
            new_path_utf8 = current_new[0].encode('utf8')
            new_dirname, new_basename = osutils.split(new_path_utf8)
            new_id = inv_entry.file_id
            new_entry_key = (new_dirname, new_basename, new_id)
            current_new_minikind = DirState._kind_to_minikind[inv_entry.kind]
            if current_new_minikind == 't':
                fingerprint = inv_entry.reference_revision or ''
            else:
                # We normally only insert or remove records, or update
                # them when they have significantly changed; then the
                # fingerprint is erased. Unaffected records should
                # normally not be updated at all.
                fingerprint = ''
        else:
            # for safety disable variables
            inv_entry = new_path_utf8 = new_dirname = new_basename = None
            new_id = new_entry_key = None

        # Classify the pair. There is no value strictly greater than
        # everything, so both end conditions are handled explicitly.
        same_row = False
        if not current_old:
            # old is finished: everything left in new is an insertion.
            insert_new = True
        elif not current_new:
            # new is finished: everything left in old is gone.
            insert_new = False
        elif new_entry_key == current_old[0]:
            # same path and id on both sides - the common case.
            same_row = True
            insert_new = False
        else:
            # new sorts before old -> insertion; otherwise we have moved
            # past where the old key would be, so it must be gone.
            insert_new = (
                cmp_by_dirs(new_dirname, current_old[0][0]) < 0
                or (new_dirname == current_old[0][0]
                    and new_entry_key[1:] < current_old[0][1:]))

        if same_row:
            # TODO: update the record if anything significant has changed.
            # the minimal required trigger is if the execute bit or cached
            # kind has changed.
            old_details = current_old[1][0]
            if (old_details[3] != inv_entry.executable or
                old_details[0] != current_new_minikind):
                self.update_minimal(current_old[0], current_new_minikind,
                    executable=inv_entry.executable,
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
            # both sides are dealt with, move on
            current_old = next_or_none(old_iterator)
            current_new = next_or_none(new_iterator)
        elif insert_new:
            # add an entry for this and advance new
            self.update_minimal(new_entry_key, current_new_minikind,
                executable=inv_entry.executable,
                path_utf8=new_path_utf8, fingerprint=fingerprint)
            current_new = next_or_none(new_iterator)
        else:
            self._make_absent(current_old)
            current_old = next_or_none(old_iterator)
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    self._id_index = None
    self._packed_stat_index = None

2466

2467

def _make_absent(self, current_old):
    """Mark current_old - an entry - as absent for tree 0.

    :param current_old: The (key, tree details list) entry to retire from
        tree 0.
    :return: True if this was the last details entry for the entry key:
        that is, if the underlying block has had the entry removed, thus
        shrinking in length.
    """
    key = current_old[0]
    # Collect the keys this id will still live at once tree 0 lets go of
    # it, so that their tree 0 cross references can be updated.
    remaining_keys = set()
    # Don't check the working tree, because it's going.
    for details in current_old[1][1:]:
        minikind = details[0]
        if minikind == 'r': # relocated
            # record the key for the real path.
            remaining_keys.add(
                tuple(osutils.split(details[1])) + (key[2],))
        elif minikind not in 'ar':
            # really present at this key in that tree
            remaining_keys.add(key)
        # absent rows are not present at any path.
    last_reference = key not in remaining_keys
    if last_reference:
        # The row consists entirely of the item being marked absent plus
        # relocated or absent details for the other trees: it is
        # meaningless, so remove it.
        block = self._find_block(key)
        entry_index, present = self._find_entry_index(key, block[1])
        if not present:
            raise AssertionError('could not find entry for %s' % (current_old,))
        block[1].pop(entry_index)
        # if we have an id_index in use, remove this key from it for this id.
        if self._id_index is not None:
            self._id_index[key[2]].remove(key)
    # Record the id as absent from tree 0 in every remaining row. The
    # existing details there may be either the record being deleted (other
    # trees have the id at this path) or relocations.
    for update_key in remaining_keys:
        block_index, present = self._find_block_index_from_key(update_key)
        if not present:
            raise AssertionError('could not find block for %s' % (update_key,))
        rows = self._dirblocks[block_index][1]
        row_index, present = self._find_entry_index(update_key, rows)
        if not present:
            raise AssertionError('could not find entry for %s' % (update_key,))
        update_tree_details = rows[row_index][1]
        # it must not be absent at the moment
        if update_tree_details[0][0] == 'a': # absent
            raise AssertionError('bad row %r' % (update_tree_details,))
        update_tree_details[0] = DirState.NULL_PARENT_DETAILS
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    return last_reference

2518

2519

def update_minimal(self, key, minikind, executable=False, fingerprint='',
                   packed_stat=None, size=0, path_utf8=None):
    """Update an entry to the state in tree 0.

    This will either create a new entry at 'key' or update an existing one.
    It also makes sure that any other records which might mention this are
    updated as well.

    :param key: (dir, name, file_id) for the new entry
    :param minikind: The type for the entry ('f' == 'file', 'd' ==
        'directory'), etc.
    :param executable: Should the executable bit be set?
    :param fingerprint: Simple fingerprint for new entry: canonical-form
        sha1 for files, referenced revision id for subtrees, etc.
    :param packed_stat: Packed stat value for new entry.
    :param size: Size information for new entry
    :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
        extra computation.
    :raises AssertionError: if a key recorded in the id index cannot be
        found in the dirblocks, or if path_utf8 is needed to write a
        relocation pointer but was not supplied.

    If packed_stat and fingerprint are not given, they're invalidated in
    the entry.
    """
    block = self._find_block(key)[1]
    if packed_stat is None:
        packed_stat = DirState.NULLSTAT
    # XXX: Some callers pass '' as the packed_stat, and it seems to be
    # sometimes present in the dirstate - this seems oddly inconsistent.
    # mbp 20071008
    entry_index, present = self._find_entry_index(key, block)
    new_details = (minikind, fingerprint, size, executable, packed_stat)
    id_index = self._get_id_index()
    if not present:
        # new entry, synthesise the cross references here.
        existing_keys = id_index.setdefault(key[2], set())
        if not existing_keys:
            # not currently in the state, simplest case
            new_entry = key, [new_details] + self._empty_parent_info()
        else:
            # present at one or more existing other paths.
            # grab one of them and use it to generate parent
            # relocation/absent entries.
            new_entry = key, [new_details]
            for other_key in existing_keys:
                # change the record at other to be a pointer to this new
                # record. The loop looks similar to the change to
                # relocations when updating an existing record but it is
                # not: the test for existing kinds is different. This can
                # be factored out to a helper though.
                other_block_index, present = self._find_block_index_from_key(other_key)
                if not present:
                    raise AssertionError('could not find block for %s' % (other_key,))
                other_entry_index, present = self._find_entry_index(other_key,
                    self._dirblocks[other_block_index][1])
                if not present:
                    raise AssertionError('could not find entry for %s' % (other_key,))
                if path_utf8 is None:
                    raise AssertionError('no path')
                self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
                    ('r', path_utf8, 0, False, '')

            # other_key is deliberately whichever key the loop above
            # visited last: any one existing row will do as the template
            # for the parent columns of the new row.
            num_present_parents = self._num_present_parents()
            for lookup_index in xrange(1, num_present_parents + 1):
                # grab any one entry, use it to find the right path.
                # TODO: optimise this to reduce memory use in highly
                # fragmented situations by reusing the relocation
                # records.
                update_block_index, present = \
                    self._find_block_index_from_key(other_key)
                if not present:
                    raise AssertionError('could not find block for %s' % (other_key,))
                update_entry_index, present = \
                    self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
                if not present:
                    raise AssertionError('could not find entry for %s' % (other_key,))
                update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
                if update_details[0] in 'ar': # relocated, absent
                    # it is a pointer or absent in lookup_index's tree,
                    # use it as is.
                    new_entry[1].append(update_details)
                else:
                    # we have the right key, make a pointer to it.
                    pointer_path = osutils.pathjoin(*other_key[0:2])
                    new_entry[1].append(('r', pointer_path, 0, False, ''))
        block.insert(entry_index, new_entry)
        existing_keys.add(key)
    else:
        # Does the new state matter?
        block[entry_index][1][0] = new_details
        # parents cannot be affected by what we do.
        # other occurrences of this id can be found
        # from the id index.
        # ---
        # tree index consistency: All other paths for this id in this tree
        # index must point to the correct path. We have to loop here because
        # we may have passed entries in the state with this file id already
        # that were absent - where parent entries are - and they need to be
        # converted to relocated.
        if path_utf8 is None:
            raise AssertionError('no path')
        for entry_key in id_index.setdefault(key[2], set()):
            # TODO:PROFILING: It might be faster to just update
            # rather than checking if we need to, and then overwrite
            # the one we are located at.
            if entry_key != key:
                # this file id is at a different path in one of the
                # other trees, so put absent pointers there
                # This is the vertical axis in the matrix, all pointing
                # to the real path.
                block_index, present = self._find_block_index_from_key(entry_key)
                if not present:
                    # The key must be interpolated with %, otherwise the
                    # exception carries the raw template and the key as
                    # two separate arguments.
                    raise AssertionError('not present: %r' % (entry_key,))
                entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
                if not present:
                    raise AssertionError('not present: %r' % (entry_key,))
                self._dirblocks[block_index][1][entry_index][1][0] = \
                    ('r', path_utf8, 0, False, '')
    # add a containing dirblock if needed.
    if new_details[0] == 'd':
        subdir_key = (osutils.pathjoin(*key[0:2]), '', '')
        block_index, present = self._find_block_index_from_key(subdir_key)
        if not present:
            self._dirblocks.insert(block_index, (subdir_key[0], []))

    self._dirblock_state = DirState.IN_MEMORY_MODIFIED

2643

2644

def _validate(self):

2645

"""Check that invariants on the dirblock are correct.

2646

2647

This can be useful in debugging; it shouldn't be necessary in

2648

normal code.

2649

2650

This must be called with a lock held.

2651

"""

2652

# NOTE: This must always raise AssertionError not just assert,

2653

# otherwise it may not behave properly under python -O

2654

2655

# TODO: All entries must have some content that's not 'a' or 'r',

2656

# otherwise it could just be removed.

2657

2658

# TODO: All relocations must point directly to a real entry.

2659

2660

# TODO: No repeated keys.

2661

2662

# -- mbp 20070325

2663

from pprint import pformat

2664

self._read_dirblocks_if_needed()

2665

if len(self._dirblocks) > 0:

2666

if not self._dirblocks[0][0] == '':

2667

raise AssertionError(

2668

"dirblocks don't start with root block:\n" + \

2669

pformat(self._dirblocks))

2670

if len(self._dirblocks) > 1:

2671

if not self._dirblocks[1][0] == '':

2672

raise AssertionError(

2673

"dirblocks missing root directory:\n" + \

2674

pformat(self._dirblocks))

2675

# the dirblocks are sorted by their path components, name, and dir id

2676

dir_names = [d[0].split('/')

2677

for d in self._dirblocks[1:]]

2678

if dir_names != sorted(dir_names):

2679

raise AssertionError(

2680

"dir names are not in sorted order:\n" + \

2681

pformat(self._dirblocks) + \

2682

"\nkeys:\n" +

2683

pformat(dir_names))

2684

for dirblock in self._dirblocks:

2685

# within each dirblock, the entries are sorted by filename and

2686

# then by id.

2687

for entry in dirblock[1]:

2688

if dirblock[0] != entry[0][0]:

2689

raise AssertionError(

2690

"entry key for %r"

2691

"doesn't match directory name in\n%r" %

2692

(entry, pformat(dirblock)))

2693

if dirblock[1] != sorted(dirblock[1]):

2694

raise AssertionError(

2695

"dirblock for %r is not sorted:\n%s" % \

2696

(dirblock[0], pformat(dirblock)))

2697

2698

def check_valid_parent():

2699

"""Check that the current entry has a valid parent.

2700

2701

This makes sure that the parent has a record,

2702

and that the parent isn't marked as "absent" in the

2703

current tree. (It is invalid to have a non-absent file in an absent

2704

directory.)

2705

"""

2706

if entry[0][0:2] == ('', ''):

2707

# There should be no parent for the root row

2708

return

2709

parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])

2710

if parent_entry == (None, None):

2711

raise AssertionError(

2712

"no parent entry for: %s in tree %s"

2713

% (this_path, tree_index))

2714

if parent_entry[1][tree_index][0] != 'd':

2715

raise AssertionError(

2716

"Parent entry for %s is not marked as a valid"

2717

" directory. %s" % (this_path, parent_entry,))

2718

2719

# For each file id, for each tree: either

2720

# the file id is not present at all; all rows with that id in the

2721

# key have it marked as 'absent'

2722

# OR the file id is present under exactly one name; any other entries

2723

# that mention that id point to the correct name.

2724

2725

# We check this with a dict per tree pointing either to the present

2726

# name, or None if absent.

2727

tree_count = self._num_present_parents() + 1

2728

id_path_maps = [dict() for i in range(tree_count)]

2729

# Make sure that all renamed entries point to the correct location.

2730

for entry in self._iter_entries():

2731

file_id = entry[0][2]

2732

this_path = osutils.pathjoin(entry[0][0], entry[0][1])

2733

if len(entry[1]) != tree_count:

2734

raise AssertionError(

2735

"wrong number of entry details for row\n%s" \

2736

",\nexpected %d" % \

2737

(pformat(entry), tree_count))

2738

absent_positions = 0

2739

for tree_index, tree_state in enumerate(entry[1]):

2740

this_tree_map = id_path_maps[tree_index]

2741

minikind = tree_state[0]

2742

if minikind in 'ar':

2743

absent_positions += 1

2744

# have we seen this id before in this column?

2745

if file_id in this_tree_map:

2746

previous_path, previous_loc = this_tree_map[file_id]

2747

# any later mention of this file must be consistent with

2748

# what was said before

2749

if minikind == 'a':

2750

if previous_path is not None:

2751

raise AssertionError(

2752

"file %s is absent in row %r but also present " \

2753

"at %r"% \

2754

(file_id, entry, previous_path))

2755

elif minikind == 'r':

2756

target_location = tree_state[1]

2757

if previous_path != target_location:

2758

raise AssertionError(

2759

"file %s relocation in row %r but also at %r" \

2760

% (file_id, entry, previous_path))

2761

else:

2762

# a file, directory, etc - may have been previously

2763

# pointed to by a relocation, which must point here

2764

if previous_path != this_path:

2765

raise AssertionError(

2766

"entry %r inconsistent with previous path %r "

2767

"seen at %r" %

2768

(entry, previous_path, previous_loc))

2769

check_valid_parent()

2770

else:

2771

if minikind == 'a':

2772

# absent; should not occur anywhere else

2773

this_tree_map[file_id] = None, this_path

2774

elif minikind == 'r':

2775

# relocation, must occur at expected location

2776

this_tree_map[file_id] = tree_state[1], this_path

2777

else:

2778

this_tree_map[file_id] = this_path, this_path

2779

check_valid_parent()

2780

if absent_positions == tree_count:

2781

raise AssertionError(

2782

"entry %r has no data for any tree." % (entry,))

2783

2784

def _wipe_state(self):
    """Forget all state information about the dirstate.

    Header and dirblocks are flagged as not in memory and every cached
    value is reset to its empty form.
    """
    not_loaded = DirState.NOT_IN_MEMORY
    self._header_state = not_loaded
    self._dirblock_state = not_loaded
    self._changes_aborted = False
    # Three distinct fresh lists - they must not share one object.
    self._parents, self._ghosts, self._dirblocks = [], [], []
    self._id_index = None
    self._packed_stat_index = None
    self._end_of_header = None
    self._cutoff_time = None
    self._split_path_cache = {}

2797

2798

def lock_read(self):
    """Acquire a read lock on the dirstate.

    Any state held in memory is wiped once the lock has been taken.

    :raises errors.LockContention: if this object already holds a lock
        token.
    """
    held = self._lock_token
    if held is not None:
        raise errors.LockContention(held)
    # TODO: jam 20070301 Rather than wiping completely, if the blocks are
    #       already in memory, we could read just the header and check for
    #       any modification. If not modified, we can just leave things
    #       alone
    token = lock.ReadLock(self._filename)
    self._lock_token = token
    self._lock_state = 'r'
    self._state_file = token.f
    self._wipe_state()

2810

2811

def lock_write(self):
    """Acquire a write lock on the dirstate.

    Any state held in memory is wiped once the lock has been taken.

    :raises errors.LockContention: if this object already holds a lock
        token.
    """
    held = self._lock_token
    if held is not None:
        raise errors.LockContention(held)
    # TODO: jam 20070301 Rather than wiping completely, if the blocks are
    #       already in memory, we could read just the header and check for
    #       any modification. If not modified, we can just leave things
    #       alone
    token = lock.WriteLock(self._filename)
    self._lock_token = token
    self._lock_state = 'w'
    self._state_file = token.f
    self._wipe_state()

2823

2824

def unlock(self):
    """Drop any locks held on the dirstate.

    :raises errors.LockNotHeld: if no lock token is currently held.
    """
    token = self._lock_token
    if token is None:
        raise errors.LockNotHeld(self)
    # TODO: jam 20070301 Rather than wiping completely, if the blocks are
    #       already in memory, we could read just the header and check for
    #       any modification. If not modified, we can just leave things
    #       alone
    # The file handle and lock mode are cleared before the token itself is
    # released.
    self._state_file = self._lock_state = None
    token.unlock()
    self._lock_token = None
    self._split_path_cache = {}

2837

2838

def _requires_lock(self):

2839

"""Check that a lock is currently held by someone on the dirstate."""

2840

if not self._lock_token:

2841

raise errors.ObjectNotLocked(self)

2842

2843

2844

def py_update_entry(state, entry, abspath, stat_value,
                    _stat_to_minikind=DirState._stat_to_minikind,
                    _pack_stat=pack_stat):
    """Update the entry based on what is actually on disk.

    This function only calculates the sha if it needs to - if the entry is
    uncachable, or clearly different to the first parent's entry, no sha
    is calculated, and None is returned.

    :param state: The dirstate this entry is in.
    :param entry: This is the dirblock entry for the file in question.
    :param abspath: The path on disk for this file.
    :param stat_value: The stat value done on the path.
    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
        target of a symlink.
    """
    try:
        # 0xF000 has the same value as the octal literal 0170000: it keeps
        # only the file-type bits of st_mode.
        minikind = _stat_to_minikind[stat_value.st_mode & 0xF000]
    except KeyError:
        # Unhandled kind
        return None
    packed_stat = _pack_stat(stat_value)
    (old_minikind, old_link_or_sha1, old_size,
     old_executable, old_packed_stat) = entry[1][0]

    # A directory on disk is still treated as a tree reference when that
    # is what was recorded.
    if minikind == 'd' and old_minikind == 't':
        minikind = 't'
    if minikind == old_minikind and packed_stat == old_packed_stat:
        # The stat hasn't changed since we saved, so we can re-use the
        # saved sha hash.
        if minikind == 'd':
            return None
        # size should also be in packed_stat
        if old_size == stat_value.st_size:
            return old_link_or_sha1

    # If we have gotten this far, that means that we need to actually
    # process this entry.
    link_or_sha1 = None
    if minikind == 'f':
        executable = state._is_executable(stat_value.st_mode, old_executable)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        cutoff = state._cutoff_time
        worth_hashing = (stat_value.st_mtime < cutoff
                         and stat_value.st_ctime < cutoff
                         and len(entry[1]) > 1
                         and entry[1][1][0] != 'a')
        if worth_hashing:
            # Could check for size changes for further optimised
            # avoidance of sha1's. However the most prominent case of
            # over-shaing is during initial add, which this catches.
            # Besides, if content filtering happens, size and sha
            # are calculated at the same time, so checking just the size
            # gains nothing w.r.t. performance.
            link_or_sha1 = state._sha1_file(abspath)
            new_details = ('f', link_or_sha1, stat_value.st_size,
                           executable, packed_stat)
        else:
            new_details = ('f', '', stat_value.st_size,
                           executable, DirState.NULLSTAT)
        entry[1][0] = new_details
    elif minikind == 'd':
        entry[1][0] = ('d', '', 0, False, packed_stat)
        if old_minikind != 'd':
            # This changed from something into a directory. Make sure we
            # have a directory block for it. This doesn't happen very
            # often, so this doesn't have to be super fast.
            dirname, basename = entry[0][0], entry[0][1]
            block_index, entry_index, dir_present, file_present = \
                state._get_block_entry_index(dirname, basename, 0)
            state._ensure_block(block_index, entry_index,
                                osutils.pathjoin(dirname, basename))
    elif minikind == 'l':
        link_or_sha1 = state._read_link(abspath, old_link_or_sha1)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        cutoff = state._cutoff_time
        if stat_value.st_mtime < cutoff and stat_value.st_ctime < cutoff:
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
                           False, packed_stat)
        else:
            # The link target is still returned, it just is not stored.
            entry[1][0] = ('l', '', stat_value.st_size,
                           False, DirState.NULLSTAT)
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
    return link_or_sha1
update_entry = py_update_entry

2931

2932

2933

class ProcessEntryPython(object):

2934

2935

__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",

2936

"last_source_parent", "last_target_parent", "include_unchanged",

2937

"use_filesystem_for_exec", "utf8_decode", "searched_specific_files",

2938

"search_specific_files", "state", "source_index", "target_index",

2939

"want_unversioned", "tree"]

2940

2941

def __init__(self, include_unchanged, use_filesystem_for_exec,

2942

search_specific_files, state, source_index, target_index,

2943

want_unversioned, tree):

2944

self.old_dirname_to_file_id = {}

2945

self.new_dirname_to_file_id = {}

2946

# Just a sentry, so that _process_entry can say that this

2947

# record is handled, but isn't interesting to process (unchanged)

2948

self.uninteresting = object()

2949

# Using a list so that we can access the values and change them in

2950

# nested scope. Each one is [path, file_id, entry]

2951

self.last_source_parent = [None, None]

2952

self.last_target_parent = [None, None]

2953

self.include_unchanged = include_unchanged

2954

self.use_filesystem_for_exec = use_filesystem_for_exec

2955

self.utf8_decode = cache_utf8._utf8_decode

2956

# for all search_indexs in each path at or under each element of

2957

# search_specific_files, if the detail is relocated: add the id, and add the

2958

# relocated path as one to search if its not searched already. If the

2959

# detail is not relocated, add the id.

2960

self.searched_specific_files = set()

2961

self.search_specific_files = search_specific_files

2962

self.state = state

2963

self.source_index = source_index

2964

self.target_index = target_index

2965

self.want_unversioned = want_unversioned

2966

self.tree = tree

2967

2968

def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):

2969

"""Compare an entry and real disk to generate delta information.

2970

2971

:param path_info: top_relpath, basename, kind, lstat, abspath for

2972

the path of entry. If None, then the path is considered absent.

2973

(Perhaps we should pass in a concrete entry for this ?)

2974

Basename is returned as a utf8 string because we expect this

2975

tuple will be ignored, and don't want to take the time to

2976

decode.

2977

:return: None if these don't match

2978

A tuple of information about the change, or

2979

the object 'uninteresting' if these match, but are

2980

basically identical.

2981

"""

2982

if self.source_index is None:

2983

source_details = DirState.NULL_PARENT_DETAILS

2984

else:

2985

source_details = entry[1][self.source_index]

2986

target_details = entry[1][self.target_index]

2987

target_minikind = target_details[0]

2988

if path_info is not None and target_minikind in 'fdlt':

2989

if not (self.target_index == 0):

2990

raise AssertionError()

2991

link_or_sha1 = update_entry(self.state, entry,

2992

abspath=path_info[4], stat_value=path_info[3])

2993

# The entry may have been modified by update_entry

2994

target_details = entry[1][self.target_index]

2995

target_minikind = target_details[0]

2996

else:

2997

link_or_sha1 = None

2998

file_id = entry[0][2]

2999

source_minikind = source_details[0]

3000

if source_minikind in 'fdltr' and target_minikind in 'fdlt':

3001

# claimed content in both: diff

3002

# r | fdlt | | add source to search, add id path move and perform

3003

# | | | diff check on source-target

3004

# r | fdlt | a | dangling file that was present in the basis.

3005

# | | | ???

3006

if source_minikind in 'r':

3007

# add the source to the search path to find any children it

3008

# has. TODO ? : only add if it is a container ?

3009

if not osutils.is_inside_any(self.searched_specific_files,

3010

source_details[1]):

3011

self.search_specific_files.add(source_details[1])

3012

# generate the old path; this is needed for stating later

3013

# as well.

3014

old_path = source_details[1]

3015

old_dirname, old_basename = os.path.split(old_path)

3016

path = pathjoin(entry[0][0], entry[0][1])

3017

old_entry = self.state._get_entry(self.source_index,

3018

path_utf8=old_path)

3019

# update the source details variable to be the real

3020

# location.

3021

if old_entry == (None, None):

3022

raise errors.CorruptDirstate(self.state._filename,

3023

"entry '%s/%s' is considered renamed from %r"

3024

" but source does not exist\n"

3025

"entry: %s" % (entry[0][0], entry[0][1], old_path, entry))

3026

source_details = old_entry[1][self.source_index]

3027

source_minikind = source_details[0]

3028

else:

3029

old_dirname = entry[0][0]

3030

old_basename = entry[0][1]

3031

old_path = path = None

3032

if path_info is None:

3033

# the file is missing on disk, show as removed.

3034

content_change = True

3035

target_kind = None

3036

target_exec = False

3037

else:

3038

# source and target are both versioned and disk file is present.

3039

target_kind = path_info[2]

3040

if target_kind == 'directory':

3041

if path is None:

3042

old_path = path = pathjoin(old_dirname, old_basename)

3043

self.new_dirname_to_file_id[path] = file_id

3044

if source_minikind != 'd':

3045

content_change = True

3046

else:

3047

# directories have no fingerprint

3048

content_change = False

3049

target_exec = False

3050

elif target_kind == 'file':

3051

if source_minikind != 'f':

3052

content_change = True

3053

else:

3054

# If the size is the same, check the sha:

3055

if target_details[2] == source_details[2]:

3056

if link_or_sha1 is None:

3057

# Stat cache miss:

3058

statvalue, link_or_sha1 = \

3059

self.sha1_provider.stat_and_sha1(

3060

path_info[4])

3061

self.state._observed_sha1(entry, link_or_sha1,

3062

statvalue)

3063

content_change = (link_or_sha1 != source_details[1])

3064

else:

3065

# Size changed, so must be different

3066

content_change = True

3067

# Target details is updated at update_entry time

3068

if self.use_filesystem_for_exec:

3069

# We don't need S_ISREG here, because we are sure

3070

# we are dealing with a file.

3071

target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)

3072

else:

3073

target_exec = target_details[3]

3074

elif target_kind == 'symlink':

3075

if source_minikind != 'l':

3076

content_change = True

3077

else:

3078

content_change = (link_or_sha1 != source_details[1])

3079

target_exec = False

3080

elif target_kind == 'tree-reference':

3081

if source_minikind != 't':

3082

content_change = True

3083

else:

3084

content_change = False

3085

target_exec = False

3086

else:

3087

raise Exception, "unknown kind %s" % path_info[2]

3088

if source_minikind == 'd':

3089

if path is None:

3090

old_path = path = pathjoin(old_dirname, old_basename)

3091

self.old_dirname_to_file_id[old_path] = file_id

3092

# parent id is the entry for the path in the target tree

3093

if old_dirname == self.last_source_parent[0]:

3094

source_parent_id = self.last_source_parent[1]

3095

else:

3096

try:

3097

source_parent_id = self.old_dirname_to_file_id[old_dirname]

3098

except KeyError:

3099

source_parent_entry = self.state._get_entry(self.source_index,

3100

path_utf8=old_dirname)

3101

source_parent_id = source_parent_entry[0][2]

3102

if source_parent_id == entry[0][2]:

3103

# This is the root, so the parent is None

3104

source_parent_id = None

3105

else:

3106

self.last_source_parent[0] = old_dirname

3107

self.last_source_parent[1] = source_parent_id

3108

new_dirname = entry[0][0]

3109

if new_dirname == self.last_target_parent[0]:

3110

target_parent_id = self.last_target_parent[1]

3111

else:

3112

try:

3113

target_parent_id = self.new_dirname_to_file_id[new_dirname]

3114

except KeyError:

3115

# TODO: We don't always need to do the lookup, because the

3116

# parent entry will be the same as the source entry.

3117

target_parent_entry = self.state._get_entry(self.target_index,

3118

path_utf8=new_dirname)

3119

if target_parent_entry == (None, None):

3120

raise AssertionError(

3121

"Could not find target parent in wt: %s\nparent of: %s"

3122

% (new_dirname, entry))

3123

target_parent_id = target_parent_entry[0][2]

3124

if target_parent_id == entry[0][2]:

3125

# This is the root, so the parent is None

3126

target_parent_id = None

3127

else:

3128

self.last_target_parent[0] = new_dirname

3129

self.last_target_parent[1] = target_parent_id

3130

3131

source_exec = source_details[3]

3132

if (self.include_unchanged

3133

or content_change

3134

or source_parent_id != target_parent_id

3135

or old_basename != entry[0][1]

3136

or source_exec != target_exec

3137

3138

if old_path is None:

3139

old_path = path = pathjoin(old_dirname, old_basename)

3140

old_path_u = self.utf8_decode(old_path)[0]

3141

path_u = old_path_u

3142

else:

3143

old_path_u = self.utf8_decode(old_path)[0]

3144

if old_path == path:

3145

path_u = old_path_u

3146

else:

3147

path_u = self.utf8_decode(path)[0]

3148

source_kind = DirState._minikind_to_kind[source_minikind]

3149

return (entry[0][2],

3150

(old_path_u, path_u),

3151

content_change,

3152

(True, True),

3153

(source_parent_id, target_parent_id),

3154

(self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),

3155

(source_kind, target_kind),

3156

(source_exec, target_exec))

3157

else:

3158

return self.uninteresting

3159

elif source_minikind in 'a' and target_minikind in 'fdlt':

3160

# looks like a new file

3161

path = pathjoin(entry[0][0], entry[0][1])

3162

# parent id is the entry for the path in the target tree

3163

# TODO: these are the same for an entire directory: cache em.

3164

parent_id = self.state._get_entry(self.target_index,

3165

path_utf8=entry[0][0])[0][2]

3166

if parent_id == entry[0][2]:

3167

parent_id = None

3168

if path_info is not None:

3169

# Present on disk:

3170

if self.use_filesystem_for_exec:

3171

# We need S_ISREG here, because we aren't sure if this

3172

# is a file or not.

3173

target_exec = bool(

3174

stat.S_ISREG(path_info[3].st_mode)

3175

and stat.S_IEXEC & path_info[3].st_mode)

3176

else:

3177

target_exec = target_details[3]

3178

return (entry[0][2],

3179

(None, self.utf8_decode(path)[0]),

3180

True,

3181

(False, True),

3182

(None, parent_id),

3183

(None, self.utf8_decode(entry[0][1])[0]),

3184

(None, path_info[2]),

3185

(None, target_exec))

3186

else:

3187

# Its a missing file, report it as such.

3188

return (entry[0][2],

3189

(None, self.utf8_decode(path)[0]),

3190

False,

3191

(False, True),

3192

(None, parent_id),

3193

(None, self.utf8_decode(entry[0][1])[0]),

3194

(None, None),

3195

(None, False))

3196

elif source_minikind in 'fdlt' and target_minikind in 'a':

3197

# unversioned, possibly, or possibly not deleted: we dont care.

3198

# if its still on disk, *and* theres no other entry at this

3199

# path [we dont know this in this routine at the moment -

3200

# perhaps we should change this - then it would be an unknown.

3201

old_path = pathjoin(entry[0][0], entry[0][1])

3202

# parent id is the entry for the path in the target tree

3203

parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]

3204

if parent_id == entry[0][2]:

3205

parent_id = None

3206

return (entry[0][2],

3207

(self.utf8_decode(old_path)[0], None),

3208

True,

3209

(True, False),

3210

(parent_id, None),

3211

(self.utf8_decode(entry[0][1])[0], None),

3212

(DirState._minikind_to_kind[source_minikind], None),

3213

(source_details[3], None))

3214

elif source_minikind in 'fdlt' and target_minikind in 'r':

3215

# a rename; could be a true rename, or a rename inherited from

3216

# a renamed parent. TODO: handle this efficiently. Its not

3217

# common case to rename dirs though, so a correct but slow

3218

# implementation will do.

3219

if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):

3220

self.search_specific_files.add(target_details[1])

3221

elif source_minikind in 'ra' and target_minikind in 'ra':

3222

# neither of the selected trees contain this file,

3223

# so skip over it. This is not currently directly tested, but

3224

# is indirectly via test_too_much.TestCommands.test_conflicts.

3225

pass

3226

else:

3227

raise AssertionError("don't know how to compare "

3228

"source_minikind=%r, target_minikind=%r"

3229

% (source_minikind, target_minikind))

3230

## import pdb;pdb.set_trace()

3231

return None

3232

3233

def __iter__(self):

3234

return self

3235

3236

def iter_changes(self):

3237

"""Iterate over the changes."""

3238

utf8_decode = cache_utf8._utf8_decode

3239

_cmp_by_dirs = cmp_by_dirs

3240

_process_entry = self._process_entry

3241

uninteresting = self.uninteresting

3242

search_specific_files = self.search_specific_files

3243

searched_specific_files = self.searched_specific_files

3244

splitpath = osutils.splitpath

3245

# sketch:

3246

# compare source_index and target_index at or under each element of search_specific_files.

3247

# follow the following comparison table. Note that we only want to do diff operations when

3248

# the target is fdl because thats when the walkdirs logic will have exposed the pathinfo

3249

# for the target.

3250

# cases:

3251

3252

# Source | Target | disk | action

3253

# r | fdlt | | add source to search, add id path move and perform

3254

# | | | diff check on source-target

3255

# r | fdlt | a | dangling file that was present in the basis.

3256

# | | | ???

3257

# r | a | | add source to search

3258

# r | a | a |

3259

# r | r | | this path is present in a non-examined tree, skip.

3260

# r | r | a | this path is present in a non-examined tree, skip.

3261

# a | fdlt | | add new id

3262

# a | fdlt | a | dangling locally added file, skip

3263

# a | a | | not present in either tree, skip

3264

# a | a | a | not present in any tree, skip

3265

# a | r | | not present in either tree at this path, skip as it

3266

# | | | may not be selected by the users list of paths.

3267

# a | r | a | not present in either tree at this path, skip as it

3268

# | | | may not be selected by the users list of paths.

3269

# fdlt | fdlt | | content in both: diff them

3270

# fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ?

3271

# fdlt | a | | unversioned: output deleted id for now

3272

# fdlt | a | a | unversioned and deleted: output deleted id

3273

# fdlt | r | | relocated in this tree, so add target to search.

3274

# | | | Dont diff, we will see an r,fd; pair when we reach

3275

# | | | this id at the other path.

3276

# fdlt | r | a | relocated in this tree, so add target to search.

3277

# | | | Dont diff, we will see an r,fd; pair when we reach

3278

# | | | this id at the other path.

3279

3280

# TODO: jam 20070516 - Avoid the _get_entry lookup overhead by

3281

# keeping a cache of directories that we have seen.

3282

3283

while search_specific_files:

3284

# TODO: the pending list should be lexically sorted? the

3285

# interface doesn't require it.

3286

current_root = search_specific_files.pop()

3287

current_root_unicode = current_root.decode('utf8')

3288

searched_specific_files.add(current_root)

3289

# process the entries for this containing directory: the rest will be

3290

# found by their parents recursively.

3291

root_entries = self.state._entries_for_path(current_root)

3292

root_abspath = self.tree.abspath(current_root_unicode)

3293

try:

3294

root_stat = os.lstat(root_abspath)

3295

except OSError, e:

3296

if e.errno == errno.ENOENT:

3297

# the path does not exist: let _process_entry know that.

3298

root_dir_info = None

3299

else:

3300

# some other random error: hand it up.

3301

raise

3302

else:

3303

root_dir_info = ('', current_root,

3304

osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,

3305

root_abspath)

3306

if root_dir_info[2] == 'directory':

3307

if self.tree._directory_is_tree_reference(

3308

current_root.decode('utf8')):

3309

root_dir_info = root_dir_info[:2] + \

3310

('tree-reference',) + root_dir_info[3:]

3311

3312

if not root_entries and not root_dir_info:

3313

# this specified path is not present at all, skip it.

3314

continue

3315

path_handled = False

3316

for entry in root_entries:

3317

result = _process_entry(entry, root_dir_info)

3318

if result is not None:

3319

path_handled = True

3320

if result is not uninteresting:

3321

yield result

3322

if self.want_unversioned and not path_handled and root_dir_info:

3323

new_executable = bool(

3324

stat.S_ISREG(root_dir_info[3].st_mode)

3325

and stat.S_IEXEC & root_dir_info[3].st_mode)

3326

yield (None,

3327

(None, current_root_unicode),

3328

True,

3329

(False, False),

3330

(None, None),

3331

(None, splitpath(current_root_unicode)[-1]),

3332

(None, root_dir_info[2]),

3333

(None, new_executable)

3334

)

3335

initial_key = (current_root, '', '')

3336

block_index, _ = self.state._find_block_index_from_key(initial_key)

3337

if block_index == 0:

3338

# we have processed the total root already, but because the

3339

# initial key matched it we should skip it here.

3340

block_index +=1

3341

if root_dir_info and root_dir_info[2] == 'tree-reference':

3342

current_dir_info = None

3343

else:

3344

dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)

3345

try:

3346

current_dir_info = dir_iterator.next()

3347

except OSError, e:

3348

# on win32, python2.4 has e.errno == ERROR_DIRECTORY, but

3349

# python 2.5 has e.errno == EINVAL,

3350

# and e.winerror == ERROR_DIRECTORY

3351

e_winerror = getattr(e, 'winerror', None)

3352

win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)

3353

# there may be directories in the inventory even though

3354

# this path is not a file on disk: so mark it as end of

3355

# iterator

3356

if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):

3357

current_dir_info = None

3358

elif (sys.platform == 'win32'

3359

and (e.errno in win_errors

3360

or e_winerror in win_errors)):

3361

current_dir_info = None

3362

else:

3363

raise

3364

else:

3365

if current_dir_info[0][0] == '':

3366

# remove .bzr from iteration

3367

bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))

3368

if current_dir_info[1][bzr_index][0] != '.bzr':

3369

raise AssertionError()

3370

del current_dir_info[1][bzr_index]

3371

# walk until both the directory listing and the versioned metadata

3372

# are exhausted.

3373

if (block_index < len(self.state._dirblocks) and

3374

osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):

3375

current_block = self.state._dirblocks[block_index]

3376

else:

3377

current_block = None

3378

while (current_dir_info is not None or

3379

current_block is not None):

3380

if (current_dir_info and current_block

3381

and current_dir_info[0][0] != current_block[0]):

3382

if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:

3383

# filesystem data refers to paths not covered by the dirblock.

3384

# this has two possibilities:

3385

# A) it is versioned but empty, so there is no block for it

3386

# B) it is not versioned.

3387

3388

# if (A) then we need to recurse into it to check for

3389

# new unknown files or directories.

3390

# if (B) then we should ignore it, because we don't

3391

# recurse into unknown directories.

3392

path_index = 0

3393

while path_index < len(current_dir_info[1]):

3394

current_path_info = current_dir_info[1][path_index]

3395

if self.want_unversioned:

3396

if current_path_info[2] == 'directory':

3397

if self.tree._directory_is_tree_reference(

3398

current_path_info[0].decode('utf8')):

3399

current_path_info = current_path_info[:2] + \

3400

('tree-reference',) + current_path_info[3:]

3401

new_executable = bool(

3402

stat.S_ISREG(current_path_info[3].st_mode)

3403

and stat.S_IEXEC & current_path_info[3].st_mode)

3404

yield (None,

3405

(None, utf8_decode(current_path_info[0])[0]),

3406

True,

3407

(False, False),

3408

(None, None),

3409

(None, utf8_decode(current_path_info[1])[0]),

3410

(None, current_path_info[2]),

3411

(None, new_executable))

3412

# dont descend into this unversioned path if it is

3413

# a dir

3414

if current_path_info[2] in ('directory',

3415

'tree-reference'):

3416

del current_dir_info[1][path_index]

3417

path_index -= 1

3418

path_index += 1

3419

3420

# This dir info has been handled, go to the next

3421

try:

3422

current_dir_info = dir_iterator.next()

3423

except StopIteration:

3424

current_dir_info = None

3425

else:

3426

# We have a dirblock entry for this location, but there

3427

# is no filesystem path for this. This is most likely

3428

# because a directory was removed from the disk.

3429

# We don't have to report the missing directory,

3430

# because that should have already been handled, but we

3431

# need to handle all of the files that are contained

3432

# within.

3433

for current_entry in current_block[1]:

3434

# entry referring to file not present on disk.

3435

# advance the entry only, after processing.

3436

result = _process_entry(current_entry, None)

3437

if result is not None:

3438

if result is not uninteresting:

3439

yield result

3440

block_index +=1

3441

if (block_index < len(self.state._dirblocks) and

3442

osutils.is_inside(current_root,

3443

self.state._dirblocks[block_index][0])):

3444

current_block = self.state._dirblocks[block_index]

3445

else:

3446

current_block = None

3447

continue

3448

entry_index = 0

3449

if current_block and entry_index < len(current_block[1]):

3450

current_entry = current_block[1][entry_index]

3451

else:

3452

current_entry = None

3453

advance_entry = True

3454

path_index = 0

3455

if current_dir_info and path_index < len(current_dir_info[1]):

3456

current_path_info = current_dir_info[1][path_index]

3457

if current_path_info[2] == 'directory':

3458

if self.tree._directory_is_tree_reference(

3459

current_path_info[0].decode('utf8')):

3460

current_path_info = current_path_info[:2] + \

3461

('tree-reference',) + current_path_info[3:]

3462

else:

3463

current_path_info = None

3464

advance_path = True

3465

path_handled = False

3466

while (current_entry is not None or

3467

current_path_info is not None):

3468

if current_entry is None:

3469

# the check for path_handled when the path is advanced

3470

# will yield this path if needed.

3471

pass

3472

elif current_path_info is None:

3473

# no path is fine: the per entry code will handle it.

3474

result = _process_entry(current_entry, current_path_info)

3475

if result is not None:

3476

if result is not uninteresting:

3477

yield result

3478

elif (current_entry[0][1] != current_path_info[1]

3479

or current_entry[1][self.target_index][0] in 'ar'):

3480

# The current path on disk doesn't match the dirblock

3481

# record. Either the dirblock is marked as absent, or

3482

# the file on disk is not present at all in the

3483

# dirblock. Either way, report about the dirblock

3484

# entry, and let other code handle the filesystem one.

3485

3486

# Compare the basename for these files to determine

3487

# which comes first

3488

if current_path_info[1] < current_entry[0][1]:

3489

# extra file on disk: pass for now, but only

3490

# increment the path, not the entry

3491

advance_entry = False

3492

else:

3493

# entry referring to file not present on disk.

3494

# advance the entry only, after processing.

3495

result = _process_entry(current_entry, None)

3496

if result is not None:

3497

if result is not uninteresting:

3498

yield result

3499

advance_path = False

3500

else:

3501

result = _process_entry(current_entry, current_path_info)

3502

if result is not None:

3503

path_handled = True

3504

if result is not uninteresting:

3505

yield result

3506

if advance_entry and current_entry is not None:

3507

entry_index += 1

3508

if entry_index < len(current_block[1]):

3509

current_entry = current_block[1][entry_index]

3510

else:

3511

current_entry = None

3512

else:

3513

advance_entry = True # reset the advance flaga

3514

if advance_path and current_path_info is not None:

3515

if not path_handled:

3516

# unversioned in all regards

3517

if self.want_unversioned:

3518

new_executable = bool(

3519

stat.S_ISREG(current_path_info[3].st_mode)

3520

and stat.S_IEXEC & current_path_info[3].st_mode)

3521

try:

3522

relpath_unicode = utf8_decode(current_path_info[0])[0]

3523

except UnicodeDecodeError:

3524

raise errors.BadFilenameEncoding(

3525

current_path_info[0], osutils._fs_enc)

3526

yield (None,

3527

(None, relpath_unicode),

3528

True,

3529

(False, False),

3530

(None, None),

3531

(None, utf8_decode(current_path_info[1])[0]),

3532

(None, current_path_info[2]),

3533

(None, new_executable))

3534

# dont descend into this unversioned path if it is

3535

# a dir

3536

if current_path_info[2] in ('directory'):

3537

del current_dir_info[1][path_index]

3538

path_index -= 1

3539

# dont descend the disk iterator into any tree

3540

# paths.

3541

if current_path_info[2] == 'tree-reference':

3542

del current_dir_info[1][path_index]

3543

path_index -= 1

3544

path_index += 1

3545

if path_index < len(current_dir_info[1]):

3546

current_path_info = current_dir_info[1][path_index]

3547

if current_path_info[2] == 'directory':

3548

if self.tree._directory_is_tree_reference(

3549

current_path_info[0].decode('utf8')):

3550

current_path_info = current_path_info[:2] + \

3551

('tree-reference',) + current_path_info[3:]

3552

else:

3553

current_path_info = None

3554

path_handled = False

3555

else:

3556

advance_path = True # reset the advance flagg.

3557

if current_block is not None:

3558

block_index += 1

3559

if (block_index < len(self.state._dirblocks) and

3560

osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):

3561

current_block = self.state._dirblocks[block_index]

3562

else:

3563

current_block = None

3564

if current_dir_info is not None:

3565

try:

3566

current_dir_info = dir_iterator.next()

3567

except StopIteration:

3568

current_dir_info = None

3569

# Default to the pure-Python implementation. The import block below rebinds
# ``_process_entry`` to ProcessEntryC when bzrlib._dirstate_helpers_c can be
# imported; the ImportError fallback does not touch this name.
_process_entry = ProcessEntryPython

3570

3571

3572

# Try to load the compiled form if possible

3573

try:

3574

from bzrlib._dirstate_helpers_c import (

3575

_read_dirblocks_c as _read_dirblocks,

3576

bisect_dirblock_c as bisect_dirblock,

3577

_bisect_path_left_c as _bisect_path_left,

3578

_bisect_path_right_c as _bisect_path_right,

3579

cmp_by_dirs_c as cmp_by_dirs,

3580

ProcessEntryC as _process_entry,

3581

update_entry as update_entry,

3582

)

3583

except ImportError:

3584

from bzrlib._dirstate_helpers_py import (

3585

_read_dirblocks_py as _read_dirblocks,

3586

bisect_dirblock_py as bisect_dirblock,

3587

_bisect_path_left_py as _bisect_path_left,

3588

_bisect_path_right_py as _bisect_path_right,

3589

cmp_by_dirs_py as cmp_by_dirs,

3590

)

Older »