# Copyright (C) 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""DirState objects record the state of a directory and its bzr metadata.
19
Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and
20
lines by NL. The field delimiters are ommitted in the grammar, line delimiters
21
are not - this is done for clarity of reading. All string data is in utf8.
23
MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
26
WHOLE_NUMBER = {digit}, digit;
28
REVISION_ID = a non-empty utf8 string;
30
dirstate format = header line, full checksum, row count, parent details,
31
ghost_details, entries;
32
header line = "#bazaar dirstate flat format 3", NL;
33
full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
34
row count = "num_entries: ", WHOLE_NUMBER, NL;
35
parent_details = WHOLE NUMBER, {REVISION_ID}* NL;
36
ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;
38
entry = entry_key, current_entry_details, {parent_entry_details};
39
entry_key = dirname, basename, fileid;
40
current_entry_details = common_entry_details, working_entry_details;
41
parent_entry_details = common_entry_details, history_entry_details;
42
common_entry_details = MINIKIND, fingerprint, size, executable
43
working_entry_details = packed_stat
44
history_entry_details = REVISION_ID;
47
fingerprint = a nonempty utf8 sequence with meaning defined by minikind.
49

Given this definition, the following is useful to know:
entry (aka row) - all the data for a given key.
entry[0]: The key (dirname, basename, fileid)
entry[1]: The tree(s) data for this path and id combination.
entry[1][0]: The current tree
entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate:
entry[1][0][0]: minikind
entry[1][0][1]: fingerprint
entry[1][0][2]: size
entry[1][0][3]: executable
entry[1][0][4]: packed_stat
entry[1][1][4]: revision_id

There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now:
self._dirblocks[0] -> ('', [entry ...]),
and the entries in there are
entries[0][2]: file_id
entries[1][0]: The tree data for the current tree for this fileid at /
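
As an illustrative sketch (the values here are hypothetical, not taken from a
real tree), an in-memory entry for a file at foo/bar with one parent tree
might look like this:

```python
# A hypothetical in-memory dirstate entry (sketch only). The key identifies
# the path and file id; each element of the tree-data list follows
# (minikind, fingerprint, size, executable, packed_stat-or-revision_id).
entry = (
    ('foo', 'bar', 'bar-file-id'),  # entry[0]: (dirname, basename, fileid)
    [
        ('f', 'sha1-of-bar', 42, False, 'packed-stat-blob'),    # current tree
        ('f', 'sha1-of-bar', 42, False, 'parent-revision-id'),  # first parent
    ],
)

assert entry[0][2] == 'bar-file-id'   # file id lives in the key
assert entry[1][0][0] == 'f'          # minikind of the working-tree details
assert entry[1][1][4] == 'parent-revision-id'
```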

'r' is a relocated entry: This path is not present in this tree with this id,
 but the id can be found at another location. The fingerprint is used to
 point to the target location.
'a' is an absent entry: In that tree the id is not present at this path.
'd' is a directory entry: This path in this tree is a directory with the
 current file id. There is no fingerprint for directories.
'f' is a file entry: As for directory, but it's a file. The fingerprint is the
 sha1 value of the file's canonical form.
'l' is a symlink entry: As for directory, but a symlink. The fingerprint is the
 link target.
't' is a reference to a nested subtree; the fingerprint is the referenced
 revision.

The entries on disk and in memory are ordered according to the following keys:
    directory, as a list of components
    filename
    file-id

--- Format 1 had the following different definition: ---
rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
    WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
    {PARENT ROW}
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
    basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,
    SHA1

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.
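
The ghost-filtering rule can be sketched directly (the revision names are the
ones used in the example above):

```python
# Three parents, one of which is a ghost.
parents = ['foo', 'bar', 'baz']
ghosts = set(['bar'])

# A PARENT ROW is written only for non-ghost parents, preserving order.
parent_rows_for = [rev for rev in parents if rev not in ghosts]
assert parent_rows_for == ['foo', 'baz']
```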

In any tree, a kind of 'moved' indicates that the fingerprint field
(which we treat as opaque data specific to the 'kind' anyway) has the
details for the id of this row in that tree.

I'm strongly tempted to add an id->path index as well, but I think that
where we need id->path mapping we also usually read the whole file, so
I'm going to skip that for the moment, as we have the ability to locate
via bisect any path in any tree, and if we look things up by path, we can
accumulate an id->path mapping as we go, which will tend to match what we
asked for.

I plan to implement this asap, so please speak up now to alter/tweak the
design - and once we stabilise on this, I'll update the wiki page for it.

The rationale for all this is that we want fast operations for the
common case (diff/status/commit/merge on all files) and extremely fast
operations for the less common but still frequent case (status/diff/commit
on specific files). Operations on specific files involve a scan for all
the children of a path, *in every involved tree*, which the current
format did not accommodate.

Design priorities:
1) Fast end to end use for bzr's top 5 use cases. (commit/diff/status/merge/???)
2) fall back to the current object model as needed.
3) scale usably to the largest trees known today - say 50K entries. (mozilla
   is an example of this)

Locking:

Eventually reuse dirstate objects across locks IFF the dirstate file has not
been modified, but will require that we flush/ignore cached stat-hit data
because we won't want to restat all files on disk just because a lock was
acquired, yet we cannot trust the data after the previous lock was released.

Memory representation:
vector of all directories, and vector of the children ?
i.e.
    root_entries = (direntry for root, [parent_direntries_for_root]),
    dirblocks = [
        ('', ['data for achild', 'data for bchild', 'data for cchild'])
        ('dir', ['achild', 'cchild', 'echild'])
        ]
- single bisect to find N subtrees from a path spec
- in-order for serialisation - this is 'dirblock' grouping.
- insertion of a file '/a' affects only the '/' child-vector, that is, to
  insert 10K elements from scratch does not generate O(N^2) memoves of a
  single vector, rather each individual child-vector, which tends to be
  limited to a manageable number. Will scale badly on trees with 10K entries
  in a single directory. Compare with Inventory.InventoryDirectory which has
  a dictionary for the children. No bisect capability, can only probe for
  exact matches, or grab all elements and sort.
- What's the risk of error here? Once we have the base format being processed
  we should have a net win regardless of optimality. So we are going to
  go with what seems reasonable.

Maybe we should do a test profile of the core structure - 10K simulated
searches/lookups/etc?
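
Because the dirblocks are kept sorted by directory name, locating a
directory's children is a single `bisect` probe rather than a scan; a minimal
sketch with made-up data:

```python
import bisect

# dirblocks: a sorted list of (dirname, children) pairs, as described above.
dirblocks = [
    ('', ['data for achild', 'data for bchild', 'data for cchild']),
    ('dir', ['achild', 'cchild', 'echild']),
    ('dir/sub', ['one']),
]
names = [d[0] for d in dirblocks]

def children_of(dirname):
    # One bisect finds the block for dirname, or reports it absent.
    idx = bisect.bisect_left(names, dirname)
    if idx < len(names) and names[idx] == dirname:
        return dirblocks[idx][1]
    return None

assert children_of('dir') == ['achild', 'cchild', 'echild']
assert children_of('missing') is None
```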

Objects for each row?
The lifetime of Dirstate objects is currently per lock, but see above for
possible extensions. The lifetime of a row from a dirstate is expected to be
very short in the optimistic case: which we are optimising for. For instance,
subtree status will determine from analysis of the disk data what rows need to
be examined at all, and will be able to determine from a single row whether
that file has altered or not, so we are aiming to process tens of thousands of
entries each second within the dirstate context, before exposing anything to
the larger codebase. This suggests we want the time for a single file
comparison to be < 0.1 milliseconds. That would give us 10000 paths per second
processed, and to scale to 100 thousand we'll need another order of magnitude
to do that. Now, as the lifetime for all unchanged entries is the time to
parse, stat the file on disk, and then immediately discard, the overhead of
object creation becomes a significant cost.

Figures: Creating a tuple from 3 elements was profiled at 0.0625
microseconds, whereas creating an object which is subclassed from tuple was
0.500 microseconds, and creating an object with 3 elements and slots was 3
microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get
down to 10 microseconds for the total processing - having 33% of that be object
creation is a huge overhead. There is a potential cost in using tuples within
each row which is that the conditional code to do comparisons may be slower
than method invocation, but method invocation is known to be slow due to stack
frame creation, so avoiding methods in these tight inner loops is unfortunately
desirable. We can consider a pyrex version of this with objects in future if
needed.
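
The absolute figures above date from 2007-era hardware and interpreters; a
quick sketch to reproduce the tuple-versus-slotted-object comparison on
current hardware (the class and variable names are illustrative, and the
numbers will differ):

```python
import timeit

class SlottedRow(object):
    """A 3-element object with __slots__, mirroring the comparison above."""
    __slots__ = ('minikind', 'fingerprint', 'size')
    def __init__(self, minikind, fingerprint, size):
        self.minikind = minikind
        self.fingerprint = fingerprint
        self.size = size

mk, fp, sz = 'f', 'sha1-value', 42
n = 100000
tuple_time = timeit.timeit(lambda: (mk, fp, sz), number=n)
slots_time = timeit.timeit(lambda: SlottedRow(mk, fp, sz), number=n)

# Object construction runs __init__ and three attribute stores, so it is
# consistently slower than building a bare tuple.
print('tuple:   %.4fs for %d creations' % (tuple_time, n))
print('slotted: %.4fs for %d creations' % (slots_time, n))
```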
"""

import binascii
import bisect
import os
import stat
from stat import S_IEXEC

from bzrlib import (
    debug,
    errors,
    osutils,
    )
from bzrlib.osutils import pathjoin

# compile the struct compiler we need, so as to only do it once
from _struct import Struct
_compiled_pack = Struct('>LLLLLL').pack

def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
    """Convert stat values into a packed representation."""
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries, filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin

    # base64 encoding always adds a final newline, so strip it off
    # The current version
    return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),
                         st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
    # This is 0.060s / 1.520s faster by not encoding as much information
    # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
    # This is not strictly faster than _encode(_pack())[:-1]
    # return '%X.%X.%X.%X.%X.%X' % (
    #      st.st_size, int(st.st_mtime), int(st.st_ctime),
    #      st.st_dev, st.st_ino, st.st_mode)
    # Similar to the _encode(_pack('>LL'))
    # return '%X.%X' % (int(st.st_mtime), st.st_mode)


class DirState(object):
    """Record directory and metadata state for fast access.

    A dirstate is a specialised data structure for managing local working
    tree state information. It's not yet well defined whether it is platform
    specific, and if it is, how we detect/parameterize that.

    Dirstates use the usual lock_write, lock_read and unlock mechanisms.
    Unlike most bzr disk formats, DirStates must be locked for reading, using
    lock_read. (This is an os file lock internally.) This is necessary
    because the file can be rewritten in place.

    DirStates must be explicitly written with save() to commit changes; just
    unlocking them does not write the changes to disk.
    """

    _kind_to_minikind = {
            'absent': 'a',
            'file': 'f',
            'directory': 'd',
            'relocated': 'r',
            'symlink': 'l',
            'tree-reference': 't',
        }
    _minikind_to_kind = {
            'a': 'absent',
            'f': 'file',
            'd': 'directory',
            'l': 'symlink',
            'r': 'relocated',
            't': 'tree-reference',
        }
    _stat_to_minikind = {
        stat.S_IFDIR: 'd',
        stat.S_IFREG: 'f',
        stat.S_IFLNK: 'l',
        }
    _to_yesno = {True: 'y', False: 'n'} # TODO profile the performance gain
    # of using int conversion rather than a dict here. AND BLAME ANDREW IF
    # THIS IS SLOWER.

    # TODO: jam 20070221 Figure out what to do if we have a record that exceeds
    #       the BISECT_PAGE_SIZE. For now, we just have to make it large enough
    #       that we are sure a single record will always fit.
    BISECT_PAGE_SIZE = 4096

    NOT_IN_MEMORY = 0
    IN_MEMORY_UNMODIFIED = 1
    IN_MEMORY_MODIFIED = 2

    # A pack_stat (the x's) that is just noise and will never match the output
    # of base64 encode.
    NULLSTAT = 'x' * 32
    NULL_PARENT_DETAILS = ('a', '', 0, False, '')

    HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
    HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'

    def __init__(self, path):
        """Create a DirState object.

        :param path: The path at which the dirstate file on disk should live.
        """
        # _header_state and _dirblock_state represent the current state
        # of the dirstate metadata and the per-row data respectively.
        # NOT_IN_MEMORY indicates that no data is in memory
        # IN_MEMORY_UNMODIFIED indicates that what we have in memory
        #   is the same as is on disk
        # IN_MEMORY_MODIFIED indicates that we have a modified version
        #   of what is on disk.
        # In future we will add more granularity, for instance _dirblock_state
        # will probably support partially-in-memory as a separate variable,
        # allowing for partially-in-memory unmodified and partially-in-memory
        # modified states.
        self._header_state = DirState.NOT_IN_MEMORY
        self._dirblock_state = DirState.NOT_IN_MEMORY
        # If true, an error has been detected while updating the dirstate, and
        # for safety we're not going to commit to disk.
        self._changes_aborted = False
        self._state_file = None
        self._filename = path
        self._lock_token = None
        self._lock_state = None
        self._id_index = None
        # a map from packed_stat to sha's.
        self._packed_stat_index = None
        self._end_of_header = None
        self._cutoff_time = None
        self._split_path_cache = {}
        self._bisect_page_size = DirState.BISECT_PAGE_SIZE
        if 'hashcache' in debug.debug_flags:
            self._sha1_file = self._sha1_file_and_mutter
        else:
            self._sha1_file = osutils.sha_file_by_name
        # These two attributes provide a simple cache for lookups into the
        # dirstate in-memory vectors. By probing respectively for the last
        # block, and for the next entry, we save nearly 2 bisections per path
        # during commit.
        self._last_block_index = None
        self._last_entry_index = None

    def __repr__(self):
        return "%s(%r)" % \
            (self.__class__.__name__, self._filename)

    def add(self, path, file_id, kind, stat, fingerprint):
        """Add a path to be tracked.

        :param path: The path within the dirstate - '' is the root, 'foo' is the
            path foo within the root, 'foo/bar' is the path bar within foo
            within the root.
        :param file_id: The file id of the path being added.
        :param kind: The kind of the path, as a string like 'file',
            'directory', etc.
        :param stat: The output of os.lstat for the path.
        :param fingerprint: The sha value of the file,
            or the target of a symlink,
            or the referenced revision id for tree-references,
            or '' for directories.
        """
        # adding a file:
        # find the block its in.
        # find the location in the block.
        # check its not there
        # add it.
        #------- copied from inventory.ensure_normalized_name - keep synced.
        # --- normalized_filename wants a unicode basename only, so get one.
        dirname, basename = osutils.split(path)
        # we dont import normalized_filename directly because we want to be
        # able to change the implementation at runtime for tests.
        norm_name, can_access = osutils.normalized_filename(basename)
        if norm_name != basename:
            if can_access:
                basename = norm_name
            else:
                raise errors.InvalidNormalization(path)
        # you should never have files called . or ..; just add the directory
        # in the parent, or according to the special treatment for the root
        if basename == '.' or basename == '..':
            raise errors.InvalidEntryName(path)
        # now that we've normalised, we need the correct utf8 path and
        # dirname and basename elements. This single encode and split should be
        # faster than three separate encodes.
        utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
        dirname, basename = osutils.split(utf8path)
        # uses __class__ for speed; the check is needed for safety
        if file_id.__class__ is not str:
            raise AssertionError(
                "must be a utf8 file_id not %s" % (type(file_id), ))
        # Make sure the file_id does not exist in this tree
        file_id_entry = self._get_entry(0, fileid_utf8=file_id)
        if file_id_entry != (None, None):
            path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])
            kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]
            info = '%s:%s' % (kind, path)
            raise errors.DuplicateFileId(file_id, info)
        first_key = (dirname, basename, '')
        block_index, present = self._find_block_index_from_key(first_key)
        if present:
            # check the path is not in the tree
            block = self._dirblocks[block_index][1]
            entry_index, _ = self._find_entry_index(first_key, block)
            while (entry_index < len(block) and
                block[entry_index][0][0:2] == first_key[0:2]):
                if block[entry_index][1][0][0] not in 'ar':
                    # this path is in the dirstate in the current tree.
                    raise Exception("adding already added path!")
                entry_index += 1
        else:
            # The block where we want to put the file is not present. But it
            # might be because the directory was empty, or not loaded yet. Look
            # for a parent entry, if not found, raise NotVersionedError
            parent_dir, parent_base = osutils.split(dirname)
            parent_block_idx, parent_entry_idx, _, parent_present = \
                self._get_block_entry_index(parent_dir, parent_base, 0)
            if not parent_present:
                raise errors.NotVersionedError(path, str(self))
            self._ensure_block(parent_block_idx, parent_entry_idx, dirname)
        block = self._dirblocks[block_index][1]
        entry_key = (dirname, basename, file_id)
        if stat is None:
            size = 0
            packed_stat = DirState.NULLSTAT
        else:
            size = stat.st_size
            packed_stat = pack_stat(stat)
        parent_info = self._empty_parent_info()
        minikind = DirState._kind_to_minikind[kind]
        if kind == 'file':
            entry_data = entry_key, [
                (minikind, fingerprint, size, False, packed_stat),
                ] + parent_info
        elif kind == 'directory':
            entry_data = entry_key, [
                (minikind, '', 0, False, packed_stat),
                ] + parent_info
        elif kind == 'symlink':
            entry_data = entry_key, [
                (minikind, fingerprint, size, False, packed_stat),
                ] + parent_info
        elif kind == 'tree-reference':
            entry_data = entry_key, [
                (minikind, fingerprint, 0, False, packed_stat),
                ] + parent_info
        else:
            raise errors.BzrError('unknown kind %r' % kind)
        entry_index, present = self._find_entry_index(entry_key, block)
        if not present:
            block.insert(entry_index, entry_data)
        else:
            if block[entry_index][1][0][0] != 'a':
                raise AssertionError(" %r(%r) already added" % (basename, file_id))
            block[entry_index][1][0] = entry_data[1][0]

        if kind == 'directory':
            # insert a new dirblock
            self._ensure_block(block_index, entry_index, utf8path)
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        if self._id_index:
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)

    def _bisect(self, paths):
        """Bisect through the disk structure for specific rows.

        :param paths: A list of paths to find
        :return: A dict mapping path => entries for found entries. Missing
            entries will not be in the map.
            The list is not sorted, and entries will be populated
            based on when they were read.
        """
        self._requires_lock()
        # We need the file pointer to be right after the initial header block
        self._read_header_if_needed()
        # If _dirblock_state was in memory, we should just return info from
        # there, this function is only meant to handle when we want to read
        # part of the disk.
        if self._dirblock_state != DirState.NOT_IN_MEMORY:
            raise AssertionError("bad dirblock state %r" % self._dirblock_state)

        # The disk representation is generally info + '\0\n\0' at the end. But
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
        # Because it means we can sync on the '\n'
        state_file = self._state_file
        file_size = os.fstat(state_file.fileno()).st_size
        # We end up with 2 extra fields, we should have a trailing '\n' to
        # ensure that we read the whole record, and we should have a precursor
        # '' which ensures that we start after the previous '\n'
        entry_field_count = self._fields_per_entry() + 1

        low = self._end_of_header
        high = file_size - 1 # Ignore the final '\0'
        # Map from (dir, name) => entry
        found = {}

        # Avoid infinite seeking
        max_count = 30*len(paths)
        count = 0
        # pending is a list of places to look.
        # each entry is a tuple of low, high, dir_names
        #   low -> the first byte offset to read (inclusive)
        #   high -> the last byte offset (inclusive)
        #   dir_names -> The list of (dir, name) pairs that should be found in
        #                the [low, high] range
        pending = [(low, high, paths)]

        page_size = self._bisect_page_size

        fields_to_entry = self._get_fields_to_entry()

        while pending:
            low, high, cur_files = pending.pop()

            if not cur_files or low >= high:
                # Nothing to find
                continue

            count += 1
            if count > max_count:
                raise errors.BzrError('Too many seeks, most likely a bug.')

            mid = max(low, (low+high-page_size)/2)

            state_file.seek(mid)
            # limit the read size, so we don't end up reading data that we
            # have already read.
            read_size = min(page_size, (high-mid)+1)
            block = state_file.read(read_size)

            start = mid
            entries = block.split('\n')

            if len(entries) < 2:
                # We didn't find a '\n', so we cannot have found any records.
                # So put this range back and try again. But we know we have to
                # increase the page size, because a single read did not contain
                # a record break (so records must be larger than page_size)
                page_size *= 2
                pending.append((low, high, cur_files))
                continue

            # Check the first and last entries, in case they are partial, or if
            # we don't care about the rest of this page
            first_entry_num = 0
            first_fields = entries[0].split('\0')
            if len(first_fields) < entry_field_count:
                # We didn't get the complete first entry
                # so move start, and grab the next, which
                # should be a full entry
                start += len(entries[0])+1
                first_fields = entries[1].split('\0')
                first_entry_num = 1

            if len(first_fields) <= 2:
                # We didn't even get a filename here... what do we do?
                # Try a large page size and repeat this query
                page_size *= 2
                pending.append((low, high, cur_files))
                continue
            else:
                # Find what entries we are looking for, which occur before and
                # after this first record.
                after = start
                if first_fields[1]:
                    first_path = first_fields[1] + '/' + first_fields[2]
                else:
                    first_path = first_fields[2]
                first_loc = _bisect_path_left(cur_files, first_path)

                # These exist before the current location
                pre = cur_files[:first_loc]
                # These occur after the current location, which may be in the
                # data we read, or might be after the last entry
                post = cur_files[first_loc:]

            if post and len(first_fields) >= entry_field_count:
                # We have files after the first entry

                # Parse the last entry
                last_entry_num = len(entries)-1
                last_fields = entries[last_entry_num].split('\0')
                if len(last_fields) < entry_field_count:
                    # The very last hunk was not complete,
                    # read the previous hunk
                    after = mid + len(block) - len(entries[-1])
                    last_entry_num -= 1
                    last_fields = entries[last_entry_num].split('\0')
                else:
                    after = mid + len(block)

                if last_fields[1]:
                    last_path = last_fields[1] + '/' + last_fields[2]
                else:
                    last_path = last_fields[2]
                last_loc = _bisect_path_right(post, last_path)

                middle_files = post[:last_loc]
                post = post[last_loc:]

                if middle_files:
                    # We have files that should occur in this block
                    # (>= first, <= last)
                    # Either we will find them here, or we can mark them as
                    # missing.

                    if middle_files[0] == first_path:
                        # We might need to go before this location
                        pre.append(first_path)
                    if middle_files[-1] == last_path:
                        post.insert(0, last_path)

                    # Find out what paths we have
                    paths = {first_path:[first_fields]}
                    # last_path might == first_path so we need to be
                    # careful if we should append rather than overwrite
                    if last_entry_num != first_entry_num:
                        paths.setdefault(last_path, []).append(last_fields)
                    for num in xrange(first_entry_num+1, last_entry_num):
                        # TODO: jam 20070223 We are already splitting here, so
                        #       shouldn't we just split the whole thing rather
                        #       than doing the split again in add_one_record?
                        fields = entries[num].split('\0')
                        if fields[1]:
                            path = fields[1] + '/' + fields[2]
                        else:
                            path = fields[2]
                        paths.setdefault(path, []).append(fields)

                    for path in middle_files:
                        for fields in paths.get(path, []):
                            # offset by 1 because of the opening '\0'
                            # consider changing fields_to_entry to avoid the
                            # extra list slice
                            entry = fields_to_entry(fields[1:])
                            found.setdefault(path, []).append(entry)

            # Now we have split up everything into pre, middle, and post, and
            # we have handled everything that fell in 'middle'.
            # We add 'post' first, so that we prefer to seek towards the
            # beginning, so that we will tend to go as early as we need, and
            # then only seek forward after that.
            if post:
                pending.append((after, high, post))
            if pre:
                pending.append((low, start-1, pre))

        # Consider that we may want to return the directory entries in sorted
        # order. For now, we just return them in whatever order we found them,
        # and leave it up to the caller if they care if it is ordered or not.
        return found

    def _bisect_dirblocks(self, dir_list):
        """Bisect through the disk structure to find entries in given dirs.

        _bisect_dirblocks is meant to find the contents of directories, which
        differs from _bisect, which only finds individual entries.

        :param dir_list: A sorted list of directory names ['', 'dir', 'foo'].
        :return: A map from dir => entries_for_dir
        """
        # TODO: jam 20070223 A lot of the bisecting logic could be shared
        #       between this and _bisect. It would require parameterizing the
        #       inner loop with a function, though. We should evaluate the
        #       performance difference.
        self._requires_lock()
        # We need the file pointer to be right after the initial header block
        self._read_header_if_needed()
        # If _dirblock_state was in memory, we should just return info from
        # there, this function is only meant to handle when we want to read
        # part of the disk.
        if self._dirblock_state != DirState.NOT_IN_MEMORY:
            raise AssertionError("bad dirblock state %r" % self._dirblock_state)
        # The disk representation is generally info + '\0\n\0' at the end. But
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
        # Because it means we can sync on the '\n'
        state_file = self._state_file
        file_size = os.fstat(state_file.fileno()).st_size
        # We end up with 2 extra fields, we should have a trailing '\n' to
        # ensure that we read the whole record, and we should have a precursor
        # '' which ensures that we start after the previous '\n'
        entry_field_count = self._fields_per_entry() + 1

        low = self._end_of_header
        high = file_size - 1 # Ignore the final '\0'
        # Map from dir => entry
        found = {}

        # Avoid infinite seeking
        max_count = 30*len(dir_list)
        count = 0
        # pending is a list of places to look.
        # each entry is a tuple of low, high, dir_names
        #   low -> the first byte offset to read (inclusive)
        #   high -> the last byte offset (inclusive)
        #   dirs -> The list of directories that should be found in
        #           the [low, high] range
        pending = [(low, high, dir_list)]

        page_size = self._bisect_page_size

        fields_to_entry = self._get_fields_to_entry()

        while pending:
            low, high, cur_dirs = pending.pop()

            if not cur_dirs or low >= high:
                # Nothing to find
                continue

            count += 1
            if count > max_count:
                raise errors.BzrError('Too many seeks, most likely a bug.')

            mid = max(low, (low+high-page_size)/2)

            state_file.seek(mid)
            # limit the read size, so we don't end up reading data that we
            # have already read.
            read_size = min(page_size, (high-mid)+1)
            block = state_file.read(read_size)

            start = mid
            entries = block.split('\n')

            if len(entries) < 2:
                # We didn't find a '\n', so we cannot have found any records.
                # So put this range back and try again. But we know we have to
                # increase the page size, because a single read did not contain
                # a record break (so records must be larger than page_size)
                page_size *= 2
                pending.append((low, high, cur_dirs))
                continue

            # Check the first and last entries, in case they are partial, or if
            # we don't care about the rest of this page
            first_entry_num = 0
            first_fields = entries[0].split('\0')
            if len(first_fields) < entry_field_count:
                # We didn't get the complete first entry
                # so move start, and grab the next, which
                # should be a full entry
                start += len(entries[0])+1
                first_fields = entries[1].split('\0')
                first_entry_num = 1

            if len(first_fields) <= 1:
                # We didn't even get a dirname here... what do we do?
                # Try a large page size and repeat this query
                page_size *= 2
                pending.append((low, high, cur_dirs))
                continue
            else:
                # Find what entries we are looking for, which occur before and
                # after this first record.
                after = start
                first_dir = first_fields[1]
                first_loc = bisect.bisect_left(cur_dirs, first_dir)

                # These exist before the current location
                pre = cur_dirs[:first_loc]
                # These occur after the current location, which may be in the
                # data we read, or might be after the last entry
                post = cur_dirs[first_loc:]

            if post and len(first_fields) >= entry_field_count:
                # We have records to look at after the first entry

                # Parse the last entry
                last_entry_num = len(entries)-1
                last_fields = entries[last_entry_num].split('\0')
                if len(last_fields) < entry_field_count:
                    # The very last hunk was not complete,
                    # read the previous hunk
                    after = mid + len(block) - len(entries[-1])
                    last_entry_num -= 1
                    last_fields = entries[last_entry_num].split('\0')
                else:
                    after = mid + len(block)

                last_dir = last_fields[1]
                last_loc = bisect.bisect_right(post, last_dir)

                middle_files = post[:last_loc]
                post = post[last_loc:]

                if middle_files:
                    # We have files that should occur in this block
                    # (>= first, <= last)
                    # Either we will find them here, or we can mark them as
                    # missing.

                    if middle_files[0] == first_dir:
                        # We might need to go before this location
                        pre.append(first_dir)
                    if middle_files[-1] == last_dir:
                        post.insert(0, last_dir)

                    # Find out what paths we have
                    paths = {first_dir:[first_fields]}
                    # last_dir might == first_dir so we need to be
                    # careful if we should append rather than overwrite
                    if last_entry_num != first_entry_num:
                        paths.setdefault(last_dir, []).append(last_fields)
                    for num in xrange(first_entry_num+1, last_entry_num):
                        # TODO: jam 20070223 We are already splitting here, so
                        #       shouldn't we just split the whole thing rather
                        #       than doing the split again in add_one_record?
                        fields = entries[num].split('\0')
                        paths.setdefault(fields[1], []).append(fields)

                    for cur_dir in middle_files:
                        for fields in paths.get(cur_dir, []):
                            # offset by 1 because of the opening '\0'
                            # consider changing fields_to_entry to avoid the
                            # extra list slice
                            entry = fields_to_entry(fields[1:])
                            found.setdefault(cur_dir, []).append(entry)

            # Now we have split up everything into pre, middle, and post, and
            # we have handled everything that fell in 'middle'.
            # We add 'post' first, so that we prefer to seek towards the
            # beginning, so that we will tend to go as early as we need, and
            # then only seek forward after that.
            if post:
                pending.append((after, high, post))
            if pre:
                pending.append((low, start-1, pre))

        return found

    def _bisect_recursive(self, paths):
        """Bisect for entries for all paths and their children.

        This will use bisect to find all records for the supplied paths. It
        will then continue to bisect for any records which are marked as
        directories. (and renames?)

        :param paths: A sorted list of (dir, name) pairs
            eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]
        :return: A dictionary mapping (dir, name, file_id) => [tree_info]
        """
        # Map from (dir, name, file_id) => [tree_info]
        found = {}

        found_dir_names = set()

        # Directories that have been read
        processed_dirs = set()
        # Get the ball rolling with the first bisect for all entries.
        newly_found = self._bisect(paths)

        while newly_found:
            # Directories that need to be read
            pending_dirs = set()
            paths_to_search = set()
            for entry_list in newly_found.itervalues():
                for dir_name_id, trees_info in entry_list:
                    found[dir_name_id] = trees_info
                    found_dir_names.add(dir_name_id[:2])
                    is_dir = False
                    for tree_info in trees_info:
                        minikind = tree_info[0]
                        if minikind == 'd':
                            if is_dir:
                                # We already processed this one as a directory,
                                # we don't need to do the extra work again.
                                continue
                            subdir, name, file_id = dir_name_id
                            path = osutils.pathjoin(subdir, name)
                            is_dir = True
                            if path not in processed_dirs:
                                pending_dirs.add(path)
                        elif minikind == 'r':
                            # Rename, we need to directly search the target
                            # which is contained in the fingerprint column
                            dir_name = osutils.split(tree_info[1])
                            if dir_name[0] in pending_dirs:
                                # This entry will be found in the dir search
                                continue
                            if dir_name not in found_dir_names:
                                paths_to_search.add(tree_info[1])
            # Now we have a list of paths to look for directly, and
            # directory blocks that need to be read.
            # newly_found is mixing the keys between (dir, name) and path
            # entries, but that is okay, because we only really care about the
            # file_id's anyway.
            newly_found = self._bisect(sorted(paths_to_search))
            newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))
            processed_dirs.update(pending_dirs)
        return found
    def _discard_merge_parents(self):
        """Discard any parents trees beyond the first.

        Note that if this fails the dirstate is corrupted.

        After this function returns the dirstate contains 2 trees, neither of
        which are ghosted.
        """
        self._read_header_if_needed()
        parents = self.get_parent_ids()
        if len(parents) < 1:
            return
        # only require all dirblocks if we are doing a full-pass removal.
        self._read_dirblocks_if_needed()
        dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])
        def iter_entries_removable():
            for block in self._dirblocks:
                deleted_positions = []
                for pos, entry in enumerate(block[1]):
                    yield entry
                    if (entry[1][0][0], entry[1][1][0]) in dead_patterns:
                        deleted_positions.append(pos)
                if deleted_positions:
                    if len(deleted_positions) == len(block[1]):
                        del block[1][:]
                    else:
                        for pos in reversed(deleted_positions):
                            del block[1][pos]
        # if the first parent is a ghost:
        if parents[0] in self.get_ghosts():
            empty_parent = [DirState.NULL_PARENT_DETAILS]
            for entry in iter_entries_removable():
                entry[1][1:] = empty_parent
        else:
            for entry in iter_entries_removable():
                del entry[1][2:]

        self._parents = [parents[0]]
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._header_state = DirState.IN_MEMORY_MODIFIED

    def _empty_parent_info(self):
        return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
                                                 len(self._ghosts))

    def _ensure_block(self, parent_block_index, parent_row_index, dirname):
        """Ensure a block for dirname exists.

        This function exists to let callers which know that there is a
        directory dirname ensure that the block for it exists. This block can
        fail to exist because of demand loading, or because a directory had no
        children. In either case it is not an error. It is however an error to
        call this if there is no parent entry for the directory, and thus the
        function requires the coordinates of such an entry to be provided.

        The root row is special cased and can be indicated with a parent block
        and row index of -1.

        :param parent_block_index: The index of the block in which dirname's row
            exists.
        :param parent_row_index: The index in the parent block where the row
            exists.
        :param dirname: The utf8 dirname to ensure there is a block for.
        :return: The index for the block.
        """
        if dirname == '' and parent_row_index == 0 and parent_block_index == 0:
            # This is the signature of the root row, and the
            # contents-of-root row is always index 1
            return 1
        # the basename of the directory must be the end of its full name.
        if not (parent_block_index == -1 and
                parent_row_index == -1 and dirname == ''):
            if not dirname.endswith(
                    self._dirblocks[parent_block_index][1][parent_row_index][0][1]):
                raise AssertionError("bad dirname %r" % dirname)
        block_index, present = self._find_block_index_from_key((dirname, '', ''))
        if not present:
            ## In future, when doing partial parsing, this should load and
            # populate the entire block.
            self._dirblocks.insert(block_index, (dirname, []))
        return block_index

    def _entries_to_current_state(self, new_entries):
        """Load new_entries into self.dirblocks.

        Process new_entries into the current state object, making them the active
        state.  The entries are grouped together by directory to form dirblocks.

        :param new_entries: A sorted list of entries. This function does not sort
            to prevent unneeded overhead when callers have a sorted list already.
        :return: Nothing.
        """
        if new_entries[0][0][0:2] != ('', ''):
            raise AssertionError(
                "Missing root row %r" % (new_entries[0][0],))
        # The two blocks here are deliberate: the root block and the
        # contents-of-root block.
        self._dirblocks = [('', []), ('', [])]
        current_block = self._dirblocks[0][1]
        current_dirname = ''
        append_entry = current_block.append
        for entry in new_entries:
            if entry[0][0] != current_dirname:
                # new block - different dirname
                current_block = []
                current_dirname = entry[0][0]
                self._dirblocks.append((current_dirname, current_block))
                append_entry = current_block.append
            # append the entry to the current block
            append_entry(entry)
        self._split_root_dirblock_into_contents()

    def _split_root_dirblock_into_contents(self):
        """Split the root dirblocks into root and contents-of-root.

        After parsing by path, we end up with root entries and contents-of-root
        entries in the same block. This loop splits them out again.
        """
        # The above loop leaves the "root block" entries mixed with the
        # "contents-of-root block". But we don't want an if check on
        # all entries, so instead we just fix it up here.
        if self._dirblocks[1] != ('', []):
            raise ValueError("bad dirblock start %r" % (self._dirblocks[1],))
        root_block = []
        contents_of_root_block = []
        for entry in self._dirblocks[0][1]:
            if not entry[0][1]: # This is a root entry
                root_block.append(entry)
            else:
                contents_of_root_block.append(entry)
        self._dirblocks[0] = ('', root_block)
        self._dirblocks[1] = ('', contents_of_root_block)

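    # For illustration, after the split self._dirblocks is shaped like:
    #     [('', [the root entry]),
    #      ('', [entries directly inside the root directory]),
    #      ('sub', [entries inside directory 'sub']), ...]
    # with one (dirname, entries) block per directory that has children.
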
    def _entry_to_line(self, entry):
        """Serialize entry to a NULL delimited line ready for _get_output_lines.

        :param entry: An entry_tuple as defined in the module docstring.
        """
        entire_entry = list(entry[0])
        for tree_number, tree_data in enumerate(entry[1]):
            # (minikind, fingerprint, size, executable, tree_specific_string)
            entire_entry.extend(tree_data)
            # 3 for the key, 5 for the fields per tree.
            tree_offset = 3 + tree_number * 5
            # minikind
            entire_entry[tree_offset + 0] = tree_data[0]
            # size
            entire_entry[tree_offset + 2] = str(tree_data[2])
            # executable
            entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]
        return '\0'.join(entire_entry)

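    # As an illustration (hypothetical file id and packed_stat value), a root
    # directory entry such as
    #     (('', '', 'a-root-id'), [('d', '', 0, False, 'AAAABBBB')])
    # serializes by joining the fields '', '', 'a-root-id', 'd', '', '0',
    # 'n', 'AAAABBBB' with NUL bytes; the size is stringified and the
    # executable flag is packed as 'y'/'n' via DirState._to_yesno.
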
    def _fields_per_entry(self):
        """How many null separated fields should be in each entry row.

        Each line now has an extra '\n' field which is not used
        so we just skip over it

        entry size:
            3 fields for the key
            + number of fields per tree_data (5) * tree count
            + newline
        """
        tree_count = 1 + self._num_present_parents()
        return 3 + 5 * tree_count + 1

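    # For example, with one present parent tree there are two trees per row,
    # so each row carries 3 + 5 * 2 + 1 = 14 fields (the trailing field
    # holding the unused newline).
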
    def _find_block(self, key, add_if_missing=False):
        """Return the block that key should be present in.

        :param key: A dirstate entry key.
        :return: The block tuple.
        """
        block_index, present = self._find_block_index_from_key(key)
        if not present:
            if not add_if_missing:
                # check to see if key is versioned itself - we might want to
                # add it anyway, because dirs with no entries dont get a
                # dirblock at parse time.
                # This is an uncommon branch to take: most dirs have children,
                # and most code works with versioned paths.
                parent_base, parent_name = osutils.split(key[0])
                if not self._get_block_entry_index(parent_base, parent_name, 0)[3]:
                    # some parent path has not been added - its an error to add
                    # this entry
                    raise errors.NotVersionedError(key[0:2], str(self))
            self._dirblocks.insert(block_index, (key[0], []))
        return self._dirblocks[block_index]

    def _find_block_index_from_key(self, key):
        """Find the dirblock index for a key.

        :return: The block index, True if the block for the key is present.
        """
        if key[0:2] == ('', ''):
            return 0, True
        if (self._last_block_index is not None and
            self._dirblocks[self._last_block_index][0] == key[0]):
            return self._last_block_index, True
        block_index = bisect_dirblock(self._dirblocks, key[0], 1,
                                      cache=self._split_path_cache)
        # _right returns one-past-where-key is so we have to subtract
        # one to use it. we use _right here because there are two
        # '' blocks - the root, and the contents of root
        # we always have a minimum of 2 in self._dirblocks: root and
        # root-contents, and for '', we get 2 back, so this is
        # simple and correct:
        present = (block_index < len(self._dirblocks) and
            self._dirblocks[block_index][0] == key[0])
        self._last_block_index = block_index
        # Reset the entry index cache to the beginning of the block.
        self._last_entry_index = -1
        return block_index, present

    def _find_entry_index(self, key, block):
        """Find the entry index for a key in a block.

        :return: The entry index, True if the entry for the key is present.
        """
        len_block = len(block)
        try:
            if self._last_entry_index is not None:
                # mini-bisect here.
                entry_index = self._last_entry_index + 1
                # A hit is when the key is after the last slot, and before or
                # equal to the next slot.
                if ((entry_index > 0 and block[entry_index - 1][0] < key) and
                    key <= block[entry_index][0]):
                    self._last_entry_index = entry_index
                    present = (block[entry_index][0] == key)
                    return entry_index, present
        except IndexError:
            pass
        entry_index = bisect.bisect_left(block, (key, []))
        present = (entry_index < len_block and
            block[entry_index][0] == key)
        self._last_entry_index = entry_index
        return entry_index, present

    @staticmethod
    def from_tree(tree, dir_state_filename):
        """Create a dirstate from a bzr Tree.

        :param tree: The tree which should provide parent information and
            inventory contents.
        :return: a DirState object which is currently locked for writing.
            (it was locked by DirState.initialize)
        """
        result = DirState.initialize(dir_state_filename)
        try:
            try:
                parent_ids = tree.get_parent_ids()
                num_parents = len(parent_ids)
                parent_trees = []
                for parent_id in parent_ids:
                    parent_tree = tree.branch.repository.revision_tree(parent_id)
                    parent_trees.append((parent_id, parent_tree))
                    parent_tree.lock_read()
                result.set_parent_trees(parent_trees, [])
                result.set_state_from_inventory(tree.inventory)
            finally:
                for revid, parent_tree in parent_trees:
                    parent_tree.unlock()
        except:
            # The caller won't have a chance to unlock this, so make sure we
            # cleanup ourselves
            result.unlock()
            raise
        return result

    def update_by_delta(self, delta):
        """Apply an inventory delta to the dirstate for tree 0

        :param delta: An inventory delta.  See Inventory.apply_delta for
            details.
        """
        self._read_dirblocks_if_needed()
        insertions = {}
        removals = {}
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
            if (file_id in insertions) or (file_id in removals):
                raise AssertionError("repeated file id in delta %r" % (file_id,))
            if old_path is not None:
                old_path = old_path.encode('utf-8')
                removals[file_id] = old_path
            if new_path is not None:
                new_path = new_path.encode('utf-8')
                dirname, basename = osutils.split(new_path)
                key = (dirname, basename, file_id)
                minikind = DirState._kind_to_minikind[inv_entry.kind]
                if minikind == 't':
                    fingerprint = inv_entry.reference_revision
                else:
                    fingerprint = ''
                insertions[file_id] = (key, minikind, inv_entry.executable,
                                       fingerprint, new_path)
            # Transform moves into delete+add pairs
            if None not in (old_path, new_path):
                for child in self._iter_child_entries(0, old_path):
                    if child[0][2] in insertions or child[0][2] in removals:
                        continue
                    child_dirname = child[0][0]
                    child_basename = child[0][1]
                    minikind = child[1][0][0]
                    fingerprint = child[1][0][4]
                    executable = child[1][0][3]
                    old_child_path = osutils.pathjoin(child[0][0],
                                                      child[0][1])
                    removals[child[0][2]] = old_child_path
                    child_suffix = child_dirname[len(old_path):]
                    new_child_dirname = (new_path + child_suffix)
                    key = (new_child_dirname, child_basename, child[0][2])
                    new_child_path = os.path.join(new_child_dirname,
                                                  child_basename)
                    insertions[child[0][2]] = (key, minikind, executable,
                                               fingerprint, new_child_path)
        self._apply_removals(removals.values())
        self._apply_insertions(insertions.values())

    def _apply_removals(self, removals):
        for path in sorted(removals, reverse=True):
            dirname, basename = osutils.split(path)
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(dirname, basename, 0)
            entry = self._dirblocks[block_i][1][entry_i]
            self._make_absent(entry)
            # See if we have a malformed delta: deleting a directory must not
            # leave crud behind. This increases the number of bisects needed
            # substantially, but deletion or renames of large numbers of paths
            # is rare enough it shouldn't be an issue (famous last words?) RBC
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(path, '', 0)
            if d_present:
                # The dir block is still present in the dirstate; this could
                # be due to it being in a parent tree, or a corrupt delta.
                for child_entry in self._dirblocks[block_i][1]:
                    if child_entry[1][0][0] not in ('r', 'a'):
                        raise errors.InconsistentDelta(path, entry[0][2],
                            "The file id was deleted but its children were "
                            "not deleted.")

    def _apply_insertions(self, adds):
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
            self.update_minimal(key, minikind, executable, fingerprint,
                                path_utf8=path_utf8)

    def update_basis_by_delta(self, delta, new_revid):
        """Update the parents of this tree after a commit.

        This gives the tree one parent, with revision id new_revid. The
        inventory delta is applied to the current basis tree to generate the
        inventory for the parent new_revid, and all other parent trees are
        discarded.

        Note that an exception during the operation of this method will leave
        the dirstate in a corrupt state where it should not be saved.

        Finally, we expect all changes to be synchronising the basis tree with
        the working tree.

        :param new_revid: The new revision id for the trees parent.
        :param delta: An inventory delta (see apply_inventory_delta) describing
            the changes from the current left most parent revision to new_revid.
        """
        self._read_dirblocks_if_needed()
        self._discard_merge_parents()
        if self._ghosts != []:
            raise NotImplementedError(self.update_basis_by_delta)
        if len(self._parents) == 0:
            # setup a blank tree, the most simple way.
            empty_parent = DirState.NULL_PARENT_DETAILS
            for entry in self._iter_entries():
                entry[1].append(empty_parent)
            self._parents.append(new_revid)
        else:
            self._parents[0] = new_revid

        delta = sorted(delta, reverse=True)
        adds = []
        changes = []
        deletes = []
        # The paths this function accepts are unicode and must be encoded as we
        # go.
        encode = cache_utf8.encode
        inv_to_entry = self._inv_entry_to_details
        # delta is now (deletes, changes), (adds) in reverse lexographical
        # order.
        # deletes in reverse lexographic order are safe to process in situ.
        # renames are not, as a rename from any path could go to a path
        # lexographically lower, so we transform renames into delete, add pairs,
        # expanding them recursively as needed.
        # At the same time, to reduce interface friction we convert the input
        # inventory entries to dirstate.
        root_only = ('', '')
        for old_path, new_path, file_id, inv_entry in delta:
            if old_path is None:
                adds.append((None, encode(new_path), file_id,
                    inv_to_entry(inv_entry), True))
            elif new_path is None:
                deletes.append((encode(old_path), None, file_id, None, True))
            elif (old_path, new_path) != root_only:
                # Because renames must preserve their children we must have
                # processed all relocations and removes before hand. The sort
                # order ensures we've examined the child paths, but we also
                # have to execute the removals, or the split to an add/delete
                # pair will result in the deleted item being reinserted, or
                # renamed items being reinserted twice - and possibly at the
                # wrong place. Splitting into a delete/add pair also simplifies
                # the handling of entries with ('f', ...), ('r' ...) because
                # the target of the 'r' is old_path here, and we add that to
                # deletes, meaning that the add handler does not need to check
                # for 'r' items on every pass.
                self._update_basis_apply_deletes(deletes)
                deletes = []
                new_path_utf8 = encode(new_path)
                # Split into an add/delete pair recursively.
                adds.append((None, new_path_utf8, file_id,
                    inv_to_entry(inv_entry), False))
                # Expunge deletes that we've seen so that deleted/renamed
                # children of a rename directory are handled correctly.
                new_deletes = reversed(list(self._iter_child_entries(1,
                    encode(old_path))))
                # Remove the current contents of the tree at orig_path, and
                # reinsert at the correct new path.
                for entry in new_deletes:
                    if entry[0][0]:
                        source_path = entry[0][0] + '/' + entry[0][1]
                    else:
                        source_path = entry[0][1]
                    if new_path_utf8:
                        target_path = new_path_utf8 + source_path[len(old_path):]
                    else:
                        if old_path == '':
                            raise AssertionError("cannot rename directory to"
                                " itself")
                        target_path = source_path[len(old_path) + 1:]
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
                    deletes.append(
                        (source_path, target_path, entry[0][2], None, False))
                deletes.append(
                    (encode(old_path), new_path, file_id, None, False))
            else:
                # changes to just the root should not require remove/insertion
                # of everything.
                changes.append((encode(old_path), encode(new_path), file_id,
                    inv_to_entry(inv_entry)))

        # Finish expunging deletes/first half of renames.
        self._update_basis_apply_deletes(deletes)
        # Reinstate second half of renames and new paths.
        self._update_basis_apply_adds(adds)
        # Apply in-situ changes.
        self._update_basis_apply_changes(changes)

        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._header_state = DirState.IN_MEMORY_MODIFIED
        self._id_index = None

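    # Illustration of the rename split above (hypothetical ids): renaming
    # directory 'a' (which contains 'a/c') to 'b' is transformed into
    #     deletes: ('a/c', 'b/c', 'c-id', None, False),
    #              ('a', 'b', 'a-id', None, False)
    #     adds:    (None, 'b', 'a-id', <details>, False),
    #              (None, 'b/c', 'c-id', <details>, False)
    # so children are removed from the old location first and then
    # reinserted under the new path, parents before children.
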
    def _update_basis_apply_adds(self, adds):
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.

        They may be adds, or renames that have been split into add/delete
        pairs.

        :param adds: A sequence of adds. Each add is a tuple:
            (None, new_path_utf8, file_id, (entry_details), real_add). real_add
            is False when the add is the second half of a remove-and-reinsert
            pair created to handle renames and deletes.
        """
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        adds.sort()
        # adds is now in lexographic order, which places all parents before
        # their children, so we can process it linearly.
        absent = 'ar'
        for old_path, new_path, file_id, new_details, real_add in adds:
            # the entry for this file_id must be in tree 0.
            entry = self._get_entry(0, file_id, new_path)
            if entry[0] is None or entry[0][2] != file_id:
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'working tree does not contain new entry')
            if real_add and entry[1][1][0] not in absent:
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'The entry was considered to be a genuinely new record,'
                    ' but there was already an old record for it.')
            # We don't need to update the target of an 'r' because the handling
            # of renames turns all 'r' situations into a delete at the original
            # location.
            entry[1][1] = new_details

    def _update_basis_apply_changes(self, changes):
        """Apply a sequence of changes to tree 1 during update_basis_by_delta.

        :param changes: A sequence of changes. Each change is a tuple:
            (path_utf8, path_utf8, file_id, (entry_details))
        """
        absent = 'ar'
        for old_path, new_path, file_id, new_details in changes:
            # the entry for this file_id must be in tree 0.
            entry = self._get_entry(0, file_id, new_path)
            if entry[0] is None or entry[0][2] != file_id:
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'working tree does not contain new entry')
            if (entry[1][0][0] in absent or
                entry[1][1][0] in absent):
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'changed considered absent')
            entry[1][1] = new_details

    def _update_basis_apply_deletes(self, deletes):
        """Apply a sequence of deletes to tree 1 during update_basis_by_delta.

        They may be deletes, or renames that have been split into add/delete
        pairs.

        :param deletes: A sequence of deletes. Each delete is a tuple:
            (old_path_utf8, new_path_utf8, file_id, None, real_delete).
            real_delete is True when the desired outcome is an actual deletion
            rather than the rename handling logic temporarily deleting a path
            during the replacement of a parent.
        """
        null = DirState.NULL_PARENT_DETAILS
        for old_path, new_path, file_id, _, real_delete in deletes:
            if real_delete != (new_path is None):
                raise AssertionError("bad delete delta")
            # the entry for this file_id must be in tree 1.
            dirname, basename = osutils.split(old_path)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, 1)
            if not file_present:
                self._changes_aborted = True
                raise errors.InconsistentDelta(old_path, file_id,
                    'basis tree does not contain removed entry')
            entry = self._dirblocks[block_index][1][entry_index]
            if entry[0][2] != file_id:
                self._changes_aborted = True
                raise errors.InconsistentDelta(old_path, file_id,
                    'mismatched file_id in tree 1')
            if real_delete:
                if entry[1][0][0] != 'a':
                    self._changes_aborted = True
                    raise errors.InconsistentDelta(old_path, file_id,
                        'This was marked as a real delete, but the WT state'
                        ' claims that it still exists and is versioned.')
                del self._dirblocks[block_index][1][entry_index]
            else:
                if entry[1][0][0] == 'a':
                    self._changes_aborted = True
                    raise errors.InconsistentDelta(old_path, file_id,
                        'The entry was considered a rename, but the source path'
                        ' is marked as absent.')
                    # For whatever reason, we were asked to rename an entry
                    # that was originally marked as deleted. This could be
                    # because we are renaming the parent directory, and the WT
                    # current state has the file marked as deleted.
                elif entry[1][0][0] == 'r':
                    # implement the rename
                    del self._dirblocks[block_index][1][entry_index]
                else:
                    # it is being resurrected here, so blank it out temporarily.
                    self._dirblocks[block_index][1][entry_index][1][1] = null

    def _sha_cutoff_time(self):
        """Return cutoff time.

        Files modified more recently than this time are at risk of being
        undetectably modified and so can't be cached.
        """
        # Cache the cutoff time as long as we hold a lock.
        # time.time() isn't super expensive (approx 3.38us), but
        # when you call it 50,000 times it adds up.
        # For comparison, os.lstat() costs 7.2us if it is hot.
        self._cutoff_time = int(time.time()) - 3
        return self._cutoff_time

    def _lstat(self, abspath, entry):
        """Return the os.lstat value for this path."""
        return os.lstat(abspath)

    def _sha1_file_and_mutter(self, abspath):
        # when -Dhashcache is turned on, this is monkey-patched in to log
        # file reads
        trace.mutter("dirstate sha1 " + abspath)
        return osutils.sha_file_by_name(abspath)

    def _is_executable(self, mode, old_executable):
        """Is this file executable?"""
        return bool(S_IEXEC & mode)

    def _is_executable_win32(self, mode, old_executable):
        """On win32 the executable bit is stored in the dirstate."""
        return old_executable

    if sys.platform == 'win32':
        _is_executable = _is_executable_win32

    def _read_link(self, abspath, old_link):
        """Read the target of a symlink"""
        # TODO: jam 200700301 On Win32, this could just return the value
        #       already in memory. However, this really needs to be done at a
        #       higher level, because there either won't be anything on disk,
        #       or the thing on disk will be a file.
        return os.readlink(abspath)

    def get_ghosts(self):
        """Return a list of the parent tree revision ids that are ghosts."""
        self._read_header_if_needed()
        return self._ghosts

    def get_lines(self):
        """Serialise the entire dirstate to a sequence of lines."""
        if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
            self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
            # read whats on disk.
            self._state_file.seek(0)
            return self._state_file.readlines()
        lines = []
        lines.append(self._get_parents_line(self.get_parent_ids()))
        lines.append(self._get_ghosts_line(self._ghosts))
        # append the root line which is special cased
        lines.extend(map(self._entry_to_line, self._iter_entries()))
        return self._get_output_lines(lines)

    def _get_ghosts_line(self, ghost_ids):
        """Create a line for the state file for ghost information."""
        return '\0'.join([str(len(ghost_ids))] + ghost_ids)

    def _get_parents_line(self, parent_ids):
        """Create a line for the state file for parents information."""
        return '\0'.join([str(len(parent_ids))] + parent_ids)

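    # For example (hypothetical revision ids):
    #     _get_parents_line(['rev-1', 'rev-2'])  =>  '2\x00rev-1\x00rev-2'
    #     _get_ghosts_line([])                   =>  '0'
    # matching the "count, NUL-separated ids" shape described in the module
    # docstring grammar.
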
    def _get_fields_to_entry(self):
        """Get a function which converts entry fields into a entry record.

        This handles size and executable, as well as parent records.

        :return: A function which takes a list of fields, and returns an
            appropriate record for storing in memory.
        """
        # This is intentionally unrolled for performance
        num_present_parents = self._num_present_parents()
        if num_present_parents == 0:
            def fields_to_entry_0_parents(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                return (path_name_file_id_key, [
                    ( # Current tree
                        fields[3],                # minikind
                        fields[4],                # fingerprint
                        _int(fields[5]),          # size
                        fields[6] == 'y',         # executable
                        fields[7],                # packed_stat or revision_id
                    )])
            return fields_to_entry_0_parents
        elif num_present_parents == 1:
            def fields_to_entry_1_parent(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                return (path_name_file_id_key, [
                    ( # Current tree
                        fields[3],                # minikind
                        fields[4],                # fingerprint
                        _int(fields[5]),          # size
                        fields[6] == 'y',         # executable
                        fields[7],                # packed_stat or revision_id
                    ),
                    ( # Parent 1
                        fields[8],                # minikind
                        fields[9],                # fingerprint
                        _int(fields[10]),         # size
                        fields[11] == 'y',        # executable
                        fields[12],               # packed_stat or revision_id
                    ),
                    ])
            return fields_to_entry_1_parent
        elif num_present_parents == 2:
            def fields_to_entry_2_parents(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                return (path_name_file_id_key, [
                    ( # Current tree
                        fields[3],                # minikind
                        fields[4],                # fingerprint
                        _int(fields[5]),          # size
                        fields[6] == 'y',         # executable
                        fields[7],                # packed_stat or revision_id
                    ),
                    ( # Parent 1
                        fields[8],                # minikind
                        fields[9],                # fingerprint
                        _int(fields[10]),         # size
                        fields[11] == 'y',        # executable
                        fields[12],               # packed_stat or revision_id
                    ),
                    ( # Parent 2
                        fields[13],               # minikind
                        fields[14],               # fingerprint
                        _int(fields[15]),         # size
                        fields[16] == 'y',        # executable
                        fields[17],               # packed_stat or revision_id
                    ),
                    ])
            return fields_to_entry_2_parents
        else:
            def fields_to_entry_n_parents(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                trees = [(fields[cur],                # minikind
                          fields[cur+1],              # fingerprint
                          _int(fields[cur+2]),        # size
                          fields[cur+3] == 'y',       # executable
                          fields[cur+4],              # stat or revision_id
                         ) for cur in xrange(3, len(fields)-1, 5)]
                return path_name_file_id_key, trees
            return fields_to_entry_n_parents

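    # For example, with no parents present, the serialized fields
    #     ['', '', 'a-root-id', 'd', '', '0', 'n', 'packed-stat']
    # (hypothetical file id and packed_stat) become the in-memory entry
    #     (('', '', 'a-root-id'), [('d', '', 0, False, 'packed-stat')])
    # with the size converted to an int and the executable flag to a bool.
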
    def get_parent_ids(self):
        """Return a list of the parent tree ids for the directory state."""
        self._read_header_if_needed()
        return list(self._parents)

    def _get_block_entry_index(self, dirname, basename, tree_index):
        """Get the coordinates for a path in the state structure.

        :param dirname: The utf8 dirname to lookup.
        :param basename: The utf8 basename to lookup.
        :param tree_index: The index of the tree for which this lookup should
            be attempted.
        :return: A tuple describing where the path is located, or should be
            inserted. The tuple contains four fields: the block index, the row
            index, the directory is present (boolean), the entire path is
            present (boolean).  There is no guarantee that either
            coordinate is currently reachable unless the found field for it is
            True. For instance, a directory not present in the searched tree
            may be returned with a value one greater than the current highest
            block offset. The directory present field will always be True when
            the path present field is True. The directory present field does
            NOT indicate that the directory is present in the searched tree,
            rather it indicates that there are at least some files in some
            parent tree present there.
        """
        self._read_dirblocks_if_needed()
        key = dirname, basename, ''
        block_index, present = self._find_block_index_from_key(key)
        if not present:
            # no such directory - return the dir index and 0 for the row.
            return block_index, 0, False, False
        block = self._dirblocks[block_index][1] # access the entries only
        entry_index, present = self._find_entry_index(key, block)
        # linear search through entries at this path to find the one
        # requested.
        while entry_index < len(block) and block[entry_index][0][1] == basename:
            if block[entry_index][1][tree_index][0] not in 'ar':
                # neither absent or relocated
                return block_index, entry_index, True, True
            entry_index += 1
        return block_index, entry_index, True, False

    def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None):
        """Get the dirstate entry for path in tree tree_index.

        If either file_id or path is supplied, it is used as the key to lookup.
        If both are supplied, the fastest lookup is used, and an error is
        raised if they do not both point at the same row.

        :param tree_index: The index of the tree we wish to locate this path
            in. If the path is present in that tree, the entry containing its
            details is returned, otherwise (None, None) is returned
            0 is the working tree, higher indexes are successive parent
            trees.
        :param fileid_utf8: A utf8 file_id to look up.
        :param path_utf8: An utf8 path to be looked up.
        :return: The dirstate entry tuple for path, or (None, None)
        """
        self._read_dirblocks_if_needed()
        if path_utf8 is not None:
            if type(path_utf8) is not str:
                raise AssertionError('path_utf8 is not a str: %s %s'
                    % (type(path_utf8), path_utf8))
            # path lookups are faster
            dirname, basename = osutils.split(path_utf8)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, tree_index)
            if not file_present:
                return None, None
            entry = self._dirblocks[block_index][1][entry_index]
            if not (entry[0][2] and entry[1][tree_index][0] not in ('a', 'r')):
                raise AssertionError('unversioned entry?')
            if fileid_utf8:
                if entry[0][2] != fileid_utf8:
                    self._changes_aborted = True
                    raise errors.BzrError('integrity error ? : mismatching'
                                          ' tree_index, file_id and path')
            return entry
        else:
            possible_keys = self._get_id_index().get(fileid_utf8, None)
            if not possible_keys:
                return None, None
            for key in possible_keys:
                block_index, present = \
                    self._find_block_index_from_key(key)
                # strange, probably indicates an out of date
                # id index - for now, allow this.
                if not present:
                    continue
                # WARNING: DO not change this code to use _get_block_entry_index
                # as that function is not suitable: it does not use the key
                # to lookup, and thus the wrong coordinates are returned.
                block = self._dirblocks[block_index][1]
                entry_index, present = self._find_entry_index(key, block)
                if present:
                    entry = self._dirblocks[block_index][1][entry_index]
                    if entry[1][tree_index][0] in 'fdlt':
                        # this is the result we are looking for: the
                        # real home of this file_id in this tree.
                        return entry
                    if entry[1][tree_index][0] == 'a':
                        # there is no home for this entry in this tree
                        return None, None
                    if entry[1][tree_index][0] != 'r':
                        raise AssertionError(
                            "entry %r has invalid minikind %r for tree %r" \
                            % (entry,
                               entry[1][tree_index][0],
                               tree_index))
                    real_path = entry[1][tree_index][1]
                    return self._get_entry(tree_index, fileid_utf8=fileid_utf8,
                        path_utf8=real_path)
            return None, None

def initialize(cls, path):
1744
"""Create a new dirstate on path.
1746
The new dirstate will be an empty tree - that is it has no parents,
1747
and only a root node - which has id ROOT_ID.
1749
:param path: The name of the file for the dirstate.
1750
:return: A write-locked DirState object.
1752
# This constructs a new DirState object on a path, sets the _state_file
1753
# to a new empty file for that path. It then calls _set_data() with our
1754
# stock empty dirstate information - a root with ROOT_ID, no children,
1755
# and no parents. Finally it calls save() to ensure that this data will
1758
# root dir and root dir contents with no children.
1759
empty_tree_dirblocks = [('', []), ('', [])]
1760
# a new root directory, with a NULLSTAT.
1761
empty_tree_dirblocks[0][1].append(
1762
(('', '', inventory.ROOT_ID), [
1763
('d', '', 0, False, DirState.NULLSTAT),
1767
result._set_data([], empty_tree_dirblocks)
    def _inv_entry_to_details(inv_entry):
        """Convert an inventory entry (from a revision tree) to state details.

        :param inv_entry: An inventory entry whose sha1 and link targets can be
            relied upon, and which has a revision set.
        :return: A details tuple - the details for a single tree at a path +
            id.
        """
        kind = inv_entry.kind
        minikind = DirState._kind_to_minikind[kind]
        tree_data = inv_entry.revision
        if kind == 'directory':
            fingerprint = ''
            size = 0
            executable = False
        elif kind == 'symlink':
            # We don't support non-ascii targets for symlinks yet.
            fingerprint = str(inv_entry.symlink_target or '')
            size = 0
            executable = False
        elif kind == 'file':
            fingerprint = inv_entry.text_sha1 or ''
            size = inv_entry.text_size or 0
            executable = inv_entry.executable
        elif kind == 'tree-reference':
            fingerprint = inv_entry.reference_revision or ''
            size = 0
            executable = False
        else:
            raise Exception("can't pack %s" % inv_entry)
        return (minikind, fingerprint, size, executable, tree_data)
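To make the packing rule above concrete, here is a hypothetical standalone sketch: a plain function with keyword arguments standing in for a real InventoryEntry. `pack_details` and `_KIND_TO_MINIKIND` are illustrative names, not part of this module.

```python
# Hypothetical sketch of the kind -> details packing performed by
# _inv_entry_to_details; the function and its arguments are illustrative only.
_KIND_TO_MINIKIND = {'directory': 'd', 'file': 'f', 'symlink': 'l',
                     'tree-reference': 't'}

def pack_details(kind, revision, sha1=None, size=None, executable=False,
                 symlink_target=None, reference_revision=None):
    """Return a (minikind, fingerprint, size, executable, tree_data) tuple."""
    minikind = _KIND_TO_MINIKIND[kind]
    if kind == 'directory':
        return (minikind, '', 0, False, revision)
    if kind == 'symlink':
        # the fingerprint is the link target, as in the method above
        return (minikind, str(symlink_target or ''), 0, False, revision)
    if kind == 'file':
        # the fingerprint is the text sha1
        return (minikind, sha1 or '', size or 0, executable, revision)
    if kind == 'tree-reference':
        # the fingerprint is the referenced revision
        return (minikind, reference_revision or '', 0, False, revision)
    raise ValueError("can't pack %s" % kind)
```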
    _inv_entry_to_details = staticmethod(_inv_entry_to_details)

    def _iter_child_entries(self, tree_index, path_utf8):
        """Iterate over all the entries that are children of path_utf8.

        This only returns entries that are present (not in 'a', 'r') in
        tree_index. tree_index data is not refreshed, so if tree 0 is used,
        results may differ from that obtained if paths were statted to
        determine what ones were directories.

        Asking for the children of a non-directory will return an empty
        iterator.
        """
        pending_dirs = []
        next_pending_dirs = [path_utf8]
        absent = 'ar'
        while next_pending_dirs:
            pending_dirs = next_pending_dirs
            next_pending_dirs = []
            for path in pending_dirs:
                block_index, present = self._find_block_index_from_key(
                    (path, '', ''))
                if block_index == 0:
                    block_index = 1
                    if len(self._dirblocks) == 1:
                        # asked for the children of the root with no other
                        # contents.
                        return
                if not present:
                    # children of a non-directory asked for.
                    continue
                block = self._dirblocks[block_index]
                for entry in block[1]:
                    kind = entry[1][tree_index][0]
                    if kind not in absent:
                        yield entry
                    if kind == 'd':
                        if entry[0][0]:
                            path = entry[0][0] + '/' + entry[0][1]
                        else:
                            path = entry[0][1]
                        next_pending_dirs.append(path)

    def _iter_entries(self):
        """Iterate over all the entries in the dirstate.

        Each yielded item is an entry in the standard format described in the
        docstring of bzrlib.dirstate.
        """
        self._read_dirblocks_if_needed()
        for directory in self._dirblocks:
            for entry in directory[1]:
                yield entry

    def _get_id_index(self):
        """Get an id index of self._dirblocks."""
        if self._id_index is None:
            id_index = {}
            for key, tree_details in self._iter_entries():
                id_index.setdefault(key[2], set()).add(key)
            self._id_index = id_index
        return self._id_index
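The id index built above can be sketched as a standalone function (a hypothetical helper, `build_id_index`, mirroring the loop in `_get_id_index`): it maps each file id (`key[2]`) to the set of entry keys that mention it.

```python
def build_id_index(entries):
    # Map file_id -> set of (dirname, basename, file_id) entry keys,
    # mirroring the loop body in _get_id_index above. Illustrative only.
    id_index = {}
    for key, tree_details in entries:
        id_index.setdefault(key[2], set()).add(key)
    return id_index
```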

    def _get_output_lines(self, lines):
        """Format lines for final output.

        :param lines: A sequence of lines containing the parents list and the
            entries.
        """
        output_lines = [DirState.HEADER_FORMAT_3]
        lines.append('') # a final newline
        inventory_text = '\0\n\0'.join(lines)
        output_lines.append('crc32: %s\n' % (zlib.crc32(inventory_text),))
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
        num_entries = len(lines)-3
        output_lines.append('num_entries: %s\n' % (num_entries,))
        output_lines.append(inventory_text)
        return output_lines
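The two header fields computed above can be restated as a standalone sketch (Python 3 flavoured bytes for testability; the module itself is Python 2, and `checksum_fields` is an illustrative name):

```python
import zlib

def checksum_fields(lines):
    # lines: the serialised body - one parents line, one ghosts line, then
    # one line per entry, mirroring the layout used by _get_output_lines.
    lines = list(lines) + [b'']  # a final newline
    inventory_text = b'\0\n\0'.join(lines)
    # -3: one for num parents, one for ghosts, one for the final newline
    return zlib.crc32(inventory_text) & 0xffffffff, len(lines) - 3
```

Note the original writes `zlib.crc32(...)` directly, which may be negative on Python 2 (hence the optional `-` in the `full checksum` grammar rule); the sketch masks to an unsigned value for portability.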

    def _make_deleted_row(self, fileid_utf8, parents):
        """Return a deleted row for fileid_utf8."""
        return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,
            ''), parents

    def _num_present_parents(self):
        """The number of parent entries in each record row."""
        return len(self._parents) - len(self._ghosts)

    def on_file(path):
        """Construct a DirState on the file at path path.

        :return: An unlocked DirState object, associated with the given path.
        """
        result = DirState(path)
        return result
    on_file = staticmethod(on_file)

    def _read_dirblocks_if_needed(self):
        """Read in all the dirblocks from the file if they are not in memory.

        This populates self._dirblocks, and sets self._dirblock_state to
        IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block
        loading.
        """
        self._read_header_if_needed()
        if self._dirblock_state == DirState.NOT_IN_MEMORY:
            _read_dirblocks(self)

    def _read_header(self):
        """This reads in the metadata header, and the parent ids.

        After reading in, the file should be positioned at the null
        just before the start of the first record in the file.

        :return: (expected crc checksum, number of entries, parent list)
        """
        self._read_prelude()
        parent_line = self._state_file.readline()
        info = parent_line.split('\0')
        num_parents = int(info[0])
        self._parents = info[1:-1]
        ghost_line = self._state_file.readline()
        info = ghost_line.split('\0')
        num_ghosts = int(info[1])
        self._ghosts = info[2:-1]
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._end_of_header = self._state_file.tell()

    def _read_header_if_needed(self):
        """Read the header of the dirstate file if needed."""
        # inline this as it will be called a lot
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)
        if self._header_state == DirState.NOT_IN_MEMORY:
            self._read_header()

    def _read_prelude(self):
        """Read in the prelude header of the dirstate file.

        This only reads in the stuff that is not connected to the crc
        checksum. The position will be correct to read in the rest of
        the file and check the checksum after this point.
        The next entry in the file should be the number of parents,
        and their ids. Followed by a newline.
        """
        header = self._state_file.readline()
        if header != DirState.HEADER_FORMAT_3:
            raise errors.BzrError(
                'invalid header line: %r' % (header,))
        crc_line = self._state_file.readline()
        if not crc_line.startswith('crc32: '):
            raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
        self.crc_expected = int(crc_line[len('crc32: '):-1])
        num_entries_line = self._state_file.readline()
        if not num_entries_line.startswith('num_entries: '):
            raise errors.BzrError('missing num_entries line')
        self._num_entries = int(num_entries_line[len('num_entries: '):-1])
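The three prelude lines parsed above (header, crc, entry count - see the grammar in the module docstring) can be restated as a standalone sketch; `parse_prelude` is an illustrative name, and it takes a list of lines rather than reading from a file object.

```python
def parse_prelude(lines):
    # Hypothetical re-statement of _read_prelude over a list of lines:
    # validate the format-3 header, then pull out the crc and entry count.
    header, crc_line, num_line = lines[0], lines[1], lines[2]
    if header != '#bazaar dirstate flat format 3\n':
        raise ValueError('invalid header line: %r' % (header,))
    if not crc_line.startswith('crc32: '):
        raise ValueError('missing crc32 checksum: %r' % crc_line)
    crc_expected = int(crc_line[len('crc32: '):-1])  # strip prefix and newline
    if not num_line.startswith('num_entries: '):
        raise ValueError('missing num_entries line')
    num_entries = int(num_line[len('num_entries: '):-1])
    return crc_expected, num_entries
```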

    def sha1_from_stat(self, path, stat_result, _pack_stat=pack_stat):
        """Find a sha1 given a stat lookup."""
        return self._get_packed_stat_index().get(_pack_stat(stat_result), None)

    def _get_packed_stat_index(self):
        """Get a packed_stat index of self._dirblocks."""
        if self._packed_stat_index is None:
            index = {}
            for key, tree_details in self._iter_entries():
                if tree_details[0][0] == 'f':
                    index[tree_details[0][4]] = tree_details[0][1]
            self._packed_stat_index = index
        return self._packed_stat_index

    def save(self):
        """Save any pending changes created during this session.

        We reuse the existing file, because that prevents race conditions with
        file creation, and use oslocks on it to prevent concurrent modification
        and reads - because dirstate's incremental data aggregation is not
        compatible with reading a modified file, and replacing a file in use by
        another process is impossible on Windows.

        A dirstate in read only mode should be smart enough though to validate
        that the file has not changed, and otherwise discard its cache and
        start over, to allow for fine grained read lock duration, so 'status'
        won't block 'commit' - for example.
        """
        if self._changes_aborted:
            # Should this be a warning? For now, I'm expecting that places that
            # mark it inconsistent will warn, making a warning here redundant.
            trace.mutter('Not saving DirState because '
                    '_changes_aborted is set.')
            return
        if (self._header_state == DirState.IN_MEMORY_MODIFIED or
            self._dirblock_state == DirState.IN_MEMORY_MODIFIED):

            grabbed_write_lock = False
            if self._lock_state != 'w':
                grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
                # Switch over to the new lock, as the old one may be closed.
                # TODO: jam 20070315 We should validate the disk file has
                #       not changed contents. Since temporary_write_lock may
                #       not be an atomic operation.
                self._lock_token = new_lock
                self._state_file = new_lock.f
                if not grabbed_write_lock:
                    # We couldn't grab a write lock, so we switch back to a read one
                    return
            try:
                self._state_file.seek(0)
                self._state_file.writelines(self.get_lines())
                self._state_file.truncate()
                self._state_file.flush()
                self._header_state = DirState.IN_MEMORY_UNMODIFIED
                self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
            finally:
                if grabbed_write_lock:
                    self._lock_token = self._lock_token.restore_read_lock()
                    self._state_file = self._lock_token.f
                    # TODO: jam 20070315 We should validate the disk file has
                    #       not changed contents. Since restore_read_lock may
                    #       not be an atomic operation.

    def _set_data(self, parent_ids, dirblocks):
        """Set the full dirstate data in memory.

        This is an internal function used to completely replace the objects
        in memory state. It puts the dirstate into state 'full-dirty'.

        :param parent_ids: A list of parent tree revision ids.
        :param dirblocks: A list containing one tuple for each directory in the
            tree. Each tuple contains the directory path and a list of entries
            found in that directory.
        """
        # our memory copy is now authoritative.
        self._dirblocks = dirblocks
        self._header_state = DirState.IN_MEMORY_MODIFIED
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._parents = list(parent_ids)
        self._id_index = None
        self._packed_stat_index = None

    def set_path_id(self, path, new_id):
        """Change the id of path to new_id in the current working tree.

        :param path: The path inside the tree to set - '' is the root, 'foo'
            is the path foo in the root.
        :param new_id: The new id to assign to the path. This must be a utf8
            file id (not unicode, and not None).
        """
        self._read_dirblocks_if_needed()
        if len(path):
            # TODO: logic not written
            raise NotImplementedError(self.set_path_id)
        # TODO: check new id is unique
        entry = self._get_entry(0, path_utf8=path)
        if entry[0][2] == new_id:
            # Nothing to change.
            return
        # mark the old path absent, and insert a new root path
        self._make_absent(entry)
        self.update_minimal(('', '', new_id), 'd',
            path_utf8='', packed_stat=entry[1][0][4])
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        if self._id_index is not None:
            self._id_index.setdefault(new_id, set()).add(entry[0])

    def set_parent_trees(self, trees, ghosts):
        """Set the parent trees for the dirstate.

        :param trees: A list of revision_id, tree tuples. tree must be provided
            even if the revision_id refers to a ghost: supply an empty tree in
            this case.
        :param ghosts: A list of the revision_ids that are ghosts at the time
            of setting.
        """
        # TODO: generate a list of parent indexes to preserve to save
        # processing specific parent trees. In the common case one tree will
        # be preserved - the left most parent.
        # TODO: if the parent tree is a dirstate, we might want to walk them
        # all by path in parallel for 'optimal' common-case performance.
        # generate new root row.
        self._read_dirblocks_if_needed()
        # TODO future sketch: Examine the existing parents to generate a change
        # map and then walk the new parent trees only, mapping them into the
        # dirstate. Walk the dirstate at the same time to remove unreferenced
        # entries.
        # sketch: loop over all entries in the dirstate, cherry picking
        # entries from the parent trees, if they are not ghost trees.
        # after we finish walking the dirstate, all entries not in the dirstate
        # are deletes, so we want to append them to the end as per the design
        # discussions. So do a set difference on ids with the parents to
        # get deletes, and add them to the end.
        # During the update process we need to answer the following questions:
        # - find other keys containing a fileid in order to create cross-path
        #   links. We don't trivially use the inventory from other trees
        #   because this leads to either double touching, or to accessing
        #   missing keys,
        # - find other keys containing a path
        # We accumulate each entry via this dictionary, including the root
        by_path = {}
        id_index = {}
        # we could do parallel iterators, but because file id data may be
        # scattered throughout, we don't save on index overhead: we have to look
        # at everything anyway. We can probably save cycles by reusing parent
        # data and doing an incremental update when adding an additional
        # parent, but for now the common cases are adding a new parent (merge),
        # and replacing completely (commit), and commit is more common: so
        # optimise merge later.

        # ---- start generation of full tree mapping data
        # what trees should we use?
        parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
        # how many trees do we end up with
        parent_count = len(parent_trees)

        # one: the current tree
        for entry in self._iter_entries():
            # skip entries not in the current tree
            if entry[1][0][0] in 'ar': # absent, relocated
                continue
            by_path[entry[0]] = [entry[1][0]] + \
                [DirState.NULL_PARENT_DETAILS] * parent_count
            id_index[entry[0][2]] = set([entry[0]])

        # now the parent trees:
        for tree_index, tree in enumerate(parent_trees):
            # the index is off by one, adjust it.
            tree_index = tree_index + 1
            # when we add new locations for a fileid we need these ranges for
            # any fileid in this tree as we set the by_path[id] to:
            # already_processed_tree_details + new_details + new_location_suffix
            # the suffix is from tree_index+1:parent_count+1.
            new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
            # now stitch in all the entries from this tree
            for path, entry in tree.inventory.iter_entries_by_dir():
                # here we process each trees details for each item in the tree.
                # we first update any existing entries for the id at other paths,
                # then we either create or update the entry for the id at the
                # right path, and finally we add (if needed) a mapping from
                # file_id to this path. We do it in this order to allow us to
                # avoid checking all known paths for the id when generating a
                # new entry at this path: by adding the id->path mapping last,
                # all the mappings are valid and have correct relocation
                # records where needed.
                file_id = entry.file_id
                path_utf8 = path.encode('utf8')
                dirname, basename = osutils.split(path_utf8)
                new_entry_key = (dirname, basename, file_id)
                # tree index consistency: All other paths for this id in this tree
                # index must point to the correct path.
                for entry_key in id_index.setdefault(file_id, set()):
                    # TODO:PROFILING: It might be faster to just update
                    # rather than checking if we need to, and then overwrite
                    # the one we are located at.
                    if entry_key != new_entry_key:
                        # this file id is at a different path in one of the
                        # other trees, so put absent pointers there
                        # This is the vertical axis in the matrix, all pointing
                        # to the real path.
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
                # by path consistency: Insert into an existing path record (trivial), or
                # add a new one with relocation pointers for the other tree indexes.
                if new_entry_key in id_index[file_id]:
                    # there is already an entry where this data belongs, just insert it.
                    by_path[new_entry_key][tree_index] = \
                        self._inv_entry_to_details(entry)
                else:
                    # add relocated entries to the horizontal axis - this row
                    # mapping from path,id. We need to look up the correct path
                    # for the indexes from 0 to tree_index -1
                    new_details = []
                    for lookup_index in xrange(tree_index):
                        # boundary case: this is the first occurrence of file_id
                        # so there are no id_indexs, possibly take this out of
                        # the loop?
                        if not len(id_index[file_id]):
                            new_details.append(DirState.NULL_PARENT_DETAILS)
                        else:
                            # grab any one entry, use it to find the right path.
                            # TODO: optimise this to reduce memory use in highly
                            # fragmented situations by reusing the relocation
                            # records.
                            a_key = iter(id_index[file_id]).next()
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
                                # it's a pointer or missing statement, use it as is.
                                new_details.append(by_path[a_key][lookup_index])
                            else:
                                # we have the right key, make a pointer to it.
                                real_path = ('/'.join(a_key[0:2])).strip('/')
                                new_details.append(('r', real_path, 0, False, ''))
                    new_details.append(self._inv_entry_to_details(entry))
                    new_details.extend(new_location_suffix)
                    by_path[new_entry_key] = new_details
                    id_index[file_id].add(new_entry_key)
        # --- end generation of full tree mappings

        # sort and output all the entries
        new_entries = self._sort_entries(by_path.items())
        self._entries_to_current_state(new_entries)
        self._parents = [rev_id for rev_id, tree in trees]
        self._ghosts = list(ghosts)
        self._header_state = DirState.IN_MEMORY_MODIFIED
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._id_index = id_index

    def _sort_entries(self, entry_list):
        """Given a list of entries, sort them into the right order.

        This is done when constructing a new dirstate from trees - normally we
        try to keep everything in sorted blocks all the time, but sometimes
        it's easier to sort after the fact.
        """
        def _key(entry):
            # sort by: directory parts, file name, file id
            return entry[0][0].split('/'), entry[0][1], entry[0][2]
        return sorted(entry_list, key=_key)
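The split in the key function above matters: plain string comparison would sort 'a-x' before 'a/b' (since '-' < '/'), separating a directory from its children, while comparing the path components as lists keeps each directory's block together. A small standalone check (`entry_sort_key` is an illustrative copy of `_key`):

```python
def entry_sort_key(entry):
    # directory parts, then file name, then file id - as in _sort_entries
    return entry[0][0].split('/'), entry[0][1], entry[0][2]

entries = [(('a-x', 'f', 'id1'), []),
           (('a/b', 'g', 'id2'), []),
           (('', 'a', 'id3'), [])]
# 'a/b' (inside directory 'a') sorts before the sibling directory 'a-x'
ordered = [e[0][0] for e in sorted(entries, key=entry_sort_key)]
```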

    def set_state_from_inventory(self, new_inv):
        """Set new_inv as the current state.

        This API is called by tree transform, and will usually occur with
        existing parent trees.

        :param new_inv: The inventory object to set current state from.
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(1,
                "set_state_from_inventory called; please mutate the tree instead")
        self._read_dirblocks_if_needed()
        # sketch:
        # Two iterators: current data and new data, both in dirblock order.
        # We zip them together, which tells about entries that are new in the
        # inventory, or removed in the inventory, or present in both and
        # possibly changed.
        #
        # You might think we could just synthesize a new dirstate directly
        # since we're processing it in the right order. However, we need to
        # also consider there may be any number of parent trees and relocation
        # pointers, and we don't want to duplicate that here.
        new_iterator = new_inv.iter_entries_by_dir()
        # we will be modifying the dirstate, so we need a stable iterator. In
        # future we might write one, for now we just clone the state into a
        # list - which is a shallow copy.
        old_iterator = iter(list(self._iter_entries()))
        # both must have roots so this is safe:
        current_new = new_iterator.next()
        current_old = old_iterator.next()
        def advance(iterator):
            try:
                return iterator.next()
            except StopIteration:
                return None
        while current_new or current_old:
            # skip entries in old that are not really there
            if current_old and current_old[1][0][0] in 'ar':
                # relocated or absent
                current_old = advance(old_iterator)
                continue
            if current_new:
                # convert new into dirblock style
                new_path_utf8 = current_new[0].encode('utf8')
                new_dirname, new_basename = osutils.split(new_path_utf8)
                new_id = current_new[1].file_id
                new_entry_key = (new_dirname, new_basename, new_id)
                current_new_minikind = \
                    DirState._kind_to_minikind[current_new[1].kind]
                if current_new_minikind == 't':
                    fingerprint = current_new[1].reference_revision or ''
                else:
                    # We normally only insert or remove records, or update
                    # them when it has significantly changed. Then we want to
                    # erase its fingerprint. Unaffected records should
                    # normally not be updated at all.
                    fingerprint = ''
            else:
                # for safety disable variables
                new_path_utf8 = new_dirname = new_basename = new_id = \
                    new_entry_key = None
            # 5 cases, we don't have a value that is strictly greater than everything, so
            # we make both end conditions explicit
            if not current_old:
                # old is finished: insert current_new into the state.
                self.update_minimal(new_entry_key, current_new_minikind,
                    executable=current_new[1].executable,
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
                current_new = advance(new_iterator)
            elif not current_new:
                # new is finished
                self._make_absent(current_old)
                current_old = advance(old_iterator)
            elif new_entry_key == current_old[0]:
                # same - common case
                # We're looking at the same path and id in both the dirstate
                # and inventory, so just need to update the fields in the
                # dirstate from the one in the inventory.
                # TODO: update the record if anything significant has changed.
                # the minimal required trigger is if the execute bit or cached
                # kind has changed.
                if (current_old[1][0][3] != current_new[1].executable or
                    current_old[1][0][0] != current_new_minikind):
                    self.update_minimal(current_old[0], current_new_minikind,
                        executable=current_new[1].executable,
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
                # both sides are dealt with, move on
                current_old = advance(old_iterator)
                current_new = advance(new_iterator)
            elif (cmp_by_dirs(new_dirname, current_old[0][0]) < 0
                  or (new_dirname == current_old[0][0]
                      and new_entry_key[1:] < current_old[0][1:])):
                # new comes before:
                # add an entry for this and advance new
                self.update_minimal(new_entry_key, current_new_minikind,
                    executable=current_new[1].executable,
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
                current_new = advance(new_iterator)
            else:
                # we've advanced past the place where the old key would be,
                # without seeing it in the new list. so it must be gone.
                self._make_absent(current_old)
                current_old = advance(old_iterator)
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._id_index = None
        self._packed_stat_index = None

    def _make_absent(self, current_old):
        """Mark current_old - an entry - as absent for tree 0.

        :return: True if this was the last details entry for the entry key:
            that is, if the underlying block has had the entry removed, thus
            shrinking in length.
        """
        # build up paths that this id will be left at after the change is made,
        # so we can update their cross references in tree 0
        all_remaining_keys = set()
        # Don't check the working tree, because it's going.
        for details in current_old[1][1:]:
            if details[0] not in 'ar': # absent, relocated
                all_remaining_keys.add(current_old[0])
            elif details[0] == 'r': # relocated
                # record the key for the real path.
                all_remaining_keys.add(tuple(osutils.split(details[1])) + (current_old[0][2],))
            # absent rows are not present at any path.
        last_reference = current_old[0] not in all_remaining_keys
        if last_reference:
            # the current row consists entirely of the current item (being marked
            # absent), and relocated or absent entries for the other trees:
            # Remove it, it's meaningless.
            block = self._find_block(current_old[0])
            entry_index, present = self._find_entry_index(current_old[0], block[1])
            if not present:
                raise AssertionError('could not find entry for %s' % (current_old,))
            block[1].pop(entry_index)
            # if we have an id_index in use, remove this key from it for this id.
            if self._id_index is not None:
                self._id_index[current_old[0][2]].remove(current_old[0])
        # update all remaining keys for this id to record it as absent. The
        # existing details may either be the record we are marking as deleted
        # (if there were other trees with the id present at this path), or may
        # be relocations.
        for update_key in all_remaining_keys:
            update_block_index, present = \
                self._find_block_index_from_key(update_key)
            if not present:
                raise AssertionError('could not find block for %s' % (update_key,))
            update_entry_index, present = \
                self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
            if not present:
                raise AssertionError('could not find entry for %s' % (update_key,))
            update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
            # it must not be absent at the moment
            if update_tree_details[0][0] == 'a': # absent
                raise AssertionError('bad row %r' % (update_tree_details,))
            update_tree_details[0] = DirState.NULL_PARENT_DETAILS
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        return last_reference

    def update_minimal(self, key, minikind, executable=False, fingerprint='',
                       packed_stat=None, size=0, path_utf8=None):
        """Update an entry to the state in tree 0.

        This will either create a new entry at 'key' or update an existing one.
        It also makes sure that any other records which might mention this are
        updated as well.

        :param key: (dir, name, file_id) for the new entry
        :param minikind: The type for the entry ('f' == 'file', 'd' ==
            'directory'), etc.
        :param executable: Should the executable bit be set?
        :param fingerprint: Simple fingerprint for new entry: sha1 for files,
            referenced revision id for subtrees, etc.
        :param packed_stat: Packed stat value for new entry.
        :param size: Size information for new entry
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
            the computation here.

        If packed_stat and fingerprint are not given, they're invalidated in
        the entry.
        """
        block = self._find_block(key)[1]
        if packed_stat is None:
            packed_stat = DirState.NULLSTAT
        # XXX: Some callers pass '' as the packed_stat, and it seems to be
        # sometimes present in the dirstate - this seems oddly inconsistent.
        entry_index, present = self._find_entry_index(key, block)
        new_details = (minikind, fingerprint, size, executable, packed_stat)
        id_index = self._get_id_index()
        if not present:
            # new entry, synthesis cross reference here,
            existing_keys = id_index.setdefault(key[2], set())
            if not existing_keys:
                # not currently in the state, simplest case
                new_entry = key, [new_details] + self._empty_parent_info()
            else:
                # present at one or more existing other paths.
                # grab one of them and use it to generate parent
                # relocation/absent entries.
                new_entry = key, [new_details]
                for other_key in existing_keys:
                    # change the record at other to be a pointer to this new
                    # record. The loop looks similar to the change to
                    # relocations when updating an existing record but it's not:
                    # the test for existing kinds is different: this can be
                    # factored out to a helper though.
                    other_block_index, present = self._find_block_index_from_key(other_key)
                    if not present:
                        raise AssertionError('could not find block for %s' % (other_key,))
                    other_entry_index, present = self._find_entry_index(other_key,
                        self._dirblocks[other_block_index][1])
                    if not present:
                        raise AssertionError('could not find entry for %s' % (other_key,))
                    if path_utf8 is None:
                        raise AssertionError('no path')
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
                        ('r', path_utf8, 0, False, '')

                num_present_parents = self._num_present_parents()
                for lookup_index in xrange(1, num_present_parents + 1):
                    # grab any one entry, use it to find the right path.
                    # TODO: optimise this to reduce memory use in highly
                    # fragmented situations by reusing the relocation
                    # records.
                    update_block_index, present = \
                        self._find_block_index_from_key(other_key)
                    if not present:
                        raise AssertionError('could not find block for %s' % (other_key,))
                    update_entry_index, present = \
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
                    if not present:
                        raise AssertionError('could not find entry for %s' % (other_key,))
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
                    if update_details[0] in 'ar': # relocated, absent
                        # it's a pointer or absent in lookup_index's tree, use
                        # it as is.
                        new_entry[1].append(update_details)
                    else:
                        # we have the right key, make a pointer to it.
                        pointer_path = osutils.pathjoin(*other_key[0:2])
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
            block.insert(entry_index, new_entry)
            existing_keys.add(key)
        else:
            # Does the new state matter?
            block[entry_index][1][0] = new_details
            # parents cannot be affected by what we do.
            # other occurrences of this id can be found
            # from the id index.

            # tree index consistency: All other paths for this id in this tree
            # index must point to the correct path. We have to loop here because
            # we may have passed entries in the state with this file id already
            # that were absent - where parent entries are - and they need to be
            # converted to relocated.
            if path_utf8 is None:
                raise AssertionError('no path')
            for entry_key in id_index.setdefault(key[2], set()):
                # TODO:PROFILING: It might be faster to just update
                # rather than checking if we need to, and then overwrite
                # the one we are located at.
                if entry_key != key:
                    # this file id is at a different path in one of the
                    # other trees, so put absent pointers there
                    # This is the vertical axis in the matrix, all pointing
                    # to the real path.
                    block_index, present = self._find_block_index_from_key(entry_key)
                    if not present:
                        raise AssertionError('not present: %r', entry_key)
                    entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
                    if not present:
                        raise AssertionError('not present: %r', entry_key)
                    self._dirblocks[block_index][1][entry_index][1][0] = \
                        ('r', path_utf8, 0, False, '')
        # add a containing dirblock if needed.
        if new_details[0] == 'd':
            subdir_key = (osutils.pathjoin(*key[0:2]), '', '')
            block_index, present = self._find_block_index_from_key(subdir_key)
            if not present:
                self._dirblocks.insert(block_index, (subdir_key[0], []))

        self._dirblock_state = DirState.IN_MEMORY_MODIFIED

    def _validate(self):
        """Check that invariants on the dirblock are correct.

        This can be useful in debugging; it shouldn't be necessary in
        normal code.

        This must be called with a lock held.
        """
        # NOTE: This must always raise AssertionError not just assert,
        # otherwise it may not behave properly under python -O
        #
        # TODO: All entries must have some content that's not 'a' or 'r',
        # otherwise it could just be removed.
        #
        # TODO: All relocations must point directly to a real entry.
        #
        # TODO: No repeated keys.
        #
        from pprint import pformat
        self._read_dirblocks_if_needed()
        if len(self._dirblocks) > 0:
            if not self._dirblocks[0][0] == '':
                raise AssertionError(
                    "dirblocks don't start with root block:\n" + \
                    pformat(self._dirblocks))
        if len(self._dirblocks) > 1:
            if not self._dirblocks[1][0] == '':
                raise AssertionError(
                    "dirblocks missing root directory:\n" + \
                    pformat(self._dirblocks))
        # the dirblocks are sorted by their path components, name, and dir id
        dir_names = [d[0].split('/')
                for d in self._dirblocks[1:]]
        if dir_names != sorted(dir_names):
            raise AssertionError(
                "dir names are not in sorted order:\n" + \
                pformat(self._dirblocks) + \
                "\nkeys:\n" + \
                pformat(dir_names))
        for dirblock in self._dirblocks:
            # within each dirblock, the entries are sorted by filename and
            # then by file id.
            for entry in dirblock[1]:
                if dirblock[0] != entry[0][0]:
                    raise AssertionError(
                        "entry key for %r"
                        "doesn't match directory name in\n%r" %
                        (entry, pformat(dirblock)))
            if dirblock[1] != sorted(dirblock[1]):
                raise AssertionError(
                    "dirblock for %r is not sorted:\n%s" % \
                    (dirblock[0], pformat(dirblock)))

        def check_valid_parent():
            """Check that the current entry has a valid parent.

            This makes sure that the parent has a record,
            and that the parent isn't marked as "absent" in the
            current tree. (It is invalid to have a non-absent file in an absent
            directory.)
            """
            if entry[0][0:2] == ('', ''):
                # There should be no parent for the root row
                return
            parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])
            if parent_entry == (None, None):
                raise AssertionError(
                    "no parent entry for: %s in tree %s"
                    % (this_path, tree_index))
            if parent_entry[1][tree_index][0] != 'd':
                raise AssertionError(
                    "Parent entry for %s is not marked as a valid"
                    " directory. %s" % (this_path, parent_entry,))

        # For each file id, for each tree: either
        # the file id is not present at all; all rows with that id in the
        # key have it marked as 'absent'
        # OR the file id is present under exactly one name; any other entries
        # that mention that id point to the correct name.
        #
        # We check this with a dict per tree pointing either to the present
        # name, or None if absent.
        tree_count = self._num_present_parents() + 1
        id_path_maps = [dict() for i in range(tree_count)]
        # Make sure that all renamed entries point to the correct location.
        for entry in self._iter_entries():
            file_id = entry[0][2]
            this_path = osutils.pathjoin(entry[0][0], entry[0][1])
            if len(entry[1]) != tree_count:
                raise AssertionError(
                    "wrong number of entry details for row\n%s" \
                    ",\nexpected %d" % \
                    (pformat(entry), tree_count))
            absent_positions = 0
            for tree_index, tree_state in enumerate(entry[1]):
                this_tree_map = id_path_maps[tree_index]
                minikind = tree_state[0]
                if minikind in 'ar':
                    absent_positions += 1
                # have we seen this id before in this column?
                if file_id in this_tree_map:
                    previous_path, previous_loc = this_tree_map[file_id]
                    # any later mention of this file must be consistent with
                    # what was said before
                    if minikind == 'a':
                        if previous_path is not None:
                            raise AssertionError(
                                "file %s is absent in row %r but also present " \
                                "at %r" % \
                                (file_id, entry, previous_path))
                    elif minikind == 'r':
                        target_location = tree_state[1]
                        if previous_path != target_location:
2621
raise AssertionError(
2622
"file %s relocation in row %r but also at %r" \
2623
% (file_id, entry, previous_path))
2625
# a file, directory, etc - may have been previously
2626
# pointed to by a relocation, which must point here
2627
if previous_path != this_path:
2628
raise AssertionError(
2629
"entry %r inconsistent with previous path %r "
2631
(entry, previous_path, previous_loc))
2632
check_valid_parent()
2635
# absent; should not occur anywhere else
2636
this_tree_map[file_id] = None, this_path
2637
elif minikind == 'r':
2638
# relocation, must occur at expected location
2639
this_tree_map[file_id] = tree_state[1], this_path
2641
this_tree_map[file_id] = this_path, this_path
2642
check_valid_parent()
2643
if absent_positions == tree_count:
2644
raise AssertionError(
2645
"entry %r has no data for any tree." % (entry,))
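# Illustrative sketch (not bzrlib code): the block-ordering invariant that
# _validate enforces above. Dirblocks are compared by their path *components*
# ('/'-split), not by raw strings, which keeps every directory's children
# adjacent to their parent block. dirblocks_sorted is a hypothetical helper.

```python
def dirblocks_sorted(dirblocks):
    """True if dirblock directory names obey dirstate's component ordering."""
    dir_names = [d[0].split('/') for d in dirblocks]
    return dir_names == sorted(dir_names)

# 'a-b' sorts *after* 'a/b' under component ordering (['a-b'] > ['a', 'b']),
# even though 'a-b' < 'a/b' as raw strings ('-' precedes '/' in ASCII).
blocks = [('', []), ('a', []), ('a/b', []), ('a-b', [])]
assert dirblocks_sorted(blocks)
assert not dirblocks_sorted([('a-b', []), ('a/b', [])])
```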
    def _wipe_state(self):
        """Forget all state information about the dirstate."""
        self._header_state = DirState.NOT_IN_MEMORY
        self._dirblock_state = DirState.NOT_IN_MEMORY
        self._changes_aborted = False
        self._parents = []
        self._ghosts = []
        self._dirblocks = []
        self._id_index = None
        self._packed_stat_index = None
        self._end_of_header = None
        self._cutoff_time = None
        self._split_path_cache = {}
    def lock_read(self):
        """Acquire a read lock on the dirstate."""
        if self._lock_token is not None:
            raise errors.LockContention(self._lock_token)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone.
        self._lock_token = lock.ReadLock(self._filename)
        self._lock_state = 'r'
        self._state_file = self._lock_token.f
        self._wipe_state()

    def lock_write(self):
        """Acquire a write lock on the dirstate."""
        if self._lock_token is not None:
            raise errors.LockContention(self._lock_token)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone.
        self._lock_token = lock.WriteLock(self._filename)
        self._lock_state = 'w'
        self._state_file = self._lock_token.f
        self._wipe_state()

    def unlock(self):
        """Drop any locks held on the dirstate."""
        if self._lock_token is None:
            raise errors.LockNotHeld(self)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone.
        self._state_file = None
        self._lock_state = None
        self._lock_token.unlock()
        self._lock_token = None
        self._split_path_cache = {}

    def _requires_lock(self):
        """Check that a lock is currently held by someone on the dirstate."""
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)
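# Illustrative sketch (not bzrlib code): the token-based protocol shared by
# lock_read, lock_write and unlock above, reduced to a standalone class.
# DemoLockable, RuntimeError and the bare object() token are hypothetical
# stand-ins for DirState, errors.LockContention/LockNotHeld and
# lock.ReadLock/WriteLock (which also open self._state_file).

```python
class DemoLockable(object):
    """Minimal sketch of the dirstate lock-token protocol."""

    def __init__(self):
        self._lock_token = None
        self._lock_state = None

    def lock_read(self):
        # Taking a lock while one is held is contention, not re-entry.
        if self._lock_token is not None:
            raise RuntimeError('lock contention: %r' % (self._lock_token,))
        self._lock_token = object()  # stands in for lock.ReadLock(filename)
        self._lock_state = 'r'

    def unlock(self):
        if self._lock_token is None:
            raise RuntimeError('lock not held')
        # Drop the derived state before releasing the token, mirroring
        # the ordering in DirState.unlock().
        self._lock_state = None
        self._lock_token = None

demo = DemoLockable()
demo.lock_read()
assert demo._lock_state == 'r'
demo.unlock()
assert demo._lock_token is None
```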
def py_update_entry(self, entry, abspath, stat_value,
                    _stat_to_minikind=DirState._stat_to_minikind,
                    _pack_stat=pack_stat):
    """Update the entry based on what is actually on disk.

    :param entry: This is the dirblock entry for the file in question.
    :param abspath: The path on disk for this file.
    :param stat_value: (optional) if we already have done a stat on the
        file, re-use it.
    :return: The sha1 hexdigest of the file (40 bytes) or link target of a
        symlink.
    """
    try:
        minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
    except KeyError:
        # Unhandled kind
        return None
    packed_stat = _pack_stat(stat_value)
    (saved_minikind, saved_link_or_sha1, saved_file_size,
     saved_executable, saved_packed_stat) = entry[1][0]

    if (minikind == saved_minikind
        and packed_stat == saved_packed_stat):
        # The stat hasn't changed since we saved, so we can re-use the
        # saved sha hash.
        if minikind == 'd':
            return None

        # size should also be in packed_stat
        if saved_file_size == stat_value.st_size:
            return saved_link_or_sha1

    # If we have gotten this far, that means that we need to actually
    # process this entry.
    link_or_sha1 = None
    if minikind == 'f':
        link_or_sha1 = self._sha1_file(abspath)
        executable = self._is_executable(stat_value.st_mode,
                                         saved_executable)
        if self._cutoff_time is None:
            self._sha_cutoff_time()
        if (stat_value.st_mtime < self._cutoff_time
            and stat_value.st_ctime < self._cutoff_time):
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
                           executable, packed_stat)
        else:
            entry[1][0] = ('f', '', stat_value.st_size,
                           executable, DirState.NULLSTAT)
    elif minikind == 'd':
        entry[1][0] = ('d', '', 0, False, packed_stat)
        if saved_minikind != 'd':
            # This changed from something into a directory. Make sure we
            # have a directory block for it. This doesn't happen very
            # often, so this doesn't have to be super fast.
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
            self._ensure_block(block_index, entry_index,
                               osutils.pathjoin(entry[0][0], entry[0][1]))
    elif minikind == 'l':
        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
        if self._cutoff_time is None:
            self._sha_cutoff_time()
        if (stat_value.st_mtime < self._cutoff_time
            and stat_value.st_ctime < self._cutoff_time):
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
                           False, packed_stat)
        else:
            entry[1][0] = ('l', '', stat_value.st_size,
                           False, DirState.NULLSTAT)
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    return link_or_sha1


update_entry = py_update_entry
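# Illustrative sketch (not bzrlib code): the fast path at the top of
# py_update_entry - trust the cached fingerprint while the packed stat and
# size still match, and only re-read content when they don't. cached_sha1
# and its hash_file callback are hypothetical names, not bzrlib API.

```python
def cached_sha1(saved, minikind, size, packed_stat, hash_file):
    """Return a cached sha1 when the stat fingerprint is unchanged.

    saved is (minikind, sha1, size, executable, packed_stat), shaped like
    a dirstate entry's current-tree details; hash_file re-hashes content.
    """
    saved_minikind, saved_sha1, saved_size, _, saved_packed_stat = saved
    if (minikind == saved_minikind
        and packed_stat == saved_packed_stat
        and size == saved_size):
        return saved_sha1      # stat unchanged: re-use the saved hash
    return hash_file()         # something changed: hash the content again

calls = []
def rehash():
    calls.append(1)
    return 'ffff'

saved = ('f', 'abcd', 10, False, 'PACKED')
assert cached_sha1(saved, 'f', 10, 'PACKED', rehash) == 'abcd'
assert calls == []                        # content was never re-read
assert cached_sha1(saved, 'f', 11, 'PACKED', rehash) == 'ffff'
```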
class ProcessEntryPython(object):

    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
        "last_source_parent", "last_target_parent", "include_unchanged",
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
        "search_specific_files"]

    def __init__(self, include_unchanged, use_filesystem_for_exec,
        search_specific_files):
        self.old_dirname_to_file_id = {}
        self.new_dirname_to_file_id = {}
        # Just a sentinel, so that _process_entry can say that this
        # record is handled, but isn't interesting to process (unchanged)
        self.uninteresting = object()
        # Using a list so that we can access the values and change them in
        # nested scope. Each one is [path, file_id, entry]
        self.last_source_parent = [None, None]
        self.last_target_parent = [None, None]
        self.include_unchanged = include_unchanged
        self.use_filesystem_for_exec = use_filesystem_for_exec
        self.utf8_decode = cache_utf8._utf8_decode
        # For all search indexes in each path at or under each element of
        # search_specific_files, if the detail is relocated: add the id, and
        # add the relocated path as one to search if it's not already
        # searched. If the detail is not relocated, add the id.
        self.searched_specific_files = set()
        self.search_specific_files = search_specific_files
    def _process_entry(self, entry, path_info, source_index, target_index, state):
        """Compare an entry and real disk to generate delta information.

        :param path_info: top_relpath, basename, kind, lstat, abspath for
            the path of entry. If None, then the path is considered absent.
            (Perhaps we should pass in a concrete entry for this ?)
            Basename is returned as a utf8 string because we expect this
            tuple will be ignored, and don't want to take the time to
            decode.
        :return: None if these don't match
            A tuple of information about the change, or
            the object 'uninteresting' if these match, but are
            basically identical.
        """
        if source_index is None:
            source_details = DirState.NULL_PARENT_DETAILS
        else:
            source_details = entry[1][source_index]
        target_details = entry[1][target_index]
        target_minikind = target_details[0]
        if path_info is not None and target_minikind in 'fdlt':
            if not (target_index == 0):
                raise AssertionError()
            link_or_sha1 = update_entry(state, entry,
                abspath=path_info[4], stat_value=path_info[3])
            # The entry may have been modified by update_entry
            target_details = entry[1][target_index]
            target_minikind = target_details[0]
        else:
            link_or_sha1 = None
        file_id = entry[0][2]
        source_minikind = source_details[0]
        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
            # claimed content in both: diff
            #   r    | fdlt   |      | add source to search, add id path move and perform
            #        |        |      | diff check on source-target
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
            #        |        |      | ???
            if source_minikind in 'r':
                # add the source to the search path to find any children it
                # has.  TODO ? : only add if it is a container ?
                if not osutils.is_inside_any(self.searched_specific_files,
                                             source_details[1]):
                    self.search_specific_files.add(source_details[1])
                # generate the old path; this is needed for stating later
                # as well.
                old_path = source_details[1]
                old_dirname, old_basename = os.path.split(old_path)
                path = pathjoin(entry[0][0], entry[0][1])
                old_entry = state._get_entry(source_index,
                                             path_utf8=old_path)
                # update the source details variable to be the real
                # location.
                if old_entry == (None, None):
                    raise errors.CorruptDirstate(state._filename,
                        "entry '%s/%s' is considered renamed from %r"
                        " but source does not exist\n"
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
                source_details = old_entry[1][source_index]
                source_minikind = source_details[0]
            else:
                old_dirname = entry[0][0]
                old_basename = entry[0][1]
                old_path = path = None
            if path_info is None:
                # the file is missing on disk, show as removed.
                content_change = True
                target_kind = None
                target_exec = False
            else:
                # source and target are both versioned and disk file is present.
                target_kind = path_info[2]
                if target_kind == 'directory':
                    if path is None:
                        old_path = path = pathjoin(old_dirname, old_basename)
                    self.new_dirname_to_file_id[path] = file_id
                    if source_minikind != 'd':
                        content_change = True
                    else:
                        # directories have no fingerprint
                        content_change = False
                    target_exec = False
                elif target_kind == 'file':
                    if source_minikind != 'f':
                        content_change = True
                    else:
                        # We could check the size, but we already have the
                        # sha1 hash.
                        content_change = (link_or_sha1 != source_details[1])
                    # Target details is updated at update_entry time
                    if self.use_filesystem_for_exec:
                        # We don't need S_ISREG here, because we are sure
                        # we are dealing with a file.
                        target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
                    else:
                        target_exec = target_details[3]
                elif target_kind == 'symlink':
                    if source_minikind != 'l':
                        content_change = True
                    else:
                        content_change = (link_or_sha1 != source_details[1])
                    target_exec = False
                elif target_kind == 'tree-reference':
                    if source_minikind != 't':
                        content_change = True
                    else:
                        content_change = False
                    target_exec = False
                else:
                    raise Exception("unknown kind %s" % path_info[2])
            if source_minikind == 'd':
                if path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                self.old_dirname_to_file_id[old_path] = file_id
            # parent id is the entry for the path in the target tree
            if old_dirname == self.last_source_parent[0]:
                source_parent_id = self.last_source_parent[1]
            else:
                try:
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
                except KeyError:
                    source_parent_entry = state._get_entry(source_index,
                                                           path_utf8=old_dirname)
                    source_parent_id = source_parent_entry[0][2]
                if source_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    source_parent_id = None
                else:
                    self.last_source_parent[0] = old_dirname
                    self.last_source_parent[1] = source_parent_id
            new_dirname = entry[0][0]
            if new_dirname == self.last_target_parent[0]:
                target_parent_id = self.last_target_parent[1]
            else:
                try:
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
                except KeyError:
                    # TODO: We don't always need to do the lookup, because the
                    #       parent entry will be the same as the source entry.
                    target_parent_entry = state._get_entry(target_index,
                                                           path_utf8=new_dirname)
                    if target_parent_entry == (None, None):
                        raise AssertionError(
                            "Could not find target parent in wt: %s\nparent of: %s"
                            % (new_dirname, entry))
                    target_parent_id = target_parent_entry[0][2]
                if target_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    target_parent_id = None
                else:
                    self.last_target_parent[0] = new_dirname
                    self.last_target_parent[1] = target_parent_id

            source_exec = source_details[3]
            if (self.include_unchanged
                or content_change
                or source_parent_id != target_parent_id
                or old_basename != entry[0][1]
                or source_exec != target_exec
                ):
                if old_path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                    old_path_u = self.utf8_decode(old_path)[0]
                    path_u = old_path_u
                else:
                    old_path_u = self.utf8_decode(old_path)[0]
                    if old_path == path:
                        path_u = old_path_u
                    else:
                        path_u = self.utf8_decode(path)[0]
                source_kind = DirState._minikind_to_kind[source_minikind]
                return (entry[0][2],
                       (old_path_u, path_u),
                       content_change,
                       (True, True),
                       (source_parent_id, target_parent_id),
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
                       (source_kind, target_kind),
                       (source_exec, target_exec))
            else:
                return self.uninteresting
        elif source_minikind in 'a' and target_minikind in 'fdlt':
            # looks like a new file
            path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            # TODO: these are the same for an entire directory: cache em.
            parent_id = state._get_entry(target_index,
                                         path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            if path_info is not None:
                # Present on disk:
                if self.use_filesystem_for_exec:
                    # We need S_ISREG here, because we aren't sure if this
                    # is a file or not.
                    target_exec = bool(
                        stat.S_ISREG(path_info[3].st_mode)
                        and stat.S_IEXEC & path_info[3].st_mode)
                else:
                    target_exec = target_details[3]
                return (entry[0][2],
                       (None, self.utf8_decode(path)[0]),
                       True,
                       (False, True),
                       (None, parent_id),
                       (None, self.utf8_decode(entry[0][1])[0]),
                       (None, path_info[2]),
                       (None, target_exec))
            else:
                # Its a missing file, report it as such.
                return (entry[0][2],
                       (None, self.utf8_decode(path)[0]),
                       False,
                       (False, True),
                       (None, parent_id),
                       (None, self.utf8_decode(entry[0][1])[0]),
                       (None, None),
                       (None, False))
        elif source_minikind in 'fdlt' and target_minikind in 'a':
            # unversioned, possibly, or possibly not deleted: we dont care.
            # if its still on disk, *and* theres no other entry at this
            # path [we dont know this in this routine at the moment -
            # perhaps we should change this - then it would be an unknown.
            old_path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            parent_id = state._get_entry(source_index, path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            return (entry[0][2],
                   (self.utf8_decode(old_path)[0], None),
                   True,
                   (True, False),
                   (parent_id, None),
                   (self.utf8_decode(entry[0][1])[0], None),
                   (DirState._minikind_to_kind[source_minikind], None),
                   (source_details[3], None))
        elif source_minikind in 'fdlt' and target_minikind in 'r':
            # a rename; could be a true rename, or a rename inherited from
            # a renamed parent. TODO: handle this efficiently. Its not
            # common case to rename dirs though, so a correct but slow
            # implementation will do.
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
                self.search_specific_files.add(target_details[1])
        elif source_minikind in 'ra' and target_minikind in 'ra':
            # neither of the selected trees contain this file,
            # so skip over it. This is not currently directly tested, but
            # is indirectly via test_too_much.TestCommands.test_conflicts.
            pass
        else:
            raise AssertionError("don't know how to compare "
                "source_minikind=%r, target_minikind=%r"
                % (source_minikind, target_minikind))
        ## import pdb;pdb.set_trace()
        return None
_process_entry = ProcessEntryPython


# Try to load the compiled form if possible
try:
    from bzrlib._dirstate_helpers_c import (
        _read_dirblocks_c as _read_dirblocks,
        bisect_dirblock_c as bisect_dirblock,
        _bisect_path_left_c as _bisect_path_left,
        _bisect_path_right_c as _bisect_path_right,
        cmp_by_dirs_c as cmp_by_dirs,
        update_entry as update_entry,
        ProcessEntryC as _process_entry,
        )
except ImportError:
    from bzrlib._dirstate_helpers_py import (
        _read_dirblocks_py as _read_dirblocks,
        bisect_dirblock_py as bisect_dirblock,
        _bisect_path_left_py as _bisect_path_left,
        _bisect_path_right_py as _bisect_path_right,
        cmp_by_dirs_py as cmp_by_dirs,
        )
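# Illustrative sketch (not bzrlib code): the try/except ImportError pattern
# used above to prefer compiled helpers with a pure-Python fallback.
# _no_such_helpers_c is a deliberately missing module name, and json merely
# stands in for the pure-Python implementation module.

```python
try:
    # Compiled form; may not be built on this installation.
    from _no_such_helpers_c import loads
except ImportError:
    # Fall back to the pure-Python implementation.
    from json import loads

assert loads('[1, 2, 3]') == [1, 2, 3]
```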