/brz/remove-bazaar : revision 2255.10.2

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

Committer: John Arbash Meinel
Date: 2007-03-01 21:56:19 UTC
mto: (2255.7.84 dirstate)
mto: This revision was merged to the branch mainline in revision 2322.
Revision ID: john@arbash-meinel.com-20070301215619-wpt6kz8yem3ypu1b

Update to dirstate locking.
Move all of WT4.lock_* functions locally, so that they can
properly interact and cleanup around when we lock/unlock the
dirstate file.
Change all Lock objects to be non-blocking. So that if someone
grabs a lock on the DirState we find out immediately, rather
than blocking.
Change WT4.unlock() so that if the dirstate is dirty, it will
save the contents even if it only has a read lock.
It does this by trying to take a write lock, if it fails
we just ignore it. If it succeeds, then we can flush to disk.
This is more important now that DirState tracks file changes.
It allows 'bzr status' to update the cached stat and sha values.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/dirstate.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""DirState objects record the state of a directory and its bzr metadata.

Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and

lines by NL. The field delimiters are ommitted in the grammar, line delimiters

are not - this is done for clarity of reading. All string data is in utf8.

MINIKIND = "f" | "d" | "l" | "a" | "r";

NL = "\n";

NULL = "\0";

WHOLE_NUMBER = {digit}, digit;

BOOLEAN = "y" | "n";

REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,

ghost_details, entries;

header line = "#bazaar dirstate flat format 2", NL;

full checksum = "adler32: ", ["-"], WHOLE_NUMBER, NL;

row count = "num_entries: ", digit, NL;

parent_details = WHOLE NUMBER, {REVISION_ID}* NL;

ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entries = {entry};

entry = entry_key, current_entry_details, {parent_entry_details};

entry_key = dirname, basename, fileid;

current_entry_details = common_entry_details, working_entry_details;

parent_entry_details = common_entry_details, history_entry_details;

common_entry_details = MINIKIND, fingerprint, size, executable

working_entry_details = packed_stat

history_entry_details = REVISION_ID;

executable = BOOLEAN;

size = WHOLE_NUMBER;

fingerprint = a nonempty utf8 sequence with meaning defined by minikind.

Given this definition, the following is useful to know:

entry (aka row) - all the data for a given key.

entry[0]: The key (dirname, basename, fileid)

entry[0][0]: dirname

entry[0][1]: basename

entry[0][2]: fileid

entry[1]: The tree(s) data for this path and id combination.

entry[1][0]: The current tree

entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate:

entry[1][0][0]: minikind

entry[1][0][1]: fingerprint

entry[1][0][2]: size

entry[1][0][3]: executable

entry[1][0][4]: packed_stat

OR (for non tree-0)

entry[1][1][4]: revision_id

There may be multiple rows at the root, one per id present in the root, so the

in memory root row is now:

self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are

entries[0][0]: ''

entries[0][1]: ''

entries[0][2]: file_id

entries[1][0]: The tree data for the current tree for this fileid at /

etc.

Kinds:

'r' is a relocated entry: This path is not present in this tree with this id,

but the id can be found at another location. The fingerprint is used to

point to the target location.

'a' is an absent entry: In that tree the id is not present at this path.

'd' is a directory entry: This path in this tree is a directory with the

current file id. There is no fingerprint for directories.

'f' is a file entry: As for directory, but its a file. The fingerprint is a

sha1 value.

'l' is a symlink entry: As for directory, but a symlink. The fingerprint is the

link target.

--- Format 1 had the following different definition: ---

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,

WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,

{PARENT ROW}

PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,

basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,

SHA1

PARENT ROW's are emitted for every parent that is not in the ghosts details

100

line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then

101

each row will have a PARENT ROW for foo and baz, but not for bar.

102

103

104

In any tree, a kind of 'moved' indicates that the fingerprint field

105

(which we treat as opaque data specific to the 'kind' anyway) has the

106

details for the id of this row in that tree.

107

108

I'm strongly tempted to add a id->path index as well, but I think that

109

where we need id->path mapping; we also usually read the whole file, so

110

I'm going to skip that for the moment, as we have the ability to locate

111

via bisect any path in any tree, and if we lookup things by path, we can

112

accumulate a id->path mapping as we go, which will tend to match what we

113

looked for.

114

115

I plan to implement this asap, so please speak up now to alter/tweak the

116

design - and once we stabilise on this, I'll update the wiki page for

117

it.

118

119

The rationale for all this is that we want fast operations for the

120

common case (diff/status/commit/merge on all files) and extremely fast

121

operations for the less common but still occurs a lot status/diff/commit

122

on specific files). Operations on specific files involve a scan for all

123

the children of a path, *in every involved tree*, which the current

124

format did not accommodate.

125

----

126

127

Design priorities:

128

1) Fast end to end use for bzr's top 5 uses cases. (commmit/diff/status/merge/???)

129

2) fall back current object model as needed.

130

3) scale usably to the largest trees known today - say 50K entries. (mozilla

131

is an example of this)

132

133

134

Locking:

135

Eventually reuse dirstate objects across locks IFF the dirstate file has not

136

been modified, but will require that we flush/ignore cached stat-hit data

137

because we wont want to restat all files on disk just because a lock was

138

acquired, yet we cannot trust the data after the previous lock was released.

139

140

Memory representation:

141

vector of all directories, and vector of the childen ?

142

i.e.

143

root_entrie = (direntry for root, [parent_direntries_for_root]),

144

dirblocks = [

145

('', ['data for achild', 'data for bchild', 'data for cchild'])

146

('dir', ['achild', 'cchild', 'echild'])

147

]

148

- single bisect to find N subtrees from a path spec

149

- in-order for serialisation - this is 'dirblock' grouping.

150

- insertion of a file '/a' affects only the '/' child-vector, that is, to

151

insert 10K elements from scratch does not generates O(N^2) memoves of a

152

single vector, rather each individual, which tends to be limited to a

153

manageable number. Will scale badly on trees with 10K entries in a

154

single directory. compare with Inventory.InventoryDirectory which has

155

a dictionary for the children. No bisect capability, can only probe for

156

exact matches, or grab all elements and sorta.

157

- Whats the risk of error here? Once we have the base format being processed

158

we should have a net win regardless of optimality. So we are going to

159

go with what seems reasonably.

160

open questions:

161

162

maybe we should do a test profile of these core structure - 10K simulated searches/lookups/etc?

163

164

Objects for each row?

165

The lifetime of Dirstate objects is current per lock, but see above for

166

possible extensions. The lifetime of a row from a dirstate is expected to be

167

very short in the optimistic case: which we are optimising for. For instance,

168

subtree status will determine from analysis of the disk data what rows need to

169

be examined at all, and will be able to determine from a single row whether

170

that file has altered or not, so we are aiming to process tens of thousands of

171

entries each second within the dirstate context, before exposing anything to

172

the larger codebase. This suggests we want the time for a single file

173

comparison to be < 0.1 milliseconds. That would give us 10000 paths per second

174

processed, and to scale to 100 thousand we'll another order of magnitude to do

175

that. Now, as the lifetime for all unchanged entries is the time to parse, stat

176

the file on disk, and then immediately discard, the overhead of object creation

177

becomes a significant cost.

178

179

Figures: Creating a tuple from from 3 elements was profiled at 0.0625

180

microseconds, whereas creating a object which is subclassed from tuple was

181

0.500 microseconds, and creating an object with 3 elements and slots was 3

182

microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get

183

down to 10 microseconds for the total processing - having 33% of that be object

184

creation is a huge overhead. There is a potential cost in using tuples within

185

each row which is that the conditional code to do comparisons may be slower

186

than method invocation, but method invocation is known to be slow due to stack

187

frame creation, so avoiding methods in these tight inner loops in unfortunately

188

desirable. We can consider a pyrex version of this with objects in future if

189

desired.

190

191

"""

192

193

194

import bisect

195

import codecs

196

import cStringIO

197

import errno

198

import os

199

import sha

200

import struct

201

import time

202

import zlib

203

204

from bzrlib import (

205

errors,

206

inventory,

207

lock,

208

osutils,

209

trace,

210

)

211

from bzrlib.osutils import (

212

pathjoin,

213

sha_file,

214

sha_string,

215

walkdirs,

216

)

217

218

219

class _Bisector(object):

220

"""This just keeps track of information as we are bisecting."""

221

222

223

class DirState(object):

224

"""Record directory and metadata state for fast access.

225

226

A dirstate is a specialised data structure for managing local working

227

tree state information. Its not yet well defined whether it is platform

228

specific, and if it is how we detect/parameterise that.

229

"""

230

231

_kind_to_minikind = {'absent':'a', 'file':'f', 'directory':'d', 'relocated':'r', 'symlink':'l'}

232

_minikind_to_kind = {'a':'absent', 'f':'file', 'd':'directory', 'l':'symlink', 'r':'relocated'}

233

_to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain

234

# of using int conversion rather than a dict here. AND BLAME ANDREW IF

235

# it is faster.

236

237

# TODO: jam 20070221 Make sure we handle when there are duplicated records

238

# (like when we remove + add the same path, or we have a rename)

239

# TODO: jam 20070221 Figure out what to do if we have a record that exceeds

240

# the BISECT_PAGE_SIZE. For now, we just have to make it large enough

241

# that we are sure a single record will always fit.

242

BISECT_PAGE_SIZE = 4096

243

244

NOT_IN_MEMORY = 0

245

IN_MEMORY_UNMODIFIED = 1

246

IN_MEMORY_MODIFIED = 2

247

248

# A pack_stat (the x's) that is just noise and will never match the output

249

# of base64 encode.

250

NULLSTAT = 'x' * 32

251

NULL_PARENT_DETAILS = ('a', '', 0, False, '')

252

253

HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'

254

HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'

255

256

def __init__(self, path):

257

"""Create a DirState object.

258

259

Attributes of note:

260

261

:attr _root_entrie: The root row of the directory/file information,

262

- contains the path to / - '', ''

263

- kind of 'directory',

264

- the file id of the root in utf8

265

- size of 0

266

- a packed state

267

- and no sha information.

268

:param path: The path at which the dirstate file on disk should live.

269

"""

270

# _header_state and _dirblock_state represent the current state

271

# of the dirstate metadata and the per-row data respectiely.

272

# NOT_IN_MEMORY indicates that no data is in memory

273

# IN_MEMORY_UNMODIFIED indicates that what we have in memory

274

# is the same as is on disk

275

# IN_MEMORY_MODIFIED indicates that we have a modified version

276

# of what is on disk.

277

# In future we will add more granularity, for instance _dirblock_state

278

# will probably support partially-in-memory as a separate variable,

279

# allowing for partially-in-memory unmodified and partially-in-memory

280

# modified states.

281

self._header_state = DirState.NOT_IN_MEMORY

282

self._dirblock_state = DirState.NOT_IN_MEMORY

283

self._dirblocks = []

284

self._ghosts = []

285

self._parents = []

286

self._state_file = None

287

self._filename = path

288

self._lock_token = None

289

self._lock_state = None

290

self._id_index = None

291

self._end_of_header = None

292

self._split_path_cache = {}

293

self._bisect_page_size = DirState.BISECT_PAGE_SIZE

294

295

def add(self, path, file_id, kind, stat, link_or_sha1):

296

"""Add a path to be tracked.

297

298

:param path: The path within the dirstate - '' is the root, 'foo' is the

299

path foo within the root, 'foo/bar' is the path bar within foo

300

within the root.

301

:param file_id: The file id of the path being added.

302

:param kind: The kind of the path.

303

:param stat: The output of os.lstat for the path.

304

:param link_or_sha1: The sha value of the file, or the target of a

305

symlink. '' for directories.

306

"""

307

# adding a file:

308

# find the block its in.

309

# find the location in the block.

310

# check its not there

311

# add it.

312

#------- copied from inventory.make_entry

313

# --- normalized_filename wants a unicode basename only, so get one.

314

dirname, basename = osutils.split(path)

315

# we dont import normalized_filename directly because we want to be

316

# able to change the implementation at runtime for tests.

317

norm_name, can_access = osutils.normalized_filename(basename)

318

if norm_name != basename:

319

if can_access:

320

basename = norm_name

321

else:

322

raise errors.InvalidNormalization(path)

323

# now that we've normalised, we need the correct utf8 path and

324

# dirname and basename elements. This single encode and split should be

325

# faster than three separate encodes.

326

utf8path = (dirname + '/' + basename).strip('/').encode('utf8')

327

dirname, basename = osutils.split(utf8path)

328

assert file_id.__class__ == str, \

329

"must be a utf8 file_id not %s" % (type(file_id))

330

# Make sure the file_id does not exist in this tree

331

file_id_entry = self._get_entry(0, fileid_utf8=file_id)

332

if file_id_entry != (None, None):

333

path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])

334

kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]

335

info = '%s:%s' % (kind, path)

336

raise errors.DuplicateFileId(file_id, info)

337

first_key = (dirname, basename, '')

338

block_index, present = self._find_block_index_from_key(first_key)

339

if present:

340

# check the path is not in the tree

341

block = self._dirblocks[block_index][1]

342

entry_index, _ = self._find_entry_index(first_key, block)

343

while (entry_index < len(block) and

344

block[entry_index][0][0:2] == first_key[0:2]):

345

if block[entry_index][1][0][0] not in 'ar':

346

# this path is in the dirstate in the current tree.

347

raise Exception, "adding already added path!"

348

entry_index += 1

349

else:

350

# The block where we want to put the file is not present. But it

351

# might be because the directory was empty, or not loaded yet. Look

352

# for a parent entry, if not found, raise NotVersionedError

353

parent_dir, parent_base = osutils.split(dirname)

354

parent_block_idx, parent_entry_idx, _, parent_present = \

355

self._get_block_entry_index(parent_dir, parent_base, 0)

356

if not parent_present:

357

raise errors.NotVersionedError(path, str(self))

358

self._ensure_block(parent_block_idx, parent_entry_idx, dirname)

359

block = self._dirblocks[block_index][1]

360

entry_key = (dirname, basename, file_id)

361

if stat is None:

362

size = 0

363

packed_stat = DirState.NULLSTAT

364

else:

365

size = stat.st_size

366

packed_stat = pack_stat(stat)

367

parent_info = self._empty_parent_info()

368

minikind = DirState._kind_to_minikind[kind]

369

if kind == 'file':

370

entry_data = entry_key, [

371

(minikind, link_or_sha1, size, False, packed_stat),

372

] + parent_info

373

elif kind == 'directory':

374

entry_data = entry_key, [

375

(minikind, '', 0, False, packed_stat),

376

] + parent_info

377

elif kind == 'symlink':

378

entry_data = entry_key, [

379

(minikind, link_or_sha1, size, False, packed_stat),

380

] + parent_info

381

else:

382

raise errors.BzrError('unknown kind %r' % kind)

383

entry_index, present = self._find_entry_index(entry_key, block)

384

assert not present, "basename %r already added" % basename

385

block.insert(entry_index, entry_data)

386

387

if kind == 'directory':

388

# insert a new dirblock

389

self._ensure_block(block_index, entry_index, utf8path)

390

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

391

if self._id_index:

392

self._id_index.setdefault(entry_key[2], set()).add(entry_key)

393

394

def _bisect(self, dir_name_list):

395

"""Bisect through the disk structure for specific rows.

396

397

:param dir_name_list: A list of (dir, name) pairs.

398

:return: A dict mapping (dir, name) => entry for found entries. Missing

399

entries will not be in the map.

400

"""

401

self._requires_lock()

402

# We need the file pointer to be right after the initial header block

403

self._read_header_if_needed()

404

# If _dirblock_state was in memory, we should just return info from

405

# there, this function is only meant to handle when we want to read

406

# part of the disk.

407

assert self._dirblock_state == DirState.NOT_IN_MEMORY

408

409

# The disk representation is generally info + '\0\n\0' at the end. But

410

# for bisecting, it is easier to treat this as '\0' + info + '\0\n'

411

# Because it means we can sync on the '\n'

412

state_file = self._state_file

413

file_size = os.fstat(state_file.fileno()).st_size

414

# We end up with 2 extra fields, we should have a trailing '\n' to

415

# ensure that we read the whole record, and we should have a precursur

416

# '' which ensures that we start after the previous '\n'

417

entry_field_count = self._fields_per_entry() + 1

418

419

low = self._end_of_header

420

high = file_size - 1 # Ignore the final '\0'

421

# Map from (dir, name) => entry

422

found = {}

423

424

# Avoid infinite seeking

425

max_count = 30*len(dir_name_list)

426

count = 0

427

# pending is a list of places to look.

428

# each entry is a tuple of low, high, dir_names

429

# low -> the first byte offset to read (inclusive)

430

# high -> the last byte offset (inclusive)

431

# dir_names -> The list of (dir, name) pairs that should be found in

432

# the [low, high] range

433

pending = [(low, high, dir_name_list)]

434

435

page_size = self._bisect_page_size

436

437

fields_to_entry = self._get_fields_to_entry()

438

439

while pending:

440

low, high, cur_files = pending.pop()

441

442

if not cur_files or low >= high:

443

# Nothing to find

444

continue

445

446

count += 1

447

if count > max_count:

448

raise errors.BzrError('Too many seeks, most likely a bug.')

449

450

mid = max(low, (low+high-page_size)/2)

451

452

state_file.seek(mid)

453

# limit the read size, so we don't end up reading data that we have

454

# already read.

455

read_size = min(page_size, (high-mid)+1)

456

block = state_file.read(read_size)

457

458

start = mid

459

entries = block.split('\n')

460

461

if len(entries) < 2:

462

# We didn't find a '\n', so we cannot have found any records.

463

# So put this range back and try again. But we know we have to

464

# increase the page size, because a single read did not contain

465

# a record break (so records must be larger than page_size)

466

page_size *= 2

467

pending.append((low, high, cur_files))

468

continue

469

470

# Check the first and last entries, in case they are partial, or if

471

# we don't care about the rest of this page

472

first_entry_num = 0

473

first_fields = entries[0].split('\0')

474

if len(first_fields) < entry_field_count:

475

# We didn't get the complete first entry

476

# so move start, and grab the next, which

477

# should be a full entry

478

start += len(entries[0])+1

479

first_fields = entries[1].split('\0')

480

first_entry_num = 1

481

482

if len(first_fields) <= 2:

483

# We didn't even get a filename here... what do we do?

484

# Try a large page size and repeat this query

485

page_size *= 2

486

pending.append((low, high, cur_files))

487

continue

488

else:

489

# Find what entries we are looking for, which occur before and

490

# after this first record.

491

after = start

492

first_dir_name = (first_fields[1], first_fields[2])

493

first_loc = bisect.bisect_left(cur_files, first_dir_name)

494

495

# These exist before the current location

496

pre = cur_files[:first_loc]

497

# These occur after the current location, which may be in the

498

# data we read, or might be after the last entry

499

post = cur_files[first_loc:]

500

501

if post and len(first_fields) >= entry_field_count:

502

# We have files after the first entry

503

504

# Parse the last entry

505

last_entry_num = len(entries)-1

506

last_fields = entries[last_entry_num].split('\0')

507

if len(last_fields) < entry_field_count:

508

# The very last hunk was not complete,

509

# read the previous hunk

510

after = mid + len(block) - len(entries[-1])

511

last_entry_num -= 1

512

last_fields = entries[last_entry_num].split('\0')

513

else:

514

after = mid + len(block)

515

516

last_dir_name = (last_fields[1], last_fields[2])

517

last_loc = bisect.bisect_right(post, last_dir_name)

518

519

middle_files = post[:last_loc]

520

post = post[last_loc:]

521

522

if middle_files:

523

# We have files that should occur in this block

524

# (>= first, <= last)

525

# Either we will find them here, or we can mark them as

526

# missing.

527

528

if middle_files[0] == first_dir_name:

529

# We might need to go before this location

530

pre.append(first_dir_name)

531

if middle_files[-1] == last_dir_name:

532

post.insert(0, last_dir_name)

533

534

# Find out what paths we have

535

paths = {first_dir_name:[first_fields]}

536

# last_dir_name might == first_dir_name so we need to be

537

# careful if we should append rather than overwrite

538

if last_entry_num != first_entry_num:

539

paths.setdefault(last_dir_name, []).append(last_fields)

540

for num in xrange(first_entry_num+1, last_entry_num):

541

# TODO: jam 20070223 We are already splitting here, so

542

# shouldn't we just split the whole thing rather

543

# than doing the split again in add_one_record?

544

fields = entries[num].split('\0')

545

dir_name = (fields[1], fields[2])

546

paths.setdefault(dir_name, []).append(fields)

547

548

for dir_name in middle_files:

549

for fields in paths.get(dir_name, []):

550

# offset by 1 because of the opening '\0'

551

# consider changing fields_to_entry to avoid the

552

# extra list slice

553

entry = fields_to_entry(fields[1:])

554

found.setdefault(dir_name, []).append(entry)

555

556

# Now we have split up everything into pre, middle, and post, and

557

# we have handled everything that fell in 'middle'.

558

# We add 'post' first, so that we prefer to seek towards the

559

# beginning, so that we will tend to go as early as we need, and

560

# then only seek forward after that.

561

if post:

562

pending.append((after, high, post))

563

if pre:

564

pending.append((low, start-1, pre))

565

566

# Consider that we may want to return the directory entries in sorted

567

# order. For now, we just return them in whatever order we found them,

568

# and leave it up to the caller if they care if it is ordered or not.

569

return found

570

571

def _bisect_dirblocks(self, dir_list):

572

"""Bisect through the disk structure to find entries in given dirs.

573

574

_bisect_dirblocks is meant to find the contents of directories, which

575

differs from _bisect, which only finds individual entries.

576

577

:param dir_list: An sorted list of directory names ['', 'dir', 'foo'].

578

:return: A map from dir => entries_for_dir

579

"""

580

# TODO: jam 20070223 A lot of the bisecting logic could be shared

581

# between this and _bisect. It would require parameterizing the

582

# inner loop with a function, though. We should evaluate the

583

# performance difference.

584

self._requires_lock()

585

# We need the file pointer to be right after the initial header block

586

self._read_header_if_needed()

587

# If _dirblock_state was in memory, we should just return info from

588

# there, this function is only meant to handle when we want to read

589

# part of the disk.

590

assert self._dirblock_state == DirState.NOT_IN_MEMORY

591

592

# The disk representation is generally info + '\0\n\0' at the end. But

593

# for bisecting, it is easier to treat this as '\0' + info + '\0\n'

594

# Because it means we can sync on the '\n'

595

state_file = self._state_file

596

file_size = os.fstat(state_file.fileno()).st_size

597

# We end up with 2 extra fields, we should have a trailing '\n' to

598

# ensure that we read the whole record, and we should have a precursur

599

# '' which ensures that we start after the previous '\n'

600

entry_field_count = self._fields_per_entry() + 1

601

602

low = self._end_of_header

603

high = file_size - 1 # Ignore the final '\0'

604

# Map from dir => entry

605

found = {}

606

607

# Avoid infinite seeking

608

max_count = 30*len(dir_list)

609

count = 0

610

# pending is a list of places to look.

611

# each entry is a tuple of low, high, dir_names

612

# low -> the first byte offset to read (inclusive)

613

# high -> the last byte offset (inclusive)

614

# dirs -> The list of directories that should be found in

615

# the [low, high] range

616

pending = [(low, high, dir_list)]

617

618

page_size = self._bisect_page_size

619

620

fields_to_entry = self._get_fields_to_entry()

621

622

while pending:

623

low, high, cur_dirs = pending.pop()

624

625

if not cur_dirs or low >= high:

626

# Nothing to find

627

continue

628

629

count += 1

630

if count > max_count:

631

raise errors.BzrError('Too many seeks, most likely a bug.')

632

633

mid = max(low, (low+high-page_size)/2)

634

635

state_file.seek(mid)

636

# limit the read size, so we don't end up reading data that we have

637

# already read.

638

read_size = min(page_size, (high-mid)+1)

639

block = state_file.read(read_size)

640

641

start = mid

642

entries = block.split('\n')

643

644

if len(entries) < 2:

645

# We didn't find a '\n', so we cannot have found any records.

646

# So put this range back and try again. But we know we have to

647

# increase the page size, because a single read did not contain

648

# a record break (so records must be larger than page_size)

649

page_size *= 2

650

pending.append((low, high, cur_dirs))

651

continue

652

653

# Check the first and last entries, in case they are partial, or if

654

# we don't care about the rest of this page

655

first_entry_num = 0

656

first_fields = entries[0].split('\0')

657

if len(first_fields) < entry_field_count:

658

# We didn't get the complete first entry

659

# so move start, and grab the next, which

660

# should be a full entry

661

start += len(entries[0])+1

662

first_fields = entries[1].split('\0')

663

first_entry_num = 1

664

665

if len(first_fields) <= 1:

666

# We didn't even get a dirname here... what do we do?

667

# Try a large page size and repeat this query

668

page_size *= 2

669

pending.append((low, high, cur_dirs))

670

continue

671

else:

672

# Find what entries we are looking for, which occur before and

673

# after this first record.

674

after = start

675

first_dir = first_fields[1]

676

first_loc = bisect.bisect_left(cur_dirs, first_dir)

677

678

# These exist before the current location

679

pre = cur_dirs[:first_loc]

680

# These occur after the current location, which may be in the

681

# data we read, or might be after the last entry

682

post = cur_dirs[first_loc:]

683

684

if post and len(first_fields) >= entry_field_count:

685

# We have records to look at after the first entry

686

687

# Parse the last entry

688

last_entry_num = len(entries)-1

689

last_fields = entries[last_entry_num].split('\0')

690

if len(last_fields) < entry_field_count:

691

# The very last hunk was not complete,

692

# read the previous hunk

693

after = mid + len(block) - len(entries[-1])

694

last_entry_num -= 1

695

last_fields = entries[last_entry_num].split('\0')

696

else:

697

after = mid + len(block)

698

699

last_dir = last_fields[1]

700

last_loc = bisect.bisect_right(post, last_dir)

701

702

middle_files = post[:last_loc]

703

post = post[last_loc:]

704

705

if middle_files:

706

# We have files that should occur in this block

707

# (>= first, <= last)

708

# Either we will find them here, or we can mark them as

709

# missing.

710

711

if middle_files[0] == first_dir:

712

# We might need to go before this location

713

pre.append(first_dir)

714

if middle_files[-1] == last_dir:

715

post.insert(0, last_dir)

716

717

# Find out what paths we have

718

paths = {first_dir:[first_fields]}

719

# last_dir might == first_dir so we need to be

720

# careful if we should append rather than overwrite

721

if last_entry_num != first_entry_num:

722

paths.setdefault(last_dir, []).append(last_fields)

723

for num in xrange(first_entry_num+1, last_entry_num):

724

# TODO: jam 20070223 We are already splitting here, so

725

# shouldn't we just split the whole thing rather

726

# than doing the split again in add_one_record?

727

fields = entries[num].split('\0')

728

paths.setdefault(fields[1], []).append(fields)

729

730

for cur_dir in middle_files:

731

for fields in paths.get(cur_dir, []):

732

# offset by 1 because of the opening '\0'

733

# consider changing fields_to_entry to avoid the

734

# extra list slice

735

entry = fields_to_entry(fields[1:])

736

found.setdefault(cur_dir, []).append(entry)

737

738

# Now we have split up everything into pre, middle, and post, and

739

# we have handled everything that fell in 'middle'.

740

# We add 'post' first, so that we prefer to seek towards the

741

# beginning, so that we will tend to go as early as we need, and

742

# then only seek forward after that.

743

if post:

744

pending.append((after, high, post))

745

if pre:

746

pending.append((low, start-1, pre))

747

748

return found

749

750

def _bisect_recursive(self, dir_name_list):

751

"""Bisect for entries for all paths and their children.

752

753

This will use bisect to find all records for the supplied paths. It

754

will then continue to bisect for any records which are marked as

755

directories. (and renames?)

756

757

:param paths: A sorted list of (dir, name) pairs

758

eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]

759

:return: A dictionary mapping (dir, name, file_id) => [tree_info]

760

"""

761

# Map from (dir, name, file_id) => [tree_info]

762

found = {}

763

764

found_dir_names = set()

765

766

# Directories that have been read

767

processed_dirs = set()

768

# Get the ball rolling with the first bisect for all entries.

769

newly_found = self._bisect(dir_name_list)

770

771

while newly_found:

772

# Directories that need to be read

773

pending_dirs = set()

774

paths_to_search = set()

775

for entry_list in newly_found.itervalues():

776

for dir_name_id, trees_info in entry_list:

777

found[dir_name_id] = trees_info

778

found_dir_names.add(dir_name_id[:2])

779

is_dir = False

780

for tree_info in trees_info:

781

minikind = tree_info[0]

782

if minikind == 'd':

783

if is_dir:

784

# We already processed this one as a directory,

785

# we don't need to do the extra work again.

786

continue

787

subdir, name, file_id = dir_name_id

788

path = osutils.pathjoin(subdir, name)

789

is_dir = True

790

if path not in processed_dirs:

791

pending_dirs.add(path)

792

elif minikind == 'r':

793

# Rename, we need to directly search the target

794

# which is contained in the fingerprint column

795

dir_name = osutils.split(tree_info[1])

796

if dir_name[0] in pending_dirs:

797

# This entry will be found in the dir search

798

continue

799

# TODO: We need to check if this entry has

800

# already been found. Otherwise we might be

801

# hitting infinite recursion.

802

if dir_name not in found_dir_names:

803

paths_to_search.add(dir_name)

804

# Now we have a list of paths to look for directly, and

805

# directory blocks that need to be read.

806

# newly_found is mixing the keys between (dir, name) and path

807

# entries, but that is okay, because we only really care about the

808

# targets.

809

newly_found = self._bisect(sorted(paths_to_search))

810

newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))

811

processed_dirs.update(pending_dirs)

812

return found

813

814

def _empty_parent_info(self):

815

return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -

816

len(self._ghosts))

817

818

def _ensure_block(self, parent_block_index, parent_row_index, dirname):

819

"""Ensure a block for dirname exists.

820

821

This function exists to let callers which know that there is a

822

directory dirname ensure that the block for it exists. This block can

823

fail to exist because of demand loading, or because a directory had no

824

children. In either case it is not an error. It is however an error to

825

call this if there is no parent entry for the directory, and thus the

826

function requires the coordinates of such an entry to be provided.

827

828

The root row is special cased and can be indicated with a parent block

829

and row index of -1

830

831

:param parent_block_index: The index of the block in which dirname's row

832

exists.

833

:param parent_row_index: The index in the parent block where the row

834

exists.

835

:param dirname: The utf8 dirname to ensure there is a block for.

836

:return: The index for the block.

837

"""

838

if dirname == '' and parent_row_index == 0 and parent_block_index == 0:

839

# This is the signature of the root row, and the

840

# contents-of-root row is always index 1

841

return 1

842

# the basename of the directory must be the end of its full name.

843

if not (parent_block_index == -1 and

844

parent_block_index == -1 and dirname == ''):

845

assert dirname.endswith(

846

self._dirblocks[parent_block_index][1][parent_row_index][0][1])

847

block_index, present = self._find_block_index_from_key((dirname, '', ''))

848

if not present:

849

## In future, when doing partial parsing, this should load and

850

# populate the entire block.

851

self._dirblocks.insert(block_index, (dirname, []))

852

return block_index

853

854

def _entries_to_current_state(self, new_entries):

855

"""Load new_entries into self.dirblocks.

856

857

Process new_entries into the current state object, making them the active

858

state.

859

860

:param new_entries: A sorted list of entries. This function does not sort

861

to prevent unneeded overhead when callers have a sorted list already.

862

:return: Nothing.

863

"""

864

assert new_entries[0][0][0:2] == ('', ''), \

865

"Missing root row %r" % (new_entries[0][0],)

866

# The two blocks here are deliberate: the root block and the

867

# contents-of-root block.

868

self._dirblocks = [('', []), ('', [])]

869

current_block = self._dirblocks[0][1]

870

current_dirname = ''

871

root_key = ('', '')

872

append_entry = current_block.append

873

for entry in new_entries:

874

if entry[0][0] != current_dirname:

875

# new block - different dirname

876

current_block = []

877

current_dirname = entry[0][0]

878

self._dirblocks.append((current_dirname, current_block))

879

append_entry = current_block.append

880

# append the entry to the current block

881

append_entry(entry)

882

self._split_root_dirblock_into_contents()

883

884

def _split_root_dirblock_into_contents(self):

885

"""Split the root dirblocks into root and contents-of-root.

886

887

After parsing by path, we end up with root entries and contents-of-root

888

entries in the same block. This loop splits them out again.

889

"""

890

# The above loop leaves the "root block" entries mixed with the

891

# "contents-of-root block". But we don't want an if check on

892

# all entries, so instead we just fix it up here.

893

assert self._dirblocks[1] == ('', [])

894

root_block = []

895

contents_of_root_block = []

896

for entry in self._dirblocks[0][1]:

897

if not entry[0][1]: # This is a root entry

898

root_block.append(entry)

899

else:

900

contents_of_root_block.append(entry)

901

self._dirblocks[0] = ('', root_block)

902

self._dirblocks[1] = ('', contents_of_root_block)

903

904

def _entry_to_line(self, entry):

905

"""Serialize entry to a NULL delimited line ready for _get_output_lines.

906

907

:param entry: An entry_tuple as defined in the module docstring.

908

"""

909

entire_entry = list(entry[0])

910

for tree_number, tree_data in enumerate(entry[1]):

911

# (minikind, fingerprint, size, executable, tree_specific_string)

912

entire_entry.extend(tree_data)

913

# 3 for the key, 5 for the fields per tree.

914

tree_offset = 3 + tree_number * 5

915

# minikind

916

entire_entry[tree_offset + 0] = tree_data[0]

917

# size

918

entire_entry[tree_offset + 2] = str(tree_data[2])

919

# executable

920

entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]

921

return '\0'.join(entire_entry)

922

923

def _fields_per_entry(self):

924

"""How many null separated fields should be in each entry row.

925

926

Each line now has an extra '\n' field which is not used

927

so we just skip over it

928

entry size:

929

3 fields for the key

930

+ number of fields per tree_data (5) * tree count

931

+ newline

932

"""

933

tree_count = 1 + self._num_present_parents()

934

return 3 + 5 * tree_count + 1

935

936

def _find_block(self, key, add_if_missing=False):

937

"""Return the block that key should be present in.

938

939

:param key: A dirstate entry key.

940

:return: The block tuple.

941

"""

942

block_index, present = self._find_block_index_from_key(key)

943

if not present:

944

if not add_if_missing:

945

# check to see if key is versioned itself - we might want to

946

# add it anyway, because dirs with no entries dont get a

947

# dirblock at parse time.

948

# This is an uncommon branch to take: most dirs have children,

949

# and most code works with versioned paths.

950

parent_base, parent_name = osutils.split(key[0])

951

if not self._get_block_entry_index(parent_base, parent_name, 0)[3]:

952

# some parent path has not been added - its an error to add

953

# this child

954

raise errors.NotVersionedError(key[0:2], str(self))

955

self._dirblocks.insert(block_index, (key[0], []))

956

return self._dirblocks[block_index]

957

958

def _find_block_index_from_key(self, key):

959

"""Find the dirblock index for a key.

960

961

:return: The block index, True if the block for the key is present.

962

"""

963

if key[0:2] == ('', ''):

964

return 0, True

965

block_index = bisect_dirblock(self._dirblocks, key[0], 1,

966

cache=self._split_path_cache)

967

# _right returns one-past-where-key is so we have to subtract

968

# one to use it. we use _right here because there are two

969

# '' blocks - the root, and the contents of root

970

# we always have a minimum of 2 in self._dirblocks: root and

971

# root-contents, and for '', we get 2 back, so this is

972

# simple and correct:

973

present = (block_index < len(self._dirblocks) and

974

self._dirblocks[block_index][0] == key[0])

975

return block_index, present

976

977

def _find_entry_index(self, key, block):

978

"""Find the entry index for a key in a block.

979

980

:return: The entry index, True if the entry for the key is present.

981

"""

982

entry_index = bisect.bisect_left(block, (key, []))

983

present = (entry_index < len(block) and

984

block[entry_index][0] == key)

985

return entry_index, present

986

987

@staticmethod

988

def from_tree(tree, dir_state_filename):

989

"""Create a dirstate from a bzr Tree.

990

991

:param tree: The tree which should provide parent information and

992

inventory ids.

993

:return: a DirState object which is currently locked for writing.

994

(it was locked by DirState.initialize)

995

"""

996

result = DirState.initialize(dir_state_filename)

997

try:

998

tree.lock_read()

999

try:

1000

parent_ids = tree.get_parent_ids()

1001

num_parents = len(parent_ids)

1002

parent_trees = []

1003

for parent_id in parent_ids:

1004

parent_tree = tree.branch.repository.revision_tree(parent_id)

1005

parent_trees.append((parent_id, parent_tree))

1006

parent_tree.lock_read()

1007

result.set_parent_trees(parent_trees, [])

1008

result.set_state_from_inventory(tree.inventory)

1009

finally:

1010

for revid, parent_tree in parent_trees:

1011

parent_tree.unlock()

1012

tree.unlock()

1013

except:

1014

# The caller won't have a chance to unlock this, so make sure we

1015

# cleanup ourselves

1016

result.unlock()

1017

raise

1018

return result

1019

1020

def get_sha1_for_entry(self, entry, abspath, stat_value=None):

1021

"""Return the sha1 sum for a given file.

1022

1023

:param entry: This is the dirblock entry for the file in question.

1024

:param abspath: The path on disk for this file.

1025

:param stat_value: (optional) if we already have done a stat on the

1026

file, re-use it.

1027

:return: The sha1 hexdigest of the file (40 bytes)

1028

"""

1029

# This code assumes that the entry passed in is directly held in one of

1030

# the internal _dirblocks. So the dirblock state must have already been

1031

# read.

1032

assert self._dirblock_state != DirState.NOT_IN_MEMORY

1033

# TODO: jam 20070301 Because we now allow kind changes (files => dirs)

1034

# we should actually base this check on the stat value, since

1035

# that is the absolute measurement of whether we have a file or

1036

# directory or link. That means that this function might actually

1037

# change an entry from being a file => dir or dir => file, etc.

1038

if entry[1][0][0] != 'f':

1039

return None

1040

if stat_value is None:

1041

stat_value = os.lstat(abspath)

1042

packed_stat = pack_stat(stat_value)

1043

saved_sha1_digest = entry[1][0][1]

1044

saved_file_size = entry[1][0][2]

1045

saved_packed_stat = entry[1][0][4]

1046

if (packed_stat == saved_packed_stat

1047

and saved_sha1_digest != ''

1048

# size should also be in packed_stat

1049

and saved_file_size == stat_value.st_size):

1050

# The stat hasn't changed since we saved, so we can potentially

1051

# re-use the saved sha hash.

1052

cutoff = self._sha_cutoff_time()

1053

if (stat_value.st_mtime < cutoff

1054

and stat_value.st_ctime < cutoff):

1055

# Return the existing fingerprint

1056

return saved_sha1_digest

1057

# If we have gotten this far, that means that we need to actually

1058

# process the file.

1059

new_sha1_digest = self._sha1_file(abspath)

1060

# TODO: jam 20070301 Is it worth checking to see if the new sha is

1061

# actually different? I'm guessing not, because we wouldn't have

1062

# gotten this far otherwise.

1063

entry[1][0] = ('f', new_sha1_digest, stat_value.st_size,

1064

entry[1][0][3], # Executable?

1065

packed_stat

1066

)

1067

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1068

return new_sha1_digest

1069

1070

def _sha_cutoff_time(self):

1071

"""Return cutoff time.

1072

1073

Files modified more recently than this time are at risk of being

1074

undetectably modified and so can't be cached.

1075

"""

1076

return int(time.time()) - 3

1077

1078

def _sha1_file(self, abspath):

1079

"""Calculate the SHA1 of a file by reading the full text"""

1080

f = file(abspath, 'rb', buffering=65000)

1081

try:

1082

return osutils.sha_file(f)

1083

finally:

1084

f.close()

1085

1086

def get_ghosts(self):

1087

"""Return a list of the parent tree revision ids that are ghosts."""

1088

self._read_header_if_needed()

1089

return self._ghosts

1090

1091

def get_lines(self):

1092

"""Serialise the entire dirstate to a sequence of lines."""

1093

if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and

1094

self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):

1095

# read whats on disk.

1096

self._state_file.seek(0)

1097

return self._state_file.readlines()

1098

lines = []

1099

lines.append(self._get_parents_line(self.get_parent_ids()))

1100

lines.append(self._get_ghosts_line(self._ghosts))

1101

# append the root line which is special cased

1102

lines.extend(map(self._entry_to_line, self._iter_entries()))

1103

return self._get_output_lines(lines)

1104

1105

def _get_ghosts_line(self, ghost_ids):

1106

"""Create a line for the state file for ghost information."""

1107

return '\0'.join([str(len(ghost_ids))] + ghost_ids)

1108

1109

def _get_parents_line(self, parent_ids):

1110

"""Create a line for the state file for parents information."""

1111

return '\0'.join([str(len(parent_ids))] + parent_ids)

1112

1113

def _get_fields_to_entry(self):

1114

"""Get a function which converts entry fields into a entry record.

1115

1116

This handles size and executable, as well as parent records.

1117

1118

:return: A function which takes a list of fields, and returns an

1119

appropriate record for storing in memory.

1120

"""

1121

# This is intentionally unrolled for performance

1122

num_present_parents = self._num_present_parents()

1123

if num_present_parents == 0:

1124

def fields_to_entry_0_parents(fields, _int=int):

1125

path_name_file_id_key = (fields[0], fields[1], fields[2])

1126

return (path_name_file_id_key, [

1127

( # Current tree

1128

fields[3], # minikind

1129

fields[4], # fingerprint

1130

_int(fields[5]), # size

1131

fields[6] == 'y', # executable

1132

fields[7], # packed_stat or revision_id

1133

)])

1134

return fields_to_entry_0_parents

1135

elif num_present_parents == 1:

1136

def fields_to_entry_1_parent(fields, _int=int):

1137

path_name_file_id_key = (fields[0], fields[1], fields[2])

1138

return (path_name_file_id_key, [

1139

( # Current tree

1140

fields[3], # minikind

1141

fields[4], # fingerprint

1142

_int(fields[5]), # size

1143

fields[6] == 'y', # executable

1144

fields[7], # packed_stat or revision_id

1145

1146

( # Parent 1

1147

fields[8], # minikind

1148

fields[9], # fingerprint

1149

_int(fields[10]), # size

1150

fields[11] == 'y', # executable

1151

fields[12], # packed_stat or revision_id

1152

1153

])

1154

return fields_to_entry_1_parent

1155

elif num_present_parents == 2:

1156

def fields_to_entry_2_parents(fields, _int=int):

1157

path_name_file_id_key = (fields[0], fields[1], fields[2])

1158

return (path_name_file_id_key, [

1159

( # Current tree

1160

fields[3], # minikind

1161

fields[4], # fingerprint

1162

_int(fields[5]), # size

1163

fields[6] == 'y', # executable

1164

fields[7], # packed_stat or revision_id

1165

1166

( # Parent 1

1167

fields[8], # minikind

1168

fields[9], # fingerprint

1169

_int(fields[10]), # size

1170

fields[11] == 'y', # executable

1171

fields[12], # packed_stat or revision_id

1172

1173

( # Parent 2

1174

fields[13], # minikind

1175

fields[14], # fingerprint

1176

_int(fields[15]), # size

1177

fields[16] == 'y', # executable

1178

fields[17], # packed_stat or revision_id

1179

1180

])

1181

return fields_to_entry_2_parents

1182

else:

1183

def fields_to_entry_n_parents(fields, _int=int):

1184

path_name_file_id_key = (fields[0], fields[1], fields[2])

1185

trees = [(fields[cur], # minikind

1186

fields[cur+1], # fingerprint

1187

_int(fields[cur+2]), # size

1188

fields[cur+3] == 'y', # executable

1189

fields[cur+4], # stat or revision_id

1190

) for cur in xrange(3, len(fields)-1, 5)]

1191

return path_name_file_id_key, trees

1192

return fields_to_entry_n_parents

1193

1194

def get_parent_ids(self):

1195

"""Return a list of the parent tree ids for the directory state."""

1196

self._read_header_if_needed()

1197

return list(self._parents)

1198

1199

def _get_block_entry_index(self, dirname, basename, tree_index):

1200

"""Get the coordinates for a path in the state structure.

1201

1202

:param dirname: The utf8 dirname to lookup.

1203

:param basename: The utf8 basename to lookup.

1204

:param tree_index: The index of the tree for which this lookup should

1205

be attempted.

1206

:return: A tuple describing where the path is located, or should be

1207

inserted. The tuple contains four fields: the block index, the row

1208

index, anda two booleans are True when the directory is present, and

1209

when the entire path is present. There is no guarantee that either

1210

coordinate is currently reachable unless the found field for it is

1211

True. For instance, a directory not present in the searched tree

1212

may be returned with a value one greater than the current highest

1213

block offset. The directory present field will always be True when

1214

the path present field is True. The directory present field does

1215

NOT indicate that the directory is present in the searched tree,

1216

rather it indicates that there are at least some files in some

1217

tree present there.

1218

"""

1219

self._read_dirblocks_if_needed()

1220

key = dirname, basename, ''

1221

block_index, present = self._find_block_index_from_key(key)

1222

if not present:

1223

# no such directory - return the dir index and 0 for the row.

1224

return block_index, 0, False, False

1225

block = self._dirblocks[block_index][1] # access the entries only

1226

entry_index, present = self._find_entry_index(key, block)

1227

# linear search through present entries at this path to find the one

1228

# requested.

1229

while entry_index < len(block) and block[entry_index][0][1] == basename:

1230

if block[entry_index][1][tree_index][0] not in \

1231

('a', 'r'): # absent, relocated

1232

return block_index, entry_index, True, True

1233

entry_index += 1

1234

return block_index, entry_index, True, False

1235

1236

def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None):

1237

"""Get the dirstate entry for path in tree tree_index

1238

1239

If either file_id or path is supplied, it is used as the key to lookup.

1240

If both are supplied, the fastest lookup is used, and an error is

1241

raised if they do not both point at the same row.

1242

1243

:param tree_index: The index of the tree we wish to locate this path

1244

in. If the path is present in that tree, the entry containing its

1245

details is returned, otherwise (None, None) is returned

1246

:param fileid_utf8: A utf8 file_id to look up.

1247

:param path_utf8: An utf8 path to be looked up.

1248

:return: The dirstate entry tuple for path, or (None, None)

1249

"""

1250

self._read_dirblocks_if_needed()

1251

if path_utf8 is not None:

1252

assert path_utf8.__class__ == str, 'path_utf8 is not a str: %s %s' % (type(path_utf8), path_utf8)

1253

# path lookups are faster

1254

dirname, basename = osutils.split(path_utf8)

1255

block_index, entry_index, dir_present, file_present = \

1256

self._get_block_entry_index(dirname, basename, tree_index)

1257

if not file_present:

1258

return None, None

1259

entry = self._dirblocks[block_index][1][entry_index]

1260

assert entry[0][2] and entry[1][tree_index][0] not in ('a', 'r'), 'unversioned entry?!?!'

1261

if fileid_utf8:

1262

if entry[0][2] != fileid_utf8:

1263

raise errors.BzrError('integrity error ? : mismatching'

1264

' tree_index, file_id and path')

1265

return entry

1266

else:

1267

assert fileid_utf8 is not None

1268

possible_keys = self._get_id_index().get(fileid_utf8, None)

1269

if not possible_keys:

1270

return None, None

1271

for key in possible_keys:

1272

block_index, present = \

1273

self._find_block_index_from_key(key)

1274

# strange, probably indicates an out of date

1275

# id index - for now, allow this.

1276

if not present:

1277

continue

1278

# WARNING: DO not change this code to use _get_block_entry_index

1279

# as that function is not suitable: it does not use the key

1280

# to lookup, and thus the wront coordinates are returned.

1281

block = self._dirblocks[block_index][1]

1282

entry_index, present = self._find_entry_index(key, block)

1283

if present:

1284

entry = self._dirblocks[block_index][1][entry_index]

1285

if entry[1][tree_index][0] in 'fdl':

1286

# this is the result we are looking for: the

1287

# real home of this file_id in this tree.

1288

return entry

1289

if entry[1][tree_index][0] == 'a':

1290

# there is no home for this entry in this tree

1291

return None, None

1292

assert entry[1][tree_index][0] == 'r'

1293

real_path = entry[1][tree_index][1]

1294

return self._get_entry(tree_index, fileid_utf8=fileid_utf8,

1295

path_utf8=real_path)

1296

return None, None

1297

1298

@classmethod

1299

def initialize(cls, path):

1300

"""Create a new dirstate on path.

1301

1302

The new dirstate will be an empty tree - that is it has no parents,

1303

and only a root node - which has id ROOT_ID.

1304

1305

The object will be write locked when returned to the caller,

1306

unless there was an exception in the writing, in which case it

1307

will be unlocked.

1308

1309

:param path: The name of the file for the dirstate.

1310

:return: A DirState object.

1311

"""

1312

# This constructs a new DirState object on a path, sets the _state_file

1313

# to a new empty file for that path. It then calls _set_data() with our

1314

# stock empty dirstate information - a root with ROOT_ID, no children,

1315

# and no parents. Finally it calls save() to ensure that this data will

1316

# persist.

1317

result = cls(path)

1318

# root dir and root dir contents with no children.

1319

empty_tree_dirblocks = [('', []), ('', [])]

1320

# a new root directory, with a NULLSTAT.

1321

empty_tree_dirblocks[0][1].append(

1322

(('', '', inventory.ROOT_ID), [

1323

('d', '', 0, False, DirState.NULLSTAT),

1324

]))

1325

result.lock_write()

1326

try:

1327

result._set_data([], empty_tree_dirblocks)

1328

result.save()

1329

except:

1330

result.unlock()

1331

raise

1332

return result

1333

1334

def _inv_entry_to_details(self, inv_entry):

1335

"""Convert an inventory entry (from a revision tree) to state details.

1336

1337

:param inv_entry: An inventory entry whose sha1 and link targets can be

1338

relied upon, and which has a revision set.

1339

:return: A details tuple - the details for a single tree at a path +

1340

id.

1341

"""

1342

kind = inv_entry.kind

1343

minikind = DirState._kind_to_minikind[kind]

1344

tree_data = inv_entry.revision

1345

assert len(tree_data) > 0, 'empty revision for the inv_entry.'

1346

if kind == 'directory':

1347

fingerprint = ''

1348

size = 0

1349

executable = False

1350

elif kind == 'symlink':

1351

fingerprint = inv_entry.symlink_target or ''

1352

size = 0

1353

executable = False

1354

elif kind == 'file':

1355

fingerprint = inv_entry.text_sha1 or ''

1356

size = inv_entry.text_size or 0

1357

executable = inv_entry.executable

1358

else:

1359

raise Exception

1360

return (minikind, fingerprint, size, executable, tree_data)

1361

1362

def _iter_entries(self):

1363

"""Iterate over all the entries in the dirstate.

1364

1365

Each yelt item is an entry in the standard format described in the

1366

docstring of bzrlib.dirstate.

1367

"""

1368

self._read_dirblocks_if_needed()

1369

for directory in self._dirblocks:

1370

for entry in directory[1]:

1371

yield entry

1372

1373

def _get_id_index(self):

1374

"""Get an id index of self._dirblocks."""

1375

if self._id_index is None:

1376

id_index = {}

1377

for key, tree_details in self._iter_entries():

1378

id_index.setdefault(key[2], set()).add(key)

1379

self._id_index = id_index

1380

return self._id_index

1381

1382

def _get_output_lines(self, lines):

1383

"""format lines for final output.

1384

1385

:param lines: A sequece of lines containing the parents list and the

1386

path lines.

1387

"""

1388

output_lines = [DirState.HEADER_FORMAT_3]

1389

lines.append('') # a final newline

1390

inventory_text = '\0\n\0'.join(lines)

1391

output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))

1392

# -3, 1 for num parents, 1 for ghosts, 1 for final newline

1393

num_entries = len(lines)-3

1394

output_lines.append('num_entries: %s\n' % (num_entries,))

1395

output_lines.append(inventory_text)

1396

return output_lines

1397

1398

def _make_deleted_row(self, fileid_utf8, parents):

1399

"""Return a deleted for for fileid_utf8."""

1400

return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,

1401

''), parents

1402

1403

def _num_present_parents(self):

1404

"""The number of parent entries in each record row."""

1405

return len(self._parents) - len(self._ghosts)

1406

1407

@staticmethod

1408

def on_file(path):

1409

"""Construct a DirState on the file at path path.

1410

1411

:return: An unlocked DirState object, associated with the given path.

1412

"""

1413

result = DirState(path)

1414

return result

1415

1416

def _read_dirblocks_if_needed(self):

1417

"""Read in all the dirblocks from the file if they are not in memory.

1418

1419

This populates self._dirblocks, and sets self._dirblock_state to

1420

IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block

1421

1422

"""

1423

self._read_header_if_needed()

1424

if self._dirblock_state == DirState.NOT_IN_MEMORY:

1425

# move the _state_file pointer to after the header (in case bisect

1426

# has been called in the mean time)

1427

self._state_file.seek(self._end_of_header)

1428

text = self._state_file.read()

1429

# TODO: check the adler checksums. adler_measured = zlib.adler32(text)

1430

1431

fields = text.split('\0')

1432

# Remove the last blank entry

1433

trailing = fields.pop()

1434

assert trailing == ''

1435

# consider turning fields into a tuple.

1436

1437

# skip the first field which is the trailing null from the header.

1438

cur = 1

1439

# Each line now has an extra '\n' field which is not used

1440

# so we just skip over it

1441

# entry size:

1442

# 3 fields for the key

1443

# + number of fields per tree_data (5) * tree count

1444

# + newline

1445

num_present_parents = self._num_present_parents()

1446

tree_count = 1 + num_present_parents

1447

entry_size = self._fields_per_entry()

1448

expected_field_count = entry_size * self._num_entries

1449

if len(fields) - cur > expected_field_count:

1450

fields = fields[:expected_field_count + cur]

1451

trace.mutter('Unexpectedly long dirstate field count!')

1452

print "XXX: incorrectly truncated dirstate file bug triggered."

1453

field_count = len(fields)

1454

# this checks our adjustment, and also catches file too short.

1455

assert field_count - cur == expected_field_count, \

1456

'field count incorrect %s != %s, entry_size=%s, '\

1457

'num_entries=%s fields=%r' % (

1458

field_count - cur, expected_field_count, entry_size,

1459

self._num_entries, fields)

1460

1461

if num_present_parents == 1:

1462

# Bind external functions to local names

1463

_int = int

1464

# We access all fields in order, so we can just iterate over

1465

# them. Grab an straight iterator over the fields. (We use an

1466

# iterator because we don't want to do a lot of additions, nor

1467

# do we want to do a lot of slicing)

1468

next = iter(fields).next

1469

# Move the iterator to the current position

1470

for x in xrange(cur):

1471

next()

1472

# The two blocks here are deliberate: the root block and the

1473

# contents-of-root block.

1474

self._dirblocks = [('', []), ('', [])]

1475

current_block = self._dirblocks[0][1]

1476

current_dirname = ''

1477

append_entry = current_block.append

1478

for count in xrange(self._num_entries):

1479

dirname = next()

1480

name = next()

1481

file_id = next()

1482

if dirname != current_dirname:

1483

# new block - different dirname

1484

current_block = []

1485

current_dirname = dirname

1486

self._dirblocks.append((current_dirname, current_block))

1487

append_entry = current_block.append

1488

# we know current_dirname == dirname, so re-use it to avoid

1489

# creating new strings

1490

entry = ((current_dirname, name, file_id),

1491

[(# Current Tree

1492

next(), # minikind

1493

next(), # fingerprint

1494

_int(next()), # size

1495

next() == 'y', # executable

1496

next(), # packed_stat or revision_id

1497

1498

( # Parent 1

1499

next(), # minikind

1500

next(), # fingerprint

1501

_int(next()), # size

1502

next() == 'y', # executable

1503

next(), # packed_stat or revision_id

1504

1505

])

1506

trailing = next()

1507

assert trailing == '\n'

1508

# append the entry to the current block

1509

append_entry(entry)

1510

self._split_root_dirblock_into_contents()

1511

else:

1512

fields_to_entry = self._get_fields_to_entry()

1513

entries = [fields_to_entry(fields[pos:pos+entry_size])

1514

for pos in xrange(cur, field_count, entry_size)]

1515

self._entries_to_current_state(entries)

1516

# To convert from format 2 => format 3

1517

# self._dirblocks = sorted(self._dirblocks,

1518

# key=lambda blk:blk[0].split('/'))

1519

# To convert from format 3 => format 2

1520

# self._dirblocks = sorted(self._dirblocks)

1521

self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

1522

1523

def _read_header(self):

1524

"""This reads in the metadata header, and the parent ids.

1525

1526

After reading in, the file should be positioned at the null

1527

just before the start of the first record in the file.

1528

1529

:return: (expected adler checksum, number of entries, parent list)

1530

"""

1531

self._read_prelude()

1532

parent_line = self._state_file.readline()

1533

info = parent_line.split('\0')

1534

num_parents = int(info[0])

1535

assert num_parents == len(info)-2, 'incorrect parent info line'

1536

self._parents = info[1:-1]

1537

1538

ghost_line = self._state_file.readline()

1539

info = ghost_line.split('\0')

1540

num_ghosts = int(info[1])

1541

assert num_ghosts == len(info)-3, 'incorrect ghost info line'

1542

self._ghosts = info[2:-1]

1543

self._header_state = DirState.IN_MEMORY_UNMODIFIED

1544

self._end_of_header = self._state_file.tell()

1545

1546

def _read_header_if_needed(self):

1547

"""Read the header of the dirstate file if needed."""

1548

# inline this as it will be called a lot

1549

if not self._lock_token:

1550

raise errors.ObjectNotLocked(self)

1551

if self._header_state == DirState.NOT_IN_MEMORY:

1552

self._read_header()

1553

1554

def _read_prelude(self):

1555

"""Read in the prelude header of the dirstate file

1556

1557

This only reads in the stuff that is not connected to the adler

1558

checksum. The position will be correct to read in the rest of

1559

the file and check the checksum after this point.

1560

The next entry in the file should be the number of parents,

1561

and their ids. Followed by a newline.

1562

"""

1563

header = self._state_file.readline()

1564

assert header == DirState.HEADER_FORMAT_3, \

1565

'invalid header line: %r' % (header,)

1566

adler_line = self._state_file.readline()

1567

assert adler_line.startswith('adler32: '), 'missing adler32 checksum'

1568

self.adler_expected = int(adler_line[len('adler32: '):-1])

1569

num_entries_line = self._state_file.readline()

1570

assert num_entries_line.startswith('num_entries: '), 'missing num_entries line'

1571

self._num_entries = int(num_entries_line[len('num_entries: '):-1])

1572

1573

def save(self):

1574

"""Save any pending changes created during this session.

1575

1576

We reuse the existing file, because that prevents race conditions with

1577

file creation, and use oslocks on it to prevent concurrent modification

1578

and reads - because dirstates incremental data aggretation is not

1579

compatible with reading a modified file, and replacing a file in use by

1580

another process is impossible on windows.

1581

1582

A dirstate in read only mode should be smart enough though to validate

1583

that the file has not changed, and otherwise discard its cache and

1584

start over, to allow for fine grained read lock duration, so 'status'

1585

wont block 'commit' - for example.

1586

"""

1587

if (self._header_state == DirState.IN_MEMORY_MODIFIED or

1588

self._dirblock_state == DirState.IN_MEMORY_MODIFIED):

1589

1590

if self._lock_state == 'w':

1591

out_file = self._state_file

1592

wlock = None

1593

else:

1594

# Try to grab a write lock so that we can update the file.

1595

try:

1596

wlock = lock.WriteLock(self._filename)

1597

except (errors.LockError, errors.LockContention), e:

1598

# We couldn't grab the lock, so just leave things dirty in

1599

# memory.

1600

return

1601

except IOError, e:

1602

# This may be a read-only tree, or someone else may have a

1603

# ReadLock. so handle the case when we cannot grab a write

1604

# lock

1605

if e.errno in (errno.ENOENT, errno.EPERM, errno.EACCES,

1606

errno.EAGAIN):

1607

# Ignore these errors and just don't save anything

1608

return

1609

raise

1610

out_file = wlock.f

1611

try:

1612

out_file.seek(0)

1613

out_file.writelines(self.get_lines())

1614

out_file.truncate()

1615

out_file.flush()

1616

self._header_state = DirState.IN_MEMORY_UNMODIFIED

1617

self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

1618

finally:

1619

if wlock is not None:

1620

wlock.unlock()

1621

1622

def _set_data(self, parent_ids, dirblocks):

1623

"""Set the full dirstate data in memory.

1624

1625

This is an internal function used to completely replace the objects

1626

in memory state. It puts the dirstate into state 'full-dirty'.

1627

1628

:param parent_ids: A list of parent tree revision ids.

1629

:param dirblocks: A list containing one tuple for each directory in the

1630

tree. Each tuple contains the directory path and a list of entries

1631

found in that directory.

1632

"""

1633

# our memory copy is now authoritative.

1634

self._dirblocks = dirblocks

1635

self._header_state = DirState.IN_MEMORY_MODIFIED

1636

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1637

self._parents = list(parent_ids)

1638

self._id_index = None

1639

1640

def set_path_id(self, path, new_id):

1641

"""Change the id of path to new_id in the current working tree.

1642

1643

:param path: The path inside the tree to set - '' is the root, 'foo'

1644

is the path foo in the root.

1645

:param new_id: The new id to assign to the path. This must be a utf8

1646

file id (not unicode, and not None).

1647

"""

1648

# TODO: start warning here.

1649

assert new_id.__class__ == str

1650

self._read_dirblocks_if_needed()

1651

if len(path):

1652

import pdb;pdb.set_trace()

1653

# logic not written

1654

raise NotImplementedError(self.set_path_id)

1655

# TODO: check new id is unique

1656

entry = self._get_entry(0, path_utf8=path)

1657

if entry[0][2] == new_id:

1658

# Nothing to change.

1659

return

1660

# mark the old path absent, and insert a new root path

1661

self._make_absent(entry)

1662

self.update_minimal(('', '', new_id), 'd',

1663

path_utf8='', packed_stat=entry[1][0][4])

1664

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1665

if self._id_index is not None:

1666

self._id_index.setdefault(new_id, set()).add(entry[0])

1667

1668

def set_parent_trees(self, trees, ghosts):

1669

"""Set the parent trees for the dirstate.

1670

1671

:param trees: A list of revision_id, tree tuples. tree must be provided

1672

even if the revision_id refers to a ghost: supply an empty tree in

1673

this case.

1674

:param ghosts: A list of the revision_ids that are ghosts at the time

1675

of setting.

1676

"""

1677

# TODO: generate a list of parent indexes to preserve to save

1678

# processing specific parent trees. In the common case one tree will

1679

# be preserved - the left most parent.

1680

# TODO: if the parent tree is a dirstate, we might want to walk them

1681

# all by path in parallel for 'optimal' common-case performance.

1682

# generate new root row.

1683

self._read_dirblocks_if_needed()

1684

# TODO future sketch: Examine the existing parents to generate a change

1685

# map and then walk the new parent trees only, mapping them into the

1686

# dirstate. Walk the dirstate at the same time to remove unreferenced

1687

# entries.

1688

# for now:

1689

# sketch: loop over all entries in the dirstate, cherry picking

1690

# entries from the parent trees, if they are not ghost trees.

1691

# after we finish walking the dirstate, all entries not in the dirstate

1692

# are deletes, so we want to append them to the end as per the design

1693

# discussions. So do a set difference on ids with the parents to

1694

# get deletes, and add them to the end.

1695

# During the update process we need to answer the following questions:

1696

# - find other keys containing a fileid in order to create cross-path

1697

# links. We dont't trivially use the inventory from other trees

1698

# because this leads to either double touching, or to accessing

1699

# missing keys,

1700

# - find other keys containing a path

1701

# We accumulate each entry via this dictionary, including the root

1702

by_path = {}

1703

id_index = {}

1704

# we could do parallel iterators, but because file id data may be

1705

# scattered throughout, we dont save on index overhead: we have to look

1706

# at everything anyway. We can probably save cycles by reusing parent

1707

# data and doing an incremental update when adding an additional

1708

# parent, but for now the common cases are adding a new parent (merge),

1709

# and replacing completely (commit), and commit is more common: so

1710

# optimise merge later.

1711

1712

# ---- start generation of full tree mapping data

1713

# what trees should we use?

1714

parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]

1715

# how many trees do we end up with

1716

parent_count = len(parent_trees)

1717

1718

# one: the current tree

1719

for entry in self._iter_entries():

1720

# skip entries not in the current tree

1721

if entry[1][0][0] in ('a', 'r'): # absent, relocated

1722

continue

1723

by_path[entry[0]] = [entry[1][0]] + \

1724

[DirState.NULL_PARENT_DETAILS] * parent_count

1725

id_index[entry[0][2]] = set([entry[0]])

1726

1727

# now the parent trees:

1728

for tree_index, tree in enumerate(parent_trees):

1729

# the index is off by one, adjust it.

1730

tree_index = tree_index + 1

1731

# when we add new locations for a fileid we need these ranges for

1732

# any fileid in this tree as we set the by_path[id] to:

1733

# already_processed_tree_details + new_details + new_location_suffix

1734

# the suffix is from tree_index+1:parent_count+1.

1735

new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)

1736

# now stitch in all the entries from this tree

1737

for path, entry in tree.inventory.iter_entries_by_dir():

1738

# here we process each trees details for each item in the tree.

1739

# we first update any existing entries for the id at other paths,

1740

# then we either create or update the entry for the id at the

1741

# right path, and finally we add (if needed) a mapping from

1742

# file_id to this path. We do it in this order to allow us to

1743

# avoid checking all known paths for the id when generating a

1744

# new entry at this path: by adding the id->path mapping last,

1745

# all the mappings are valid and have correct relocation

1746

# records where needed.

1747

file_id = entry.file_id

1748

path_utf8 = path.encode('utf8')

1749

dirname, basename = osutils.split(path_utf8)

1750

new_entry_key = (dirname, basename, file_id)

1751

# tree index consistency: All other paths for this id in this tree

1752

# index must point to the correct path.

1753

for entry_key in id_index.setdefault(file_id, set()):

1754

# TODO:PROFILING: It might be faster to just update

1755

# rather than checking if we need to, and then overwrite

1756

# the one we are located at.

1757

if entry_key != new_entry_key:

1758

# this file id is at a different path in one of the

1759

# other trees, so put absent pointers there

1760

# This is the vertical axis in the matrix, all pointing

1761

# tot he real path.

1762

by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')

1763

# by path consistency: Insert into an existing path record (trivial), or

1764

# add a new one with relocation pointers for the other tree indexes.

1765

if new_entry_key in id_index[file_id]:

1766

# there is already an entry where this data belongs, just insert it.

1767

by_path[new_entry_key][tree_index] = \

1768

self._inv_entry_to_details(entry)

1769

else:

1770

# add relocated entries to the horizontal axis - this row

1771

# mapping from path,id. We need to look up the correct path

1772

# for the indexes from 0 to tree_index -1

1773

new_details = []

1774

for lookup_index in xrange(tree_index):

1775

# boundary case: this is the first occurence of file_id

1776

# so there are no id_indexs, possibly take this out of

1777

# the loop?

1778

if not len(id_index[file_id]):

1779

new_details.append(DirState.NULL_PARENT_DETAILS)

1780

else:

1781

# grab any one entry, use it to find the right path.

1782

# TODO: optimise this to reduce memory use in highly

1783

# fragmented situations by reusing the relocation

1784

# records.

1785

a_key = iter(id_index[file_id]).next()

1786

if by_path[a_key][lookup_index][0] in ('r', 'a'):

1787

# its a pointer or missing statement, use it as is.

1788

new_details.append(by_path[a_key][lookup_index])

1789

else:

1790

# we have the right key, make a pointer to it.

1791

real_path = ('/'.join(a_key[0:2])).strip('/')

1792

new_details.append(('r', real_path, 0, False, ''))

1793

new_details.append(self._inv_entry_to_details(entry))

1794

new_details.extend(new_location_suffix)

1795

by_path[new_entry_key] = new_details

1796

id_index[file_id].add(new_entry_key)

1797

# --- end generation of full tree mappings

1798

1799

# sort and output all the entries

1800

new_entries = sorted(by_path.items(),

1801

key=lambda entry:(entry[0][0].split('/'), entry[0][1]))

1802

self._entries_to_current_state(new_entries)

1803

self._parents = [rev_id for rev_id, tree in trees]

1804

self._ghosts = list(ghosts)

1805

self._header_state = DirState.IN_MEMORY_MODIFIED

1806

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1807

self._id_index = id_index

1808

1809

def set_state_from_inventory(self, new_inv):

1810

"""Set new_inv as the current state.

1811

1812

This API is called by tree transform, and will usually occur with

1813

existing parent trees.

1814

1815

:param new_inv: The inventory object to set current state from.

1816

"""

1817

self._read_dirblocks_if_needed()

1818

# sketch:

1819

# incremental algorithm:

1820

# two iterators: current data and new data, both in dirblock order.

1821

new_iterator = new_inv.iter_entries_by_dir()

1822

# we will be modifying the dirstate, so we need a stable iterator. In

1823

# future we might write one, for now we just clone the state into a

1824

# list - which is a shallow copy, so each

1825

old_iterator = iter(list(self._iter_entries()))

1826

# both must have roots so this is safe:

1827

current_new = new_iterator.next()

1828

current_old = old_iterator.next()

1829

def advance(iterator):

1830

try:

1831

return iterator.next()

1832

except StopIteration:

1833

return None

1834

while current_new or current_old:

1835

# skip entries in old that are not really there

1836

if current_old and current_old[1][0][0] in ('r', 'a'):

1837

# relocated or absent

1838

current_old = advance(old_iterator)

1839

continue

1840

if current_new:

1841

# convert new into dirblock style

1842

new_path_utf8 = current_new[0].encode('utf8')

1843

new_dirname, new_basename = osutils.split(new_path_utf8)

1844

new_id = current_new[1].file_id

1845

new_entry_key = (new_dirname, new_basename, new_id)

1846

current_new_minikind = \

1847

DirState._kind_to_minikind[current_new[1].kind]

1848

else:

1849

# for safety disable variables

1850

new_path_utf8 = new_dirname = new_basename = new_id = new_entry_key = None

1851

# 5 cases, we dont have a value that is strictly greater than everything, so

1852

# we make both end conditions explicit

1853

if not current_old:

1854

# old is finished: insert current_new into the state.

1855

self.update_minimal(new_entry_key, current_new_minikind,

1856

executable=current_new[1].executable,

1857

path_utf8=new_path_utf8)

1858

current_new = advance(new_iterator)

1859

elif not current_new:

1860

# new is finished

1861

self._make_absent(current_old)

1862

current_old = advance(old_iterator)

1863

elif new_entry_key == current_old[0]:

1864

# same - common case

1865

# TODO: update the record if anything significant has changed.

1866

# the minimal required trigger is if the execute bit or cached

1867

# kind has changed.

1868

if (current_old[1][0][3] != current_new[1].executable or

1869

current_old[1][0][0] != current_new_minikind):

1870

self.update_minimal(current_old[0], current_new_minikind,

1871

executable=current_new[1].executable,

1872

path_utf8=new_path_utf8)

1873

# both sides are dealt with, move on

1874

current_old = advance(old_iterator)

1875

current_new = advance(new_iterator)

1876

elif new_entry_key < current_old[0]:

1877

# new comes before:

1878

# add a entry for this and advance new

1879

self.update_minimal(new_entry_key, current_new_minikind,

1880

executable=current_new[1].executable,

1881

path_utf8=new_path_utf8)

1882

current_new = advance(new_iterator)

1883

else:

1884

# old comes before:

1885

self._make_absent(current_old)

1886

current_old = advance(old_iterator)

1887

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1888

self._id_index = None

1889

1890

def _make_absent(self, current_old):

1891

"""Mark current_old - an entry - as absent for tree 0.

1892

1893

:return: True if this was the last details entry for they entry key:

1894

that is, if the underlying block has had the entry removed, thus

1895

shrinking in length.

1896

"""

1897

# build up paths that this id will be left at after the change is made,

1898

# so we can update their cross references in tree 0

1899

all_remaining_keys = set()

1900

# Dont check the working tree, because its going.

1901

for details in current_old[1][1:]:

1902

if details[0] not in ('a', 'r'): # absent, relocated

1903

all_remaining_keys.add(current_old[0])

1904

elif details[0] == 'r': # relocated

1905

# record the key for the real path.

1906

all_remaining_keys.add(tuple(osutils.split(details[1])) + (current_old[0][2],))

1907

# absent rows are not present at any path.

1908

last_reference = current_old[0] not in all_remaining_keys

1909

if last_reference:

1910

# the current row consists entire of the current item (being marked

1911

# absent), and relocated or absent entries for the other trees:

1912

# Remove it, its meaningless.

1913

block = self._find_block(current_old[0])

1914

entry_index, present = self._find_entry_index(current_old[0], block[1])

1915

assert present, 'could not find entry for %s' % (current_old,)

1916

block[1].pop(entry_index)

1917

# if we have an id_index in use, remove this key from it for this id.

1918

if self._id_index is not None:

1919

self._id_index[current_old[0][2]].remove(current_old[0])

1920

# update all remaining keys for this id to record it as absent. The

1921

# existing details may either be the record we are making as deleted

1922

# (if there were other trees with the id present at this path), or may

1923

# be relocations.

1924

for update_key in all_remaining_keys:

1925

update_block_index, present = \

1926

self._find_block_index_from_key(update_key)

1927

assert present, 'could not find block for %s' % (update_key,)

1928

update_entry_index, present = \

1929

self._find_entry_index(update_key, self._dirblocks[update_block_index][1])

1930

assert present, 'could not find entry for %s' % (update_key,)

1931

update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]

1932

# it must not be absent at the moment

1933

assert update_tree_details[0][0] != 'a' # absent

1934

update_tree_details[0] = DirState.NULL_PARENT_DETAILS

1935

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1936

return last_reference

1937

1938

def update_minimal(self, key, minikind, executable=False, fingerprint='',

1939

packed_stat=None, size=0, path_utf8=None):

1940

"""Update an entry to the state in tree 0.

1941

1942

This will either create a new entry at 'key' or update an existing one.

1943

It also makes sure that any other records which might mention this are

1944

updated as well.

1945

1946

:param key: (dir, name, file_id) for the new entry

1947

:param minikind: The type for the entry ('f' == 'file', 'd' ==

1948

'directory'), etc.

1949

:param executable: Should the executable bit be set?

1950

:param fingerprint: Simple fingerprint for new entry.

1951

:param packed_stat: packed stat value for new entry.

1952

:param size: Size information for new entry

1953

:param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing

1954

extra computation.

1955

"""

1956

block = self._find_block(key)[1]

1957

if packed_stat is None:

1958

packed_stat = DirState.NULLSTAT

1959

entry_index, present = self._find_entry_index(key, block)

1960

new_details = (minikind, fingerprint, size, executable, packed_stat)

1961

id_index = self._get_id_index()

1962

if not present:

1963

# new entry, synthesis cross reference here,

1964

existing_keys = id_index.setdefault(key[2], set())

1965

if not existing_keys:

1966

# not currently in the state, simplest case

1967

new_entry = key, [new_details] + self._empty_parent_info()

1968

else:

1969

# present at one or more existing other paths.

1970

# grab one of them and use it to generate parent

1971

# relocation/absent entries.

1972

new_entry = key, [new_details]

1973

for other_key in existing_keys:

1974

# change the record at other to be a pointer to this new

1975

# record. The loop looks similar to the change to

1976

# relocations when updating an existing record but its not:

1977

# the test for existing kinds is different: this can be

1978

# factored out to a helper though.

1979

other_block_index, present = self._find_block_index_from_key(other_key)

1980

if not present:

1981

import pdb; pdb.set_trace()

1982

assert present, 'could not find block for %s' % (other_key,)

1983

other_entry_index, present = self._find_entry_index(other_key,

1984

self._dirblocks[other_block_index][1])

1985

if not present:

1986

import pdb; pdb.set_trace()

1987

assert present, 'could not find entry for %s' % (other_key,)

1988

assert path_utf8 is not None

1989

self._dirblocks[other_block_index][1][other_entry_index][1][0] = \

1990

('r', path_utf8, 0, False, '')

1991

1992

num_present_parents = self._num_present_parents()

1993

for lookup_index in xrange(1, num_present_parents + 1):

1994

# grab any one entry, use it to find the right path.

1995

# TODO: optimise this to reduce memory use in highly

1996

# fragmented situations by reusing the relocation

1997

# records.

1998

update_block_index, present = \

1999

self._find_block_index_from_key(other_key)

2000

assert present, 'could not find block for %s' % (other_key,)

2001

update_entry_index, present = \

2002

self._find_entry_index(other_key, self._dirblocks[update_block_index][1])

2003

assert present, 'could not find entry for %s' % (other_key,)

2004

update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]

2005

if update_details[0] in ('r', 'a'): # relocated, absent

2006

# its a pointer or absent in lookup_index's tree, use

2007

# it as is.

2008

new_entry[1].append(update_details)

2009

else:

2010

# we have the right key, make a pointer to it.

2011

pointer_path = osutils.pathjoin(*other_key[0:2])

2012

new_entry[1].append(('r', pointer_path, 0, False, ''))

2013

block.insert(entry_index, new_entry)

2014

existing_keys.add(key)

2015

else:

2016

# Does the new state matter?

2017

block[entry_index][1][0] = new_details

2018

# parents cannot be affected by what we do.

2019

# other occurences of this id can be found

2020

# from the id index.

2021

# ---

2022

# tree index consistency: All other paths for this id in this tree

2023

# index must point to the correct path. We have to loop here because

2024

# we may have passed entries in the state with this file id already

2025

# that were absent - where parent entries are - and they need to be

2026

# converted to relocated.

2027

assert path_utf8 is not None

2028

for entry_key in id_index.setdefault(key[2], set()):

2029

# TODO:PROFILING: It might be faster to just update

2030

# rather than checking if we need to, and then overwrite

2031

# the one we are located at.

2032

if entry_key != key:

2033

# this file id is at a different path in one of the

2034

# other trees, so put absent pointers there

2035

# This is the vertical axis in the matrix, all pointing

2036

# to the real path.

2037

block_index, present = self._find_block_index_from_key(entry_key)

2038

assert present

2039

entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])

2040

assert present

2041

self._dirblocks[block_index][1][entry_index][1][0] = \

2042

('r', path_utf8, 0, False, '')

2043

# add a containing dirblock if needed.

2044

if new_details[0] == 'd':

2045

subdir_key = (osutils.pathjoin(*key[0:2]), '', '')

2046

block_index, present = self._find_block_index_from_key(subdir_key)

2047

if not present:

2048

self._dirblocks.insert(block_index, (subdir_key[0], []))

2049

2050

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

2051

2052

def _validate(self):

2053

"""Check that invariants on the dirblock are correct.

2054

2055

This can be useful in debugging; it shouldn't be necessary in

2056

normal code.

2057

"""

2058

from pprint import pformat

2059

if len(self._dirblocks) > 0:

2060

assert self._dirblocks[0][0] == '', \

2061

"dirblocks don't start with root block:\n" + \

2062

pformat(dirblocks)

2063

if len(self._dirblocks) > 1:

2064

assert self._dirblocks[1][0] == '', \

2065

"dirblocks missing root directory:\n" + \

2066

pformat(dirblocks)

2067

assert self._dirblocks[1:] == sorted(self._dirblocks[1:]), \

2068

"dirblocks are not in sorted order:\n" + \

2069

pformat(self._dirblocks)

2070

for dirblock in self._dirblocks:

2071

assert dirblock[1] == sorted(dirblock[1]), \

2072

"dirblock for %r is not sorted:\n%s" % \

2073

(dirblock[0], pformat(dirblock))

2074

2075

def _wipe_state(self):

2076

"""Forget all state information about the dirstate."""

2077

self._header_state = DirState.NOT_IN_MEMORY

2078

self._dirblock_state = DirState.NOT_IN_MEMORY

2079

self._parents = []

2080

self._ghosts = []

2081

self._dirblocks = []

2082

2083

def lock_read(self):

2084

"""Acquire a read lock on the dirstate"""

2085

if self._lock_token is not None:

2086

raise errors.LockContention(self._lock_token)

2087

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2088

# already in memory, we could read just the header and check for

2089

# any modification. If not modified, we can just leave things

2090

# alone

2091

self._lock_token = lock.ReadLock(self._filename)

2092

self._lock_state = 'r'

2093

self._state_file = self._lock_token.f

2094

self._wipe_state()

2095

2096

def lock_write(self):

2097

"""Acquire a write lock on the dirstate"""

2098

if self._lock_token is not None:

2099

raise errors.LockContention(self._lock_token)

2100

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2101

# already in memory, we could read just the header and check for

2102

# any modification. If not modified, we can just leave things

2103

# alone

2104

self._lock_token = lock.WriteLock(self._filename)

2105

self._lock_state = 'w'

2106

self._state_file = self._lock_token.f

2107

self._wipe_state()

2108

2109

def unlock(self):

2110

"""Drop any locks held on the dirstate"""

2111

if self._lock_token is None:

2112

raise errors.LockNotHeld(self)

2113

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2114

# already in memory, we could read just the header and check for

2115

# any modification. If not modified, we can just leave things

2116

# alone

2117

self._state_file = None

2118

self._lock_state = None

2119

self._lock_token.unlock()

2120

self._lock_token = None

2121

self._split_path_cache = {}

2122

2123

def _requires_lock(self):

2124

"""Checks that a lock is currently held by someone on the dirstate"""

2125

if not self._lock_token:

2126

raise errors.ObjectNotLocked(self)

2127

2128

2129

def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):

2130

"""Return the index where to insert dirname into the dirblocks.

2131

2132

The return value idx is such that all directories blocks in dirblock[:idx]

2133

have names < dirname, and all blocks in dirblock[idx:] have names >=

2134

dirname.

2135

2136

Optional args lo (default 0) and hi (default len(dirblocks)) bound the

2137

slice of a to be searched.

2138

"""

2139

if hi is None:

2140

hi = len(dirblocks)

2141

try:

2142

dirname_split = cache[dirname]

2143

except KeyError:

2144

dirname_split = dirname.split('/')

2145

cache[dirname] = dirname_split

2146

while lo < hi:

2147

mid = (lo+hi)//2

2148

# Grab the dirname for the current dirblock

2149

cur = dirblocks[mid][0]

2150

try:

2151

cur_split = cache[cur]

2152

except KeyError:

2153

cur_split = cur.split('/')

2154

cache[cur] = cur_split

2155

if cur_split < dirname_split: lo = mid+1

2156

else: hi = mid

2157

return lo

2158

2159

2160

_base64_encoder = codecs.getencoder('base64')

2161

2162

2163

def pack_stat(st, _encode=_base64_encoder, _pack=struct.pack):

2164

"""Convert stat values into a packed representation."""

2165

# jam 20060614 it isn't really worth removing more entries if we

2166

# are going to leave it in packed form.

2167

# With only st_mtime and st_mode filesize is 5.5M and read time is 275ms

2168

# With all entries filesize is 5.9M and read time is mabye 280ms

2169

# well within the noise margin

2170

2171

# base64.encode always adds a final newline, so strip it off

2172

return _encode(_pack('>llllll'

2173

, st.st_size, int(st.st_mtime), int(st.st_ctime)

2174

, st.st_dev, st.st_ino, st.st_mode))[0][:-1]

Older »