/brz/remove-bazaar : revision 2425.1.1

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2007-04-19 02:27:44 UTC
mto: This revision was merged to the branch mainline in revision 2426.
Revision ID: robertc@robertcollins.net-20070419022744-pfdqz42kp1wizh43

``make docs`` now creates a man page at ``man1/bzr.1`` fixing bug 107388.
(Robert Collins)

files added:
build-api

bzr.ico

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/export

bzrlib/export/dir_exporter.py

bzrlib/graph.py

bzrlib/inspect_for_copy.py

bzrlib/intset.py

bzrlib/patiencediff.py

bzrlib/plugins/__init__.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_api.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_graph.py

bzrlib/textui.py

bzrlib/transport/ftp.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/weave_commands.py

bzrlib/xml6.py

contrib/add-bzr-to-baz

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/index.txt

tools/biobench.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

files removed:
.coveragerc

.github

.github/workflows

.github/workflows/pythonpackage.yml

.gitignore

.mailmap

.testr.conf

CODE_OF_CONDUCT.md

MANIFEST.in

NEWS

README_BDIST_RPM

apport

apport/README

apport/brz-crashdb.conf

apport/source_brz.py

breezy/__main__.py

breezy/_annotator_py.py

breezy/_annotator_pyx.pyx

breezy/_bencode_pyx.h

breezy/_bencode_pyx.pyx

breezy/_chunks_to_lines_py.py

breezy/_chunks_to_lines_pyx.pyx

breezy/_export_c_api.h

breezy/_import_c_api.h

breezy/_known_graph_py.py

breezy/_known_graph_pyx.pyx

breezy/_readdir_py.py

breezy/_readdir_pyx.pyx

breezy/_rio_py.py

breezy/_rio_pyx.pyx

breezy/_simple_set_pyx.pxd

breezy/_simple_set_pyx.pyx

breezy/_static_tuple_c.c

breezy/_static_tuple_c.h

breezy/_static_tuple_c.pxd

breezy/_static_tuple_py.py

breezy/_termcolor.py

breezy/_walkdirs_win32.pyx

breezy/archive

breezy/archive/__init__.py

breezy/bedding.py

breezy/bencode.py

breezy/bisect.py

breezy/bisect_multi.py

breezy/branchbuilder.py

breezy/breakin.py

breezy/bugtracker.py

breezy/bzr

breezy/bzr/__init__.py

breezy/bzr/_btree_serializer_py.py

breezy/bzr/_btree_serializer_pyx.pyx

breezy/bzr/_chk_map_py.py

breezy/bzr/_chk_map_pyx.pyx

breezy/bzr/_dirstate_helpers_py.py

breezy/bzr/_dirstate_helpers_pyx.h

breezy/bzr/_dirstate_helpers_pyx.pyx

breezy/bzr/_groupcompress_py.py

breezy/bzr/_groupcompress_pyx.pyx

breezy/bzr/_knit_load_data_py.py

breezy/bzr/_knit_load_data_pyx.pyx

breezy/bzr/_str_helpers.pxd

breezy/bzr/branch.py

breezy/bzr/btree_index.py

breezy/bzr/bundle

breezy/bzr/bundle/serializer/v4.py

breezy/bzr/check.py

breezy/bzr/chk_map.py

breezy/bzr/chk_serializer.py

breezy/bzr/debug_commands.py

breezy/bzr/delta.h

breezy/bzr/diff-delta.c

breezy/bzr/fullhistory.py

breezy/bzr/groupcompress.py

breezy/bzr/groupcompress_repo.py

breezy/bzr/index.py

breezy/bzr/inventory_delta.py

breezy/bzr/inventorytree.py

breezy/bzr/knitpack_repo.py

breezy/bzr/pack.py

breezy/bzr/pack_repo.py

breezy/bzr/reconcile.py

breezy/bzr/remote.py

breezy/bzr/repository.py

breezy/bzr/serializer.py

breezy/bzr/smart/branch.py

breezy/bzr/smart/bzrdir.py

breezy/bzr/smart/message.py

breezy/bzr/smart/packrepository.py

breezy/bzr/smart/ping.py

breezy/bzr/smart/repository.py

breezy/bzr/smart/signals.py

breezy/bzr/tag.py

breezy/bzr/tests

breezy/bzr/tests/__init__.py

breezy/bzr/tests/blackbox

breezy/bzr/tests/blackbox/__init__.py

breezy/bzr/tests/blackbox/test_dump_btree.py

breezy/bzr/tests/matchers.py

breezy/bzr/tests/per_bzrdir

breezy/bzr/tests/per_bzrdir/__init__.py

breezy/bzr/tests/per_bzrdir/test_bzrdir.py

breezy/bzr/tests/per_inventory

breezy/bzr/tests/per_inventory/__init__.py

breezy/bzr/tests/per_inventory/basics.py

breezy/bzr/tests/per_pack_repository.py

breezy/bzr/tests/per_repository_chk

breezy/bzr/tests/per_repository_chk/__init__.py

breezy/bzr/tests/per_repository_chk/test_supported.py

breezy/bzr/tests/per_repository_chk/test_unsupported.py

breezy/bzr/tests/per_repository_vf

breezy/bzr/tests/per_repository_vf/__init__.py

breezy/bzr/tests/per_repository_vf/helpers.py

breezy/bzr/tests/per_repository_vf/test__generate_text_key_index.py

breezy/bzr/tests/per_repository_vf/test_add_inventory_by_delta.py

breezy/bzr/tests/per_repository_vf/test_check.py

breezy/bzr/tests/per_repository_vf/test_check_reconcile.py

breezy/bzr/tests/per_repository_vf/test_fetch.py

breezy/bzr/tests/per_repository_vf/test_fileid_involved.py

breezy/bzr/tests/per_repository_vf/test_find_text_key_references.py

breezy/bzr/tests/per_repository_vf/test_merge_directive.py

breezy/bzr/tests/per_repository_vf/test_reconcile.py

breezy/bzr/tests/per_repository_vf/test_refresh_data.py

breezy/bzr/tests/per_repository_vf/test_repository.py

breezy/bzr/tests/per_repository_vf/test_write_group.py

breezy/bzr/tests/test__btree_serializer.py

breezy/bzr/tests/test__chk_map.py

breezy/bzr/tests/test__dirstate_helpers.py

breezy/bzr/tests/test__groupcompress.py

breezy/bzr/tests/test_btree_index.py

breezy/bzr/tests/test_chk_map.py

breezy/bzr/tests/test_chk_serializer.py

breezy/bzr/tests/test_groupcompress.py

breezy/bzr/tests/test_index.py

breezy/bzr/tests/test_inventory_delta.py

breezy/bzr/tests/test_matchers.py

breezy/bzr/tests/test_pack.py

breezy/bzr/tests/test_remote.py

breezy/bzr/tests/test_serializer.py

breezy/bzr/tests/test_smart.py

breezy/bzr/tests/test_smart_request.py

breezy/bzr/tests/test_smart_signals.py

breezy/bzr/tests/test_tag.py

breezy/bzr/tests/test_transform.py

breezy/bzr/tests/test_versionedfile.py

breezy/bzr/tests/test_vf_search.py

breezy/bzr/tests/test_vfs_ratchet.py

breezy/bzr/tests/test_workingtree.py

breezy/bzr/transform.py

breezy/bzr/vf_repository.py

breezy/bzr/vf_search.py

breezy/bzr/workingtree.py

breezy/bzr/workingtree_3.py

breezy/bzr/xml5.py

breezy/bzr/xml6.py

breezy/bzr_distutils.py

breezy/cethread.py

breezy/chunk_writer.py

breezy/clean_tree.py

breezy/cmd_test_script.py

breezy/cmdline.py

breezy/colordiff.py

breezy/controldir.py

breezy/counted_lock.py

breezy/crash.py

breezy/directory_service.py

breezy/dirty_tracker.py

breezy/doc_generate/conf.py

breezy/email_message.py

breezy/estimate_compressed_size.py

breezy/export_pot.py

breezy/fetch_ghosts.py

breezy/fifo_cache.py

breezy/filter_tree.py

breezy/filters

breezy/filters/__init__.py

breezy/filters/eol.py

breezy/foreign.py

breezy/git

breezy/git/.testr.conf

breezy/git/TODO

breezy/git/__init__.py

breezy/git/annotate.py

breezy/git/branch.py

breezy/git/bzr-receive-pack

breezy/git/bzr-upload-pack

breezy/git/cache.py

breezy/git/commands.py

breezy/git/commit.py

breezy/git/config.py

breezy/git/dir.py

breezy/git/directory.py

breezy/git/errors.py

breezy/git/fetch.py

breezy/git/filegraph.py

breezy/git/git-remote-bzr

breezy/git/git-remote-bzr.1

breezy/git/git_remote_helper.py

breezy/git/help.py

breezy/git/hg.py

breezy/git/interrepo.py

breezy/git/mapping.py

breezy/git/memorytree.py

breezy/git/notes

breezy/git/notes/git-serve.txt

breezy/git/notes/mapping.txt

breezy/git/notes/roundtripping.txt

breezy/git/object_store.py

breezy/git/pristine_tar.py

breezy/git/push.py

breezy/git/refs.py

breezy/git/remote.py

breezy/git/repository.py

breezy/git/revspec.py

breezy/git/roundtrip.py

breezy/git/send.py

breezy/git/server.py

breezy/git/tests

breezy/git/tests/__init__.py

breezy/git/tests/test_blackbox.py

breezy/git/tests/test_branch.py

breezy/git/tests/test_builder.py

breezy/git/tests/test_cache.py

breezy/git/tests/test_dir.py

breezy/git/tests/test_fetch.py

breezy/git/tests/test_git_remote_helper.py

breezy/git/tests/test_mapping.py

breezy/git/tests/test_memorytree.py

breezy/git/tests/test_object_store.py

breezy/git/tests/test_pristine_tar.py

breezy/git/tests/test_push.py

breezy/git/tests/test_refs.py

breezy/git/tests/test_remote.py

breezy/git/tests/test_repository.py

breezy/git/tests/test_revspec.py

breezy/git/tests/test_roundtrip.py

breezy/git/tests/test_server.py

breezy/git/tests/test_transportgit.py

breezy/git/tests/test_tree.py

breezy/git/tests/test_unpeel_map.py

breezy/git/tests/test_urls.py

breezy/git/tests/test_workingtree.py

breezy/git/transform.py

breezy/git/transportgit.py

breezy/git/tree.py

breezy/git/unpeel_map.py

breezy/git/urls.py

breezy/git/workingtree.py

breezy/graph.py

breezy/grep.py

breezy/help_topics

breezy/help_topics/en

breezy/help_topics/en/authentication.txt

breezy/help_topics/en/conflict-types.txt

breezy/help_topics/en/content-filters.txt

breezy/help_topics/en/debug-flags.txt

breezy/help_topics/en/diverged-branches.txt

breezy/help_topics/en/eol.txt

breezy/help_topics/en/glossary.txt

breezy/help_topics/en/log-formats.txt

breezy/help_topics/en/missing-extensions.txt

breezy/help_topics/en/patterns.txt

breezy/help_topics/en/rules.txt

breezy/help_topics/en/url-special-chars.txt

breezy/help_topics/es

breezy/help_topics/es/conflict-types.txt

breezy/i18n.py

breezy/library_state.py

breezy/location.py

breezy/lru_cache.py

breezy/mail_client.py

breezy/memorybranch.py

breezy/mergeable.py

breezy/mergetools.py

breezy/multiparent.py

breezy/multiwalker.py

breezy/plugins/__init__.py

breezy/plugins/bash_completion

breezy/plugins/bash_completion/README.txt

breezy/plugins/bash_completion/__init__.py

breezy/plugins/bash_completion/bashcomp.py

breezy/plugins/bash_completion/tests

breezy/plugins/bash_completion/tests/__init__.py

breezy/plugins/bash_completion/tests/test_bashcomp.py

breezy/plugins/changelog_merge

breezy/plugins/changelog_merge/__init__.py

breezy/plugins/changelog_merge/changelog_merge.py

breezy/plugins/changelog_merge/tests

breezy/plugins/changelog_merge/tests/__init__.py

breezy/plugins/changelog_merge/tests/test_changelog_merge.py

breezy/plugins/commitfromnews

breezy/plugins/commitfromnews/__init__.py

breezy/plugins/commitfromnews/committemplate.py

breezy/plugins/commitfromnews/tests

breezy/plugins/commitfromnews/tests/__init__.py

breezy/plugins/commitfromnews/tests/test_committemplate.py

breezy/plugins/commitfromnews/tests/test_msgeditor.py

breezy/plugins/cvs

breezy/plugins/cvs/__init__.py

breezy/plugins/darcs

breezy/plugins/darcs/__init__.py

breezy/plugins/email

breezy/plugins/email/__init__.py

breezy/plugins/email/emailer.py

breezy/plugins/email/tests

breezy/plugins/email/tests/__init__.py

breezy/plugins/email/tests/testemail.py

breezy/plugins/fastimport

breezy/plugins/fastimport/NEWS

breezy/plugins/fastimport/__init__.py

breezy/plugins/fastimport/branch_mapper.py

breezy/plugins/fastimport/branch_updater.py

breezy/plugins/fastimport/bzr_commit_handler.py

breezy/plugins/fastimport/cache_manager.py

breezy/plugins/fastimport/cmds.py

breezy/plugins/fastimport/doc

breezy/plugins/fastimport/doc/notes.txt

breezy/plugins/fastimport/exporter.py

breezy/plugins/fastimport/helpers.py

breezy/plugins/fastimport/idmapfile.py

breezy/plugins/fastimport/marks_file.py

breezy/plugins/fastimport/processors

breezy/plugins/fastimport/processors/__init__.py

breezy/plugins/fastimport/processors/generic_processor.py

breezy/plugins/fastimport/revision_store.py

breezy/plugins/fastimport/tests

breezy/plugins/fastimport/tests/__init__.py

breezy/plugins/fastimport/tests/test_branch_mapper.py

breezy/plugins/fastimport/tests/test_commands.py

breezy/plugins/fastimport/tests/test_exporter.py

breezy/plugins/fastimport/tests/test_generic_processor.py

breezy/plugins/fastimport/tests/test_head_tracking.py

breezy/plugins/fastimport/tests/test_marks_file.py

breezy/plugins/fastimport/tests/test_revision_store.py

breezy/plugins/fastimport/user_mapper.py

breezy/plugins/flake8

breezy/plugins/flake8/__init__.py

breezy/plugins/fossil

breezy/plugins/fossil/__init__.py

breezy/plugins/github

breezy/plugins/github/__init__.py

breezy/plugins/github/cmds.py

breezy/plugins/github/hoster.py

breezy/plugins/github/tests

breezy/plugins/github/tests/__init__.py

breezy/plugins/gitlab

breezy/plugins/gitlab/__init__.py

breezy/plugins/gitlab/cmds.py

breezy/plugins/gitlab/hoster.py

breezy/plugins/gitlab/tests

breezy/plugins/gitlab/tests/__init__.py

breezy/plugins/gitlab/tests/test_hoster.py

breezy/plugins/hg

breezy/plugins/hg/__init__.py

breezy/plugins/launchpad/account.py

breezy/plugins/launchpad/cmds.py

breezy/plugins/launchpad/hoster.py

breezy/plugins/launchpad/lp_api.py

breezy/plugins/launchpad/lp_propose.py

breezy/plugins/launchpad/test_account.py

breezy/plugins/launchpad/test_lp_api.py

breezy/plugins/launchpad/test_lp_login.py

breezy/plugins/launchpad/test_lp_open.py

breezy/plugins/launchpad/test_lp_service.py

breezy/plugins/launchpad/uris.py

breezy/plugins/mtn

breezy/plugins/mtn/__init__.py

breezy/plugins/netrc_credential_store

breezy/plugins/netrc_credential_store/__init__.py

breezy/plugins/netrc_credential_store/tests

breezy/plugins/netrc_credential_store/tests/__init__.py

breezy/plugins/netrc_credential_store/tests/test_netrc.py

breezy/plugins/news_merge

breezy/plugins/news_merge/README

breezy/plugins/news_merge/__init__.py

breezy/plugins/news_merge/news_merge.py

breezy/plugins/news_merge/parser.py

breezy/plugins/news_merge/tests

breezy/plugins/news_merge/tests/__init__.py

breezy/plugins/news_merge/tests/test_news_merge.py

breezy/plugins/po_merge

breezy/plugins/po_merge/README

breezy/plugins/po_merge/__init__.py

breezy/plugins/po_merge/po_merge.py

breezy/plugins/po_merge/tests

breezy/plugins/po_merge/tests/__init__.py

breezy/plugins/po_merge/tests/test_po_merge.py

breezy/plugins/propose

breezy/plugins/propose/README.rst

breezy/plugins/propose/TODO

breezy/plugins/propose/__init__.py

breezy/plugins/propose/cmds.py

breezy/plugins/propose/tests

breezy/plugins/propose/tests/__init__.py

breezy/plugins/quilt

breezy/plugins/quilt/__init__.py

breezy/plugins/quilt/merge.py

breezy/plugins/quilt/quilt.py

breezy/plugins/quilt/tests

breezy/plugins/quilt/tests/__init__.py

breezy/plugins/quilt/tests/test_merge.py

breezy/plugins/quilt/tests/test_wrapper.py

breezy/plugins/quilt/wrapper.py

breezy/plugins/repodebug

breezy/plugins/repodebug/__init__.py

breezy/plugins/repodebug/check_chk.py

breezy/plugins/repodebug/chk_used_by.py

breezy/plugins/repodebug/fetch_all_records.py

breezy/plugins/repodebug/file_refs.py

breezy/plugins/repodebug/missing_keys_for_stacking_fixer.py

breezy/plugins/repodebug/repo_has_key.py

breezy/plugins/repodebug/repo_keys.py

breezy/plugins/repodebug/tests

breezy/plugins/repodebug/tests/__init__.py

breezy/plugins/rewrite

breezy/plugins/rewrite/README

breezy/plugins/rewrite/__init__.py

breezy/plugins/rewrite/commands.py

breezy/plugins/rewrite/maptree.py

breezy/plugins/rewrite/pseudonyms.py

breezy/plugins/rewrite/rebase.py

breezy/plugins/rewrite/tests

breezy/plugins/rewrite/tests/__init__.py

breezy/plugins/rewrite/tests/test_blackbox.py

breezy/plugins/rewrite/tests/test_maptree.py

breezy/plugins/rewrite/tests/test_pseudonyms.py

breezy/plugins/rewrite/tests/test_rebase.py

breezy/plugins/rewrite/tests/test_upgrade.py

breezy/plugins/rewrite/upgrade.py

breezy/plugins/stats

breezy/plugins/stats/__init__.py

breezy/plugins/stats/classify.py

breezy/plugins/stats/cmds.py

breezy/plugins/stats/test_blackbox.py

breezy/plugins/stats/test_classify.py

breezy/plugins/stats/test_stats.py

breezy/plugins/svn

breezy/plugins/svn/__init__.py

breezy/plugins/svn/revspec.py

breezy/plugins/upload

breezy/plugins/upload/.bzrignore

breezy/plugins/upload/NEWS

breezy/plugins/upload/README

breezy/plugins/upload/__init__.py

breezy/plugins/upload/cmds.py

breezy/plugins/upload/tests

breezy/plugins/upload/tests/__init__.py

breezy/plugins/upload/tests/test_auto_upload_hook.py

breezy/plugins/upload/tests/test_upload.py

breezy/plugins/weave_fmt

breezy/plugins/weave_fmt/__init__.py

breezy/plugins/weave_fmt/branch.py

breezy/plugins/weave_fmt/bzrdir.py

breezy/plugins/weave_fmt/test_bzrdir.py

breezy/plugins/weave_fmt/test_repository.py

breezy/plugins/weave_fmt/test_workingtree.py

breezy/plugins/weave_fmt/workingtree.py

breezy/plugins/zsh_completion

breezy/plugins/zsh_completion/__init__.py

breezy/plugins/zsh_completion/tests

breezy/plugins/zsh_completion/tests/__init__.py

breezy/plugins/zsh_completion/tests/test_zshcomp.py

breezy/plugins/zsh_completion/zshcomp.py

breezy/propose.py

breezy/push.py

breezy/python-compat.h

breezy/pyutils.py

breezy/readdir.h

breezy/reconfigure.py

breezy/recordcounter.py

breezy/rename_map.py

breezy/rules.py

breezy/send.py

breezy/shelf.py

breezy/shelf_ui.py

breezy/smtp_connection.py

breezy/static_tuple.py

breezy/switch.py

breezy/terminal.py

breezy/tests/blackbox/test_alias.py

breezy/tests/blackbox/test_big_file.py

breezy/tests/blackbox/test_bisect.py

breezy/tests/blackbox/test_branches.py

breezy/tests/blackbox/test_bundle_info.py

breezy/tests/blackbox/test_check.py

breezy/tests/blackbox/test_clean_tree.py

breezy/tests/blackbox/test_clone.py

breezy/tests/blackbox/test_config.py

breezy/tests/blackbox/test_cp.py

breezy/tests/blackbox/test_deleted.py

breezy/tests/blackbox/test_export_pot.py

breezy/tests/blackbox/test_fetch_ghosts.py

breezy/tests/blackbox/test_filesystem_cicp.py

breezy/tests/blackbox/test_filtered_view_ops.py

breezy/tests/blackbox/test_hooks.py

breezy/tests/blackbox/test_import.py

breezy/tests/blackbox/test_link_tree.py

breezy/tests/blackbox/test_lookup_revision.py

breezy/tests/blackbox/test_lsprof.py

breezy/tests/blackbox/test_mkdir.py

breezy/tests/blackbox/test_modified.py

breezy/tests/blackbox/test_pack.py

breezy/tests/blackbox/test_patch.py

breezy/tests/blackbox/test_ping.py

breezy/tests/blackbox/test_plugins.py

breezy/tests/blackbox/test_reconfigure.py

breezy/tests/blackbox/test_reference.py

breezy/tests/blackbox/test_remember_option.py

breezy/tests/blackbox/test_repair_workingtree.py

breezy/tests/blackbox/test_resolve.py

breezy/tests/blackbox/test_resolve_location.py

breezy/tests/blackbox/test_rmbranch.py

breezy/tests/blackbox/test_script.py

breezy/tests/blackbox/test_shell_complete.py

breezy/tests/blackbox/test_shelve.py

breezy/tests/blackbox/test_switch.py

breezy/tests/blackbox/test_unknowns.py

breezy/tests/blackbox/test_verify_signatures.py

breezy/tests/blackbox/test_view.py

breezy/tests/commands

breezy/tests/commands/__init__.py

breezy/tests/commands/test_branch.py

breezy/tests/commands/test_cat.py

breezy/tests/commands/test_checkout.py

breezy/tests/commands/test_commit.py

breezy/tests/commands/test_init.py

breezy/tests/commands/test_init_repository.py

breezy/tests/commands/test_merge.py

breezy/tests/commands/test_missing.py

breezy/tests/commands/test_pull.py

breezy/tests/commands/test_push.py

breezy/tests/commands/test_revert.py

breezy/tests/commands/test_update.py

breezy/tests/fake_command.py

breezy/tests/features.py

breezy/tests/file_utils.py

breezy/tests/fixtures.py

breezy/tests/https_server.py

breezy/tests/matchers.py

breezy/tests/per_branch/test_check.py

breezy/tests/per_branch/test_config.py

breezy/tests/per_branch/test_create_clone.py

breezy/tests/per_branch/test_dotted_revno_to_revision_id.py

breezy/tests/per_branch/test_get_rev_id.py

breezy/tests/per_branch/test_get_revision_id_to_revno_map.py

breezy/tests/per_branch/test_iter_merge_sorted_revisions.py

breezy/tests/per_branch/test_reconcile.py

breezy/tests/per_branch/test_revision_id_to_dotted_revno.py

breezy/tests/per_branch/test_revision_id_to_revno.py

breezy/tests/per_branch/test_sprout.py

breezy/tests/per_branch/test_stacking.py

breezy/tests/per_controldir/test_format.py

breezy/tests/per_controldir/test_push.py

breezy/tests/per_controldir_colo

breezy/tests/per_controldir_colo/__init__.py

breezy/tests/per_controldir_colo/test_supported.py

breezy/tests/per_controldir_colo/test_unsupported.py

breezy/tests/per_foreign_vcs

breezy/tests/per_foreign_vcs/__init__.py

breezy/tests/per_foreign_vcs/test_branch.py

breezy/tests/per_foreign_vcs/test_repository.py

breezy/tests/per_interbranch

breezy/tests/per_interbranch/__init__.py

breezy/tests/per_interbranch/test_copy_content_into.py

breezy/tests/per_interbranch/test_fetch.py

breezy/tests/per_interbranch/test_get.py

breezy/tests/per_interbranch/test_pull.py

breezy/tests/per_interbranch/test_push.py

breezy/tests/per_interrepository/test_fetch.py

breezy/tests/per_intertree/test_file_content_matches.py

breezy/tests/per_intertree/test_find_path.py

breezy/tests/per_merger.py

breezy/tests/per_repository/test_add_fallback_repository.py

breezy/tests/per_repository/test_check.py

breezy/tests/per_repository/test_fetch.py

breezy/tests/per_repository/test_file_graph.py

breezy/tests/per_repository/test_get_parent_map.py

breezy/tests/per_repository/test_get_rev_id_for_revno.py

breezy/tests/per_repository/test_has_revisions.py

breezy/tests/per_repository/test_has_same_location.py

breezy/tests/per_repository/test_locking.py

breezy/tests/per_repository/test_pack.py

breezy/tests/per_repository/test_refresh_data.py

breezy/tests/per_repository/test_signatures.py

breezy/tests/per_repository/test_write_group.py

breezy/tests/per_repository_reference

breezy/tests/per_repository_reference/__init__.py

breezy/tests/per_repository_reference/test__make_parents_provider.py

breezy/tests/per_repository_reference/test_add_inventory.py

breezy/tests/per_repository_reference/test_add_revision.py

breezy/tests/per_repository_reference/test_add_signature_text.py

breezy/tests/per_repository_reference/test_all_revision_ids.py

breezy/tests/per_repository_reference/test_break_lock.py

breezy/tests/per_repository_reference/test_check.py

breezy/tests/per_repository_reference/test_commit_with_stacking.py

breezy/tests/per_repository_reference/test_default_stacking.py

breezy/tests/per_repository_reference/test_fetch.py

breezy/tests/per_repository_reference/test_get_record_stream.py

breezy/tests/per_repository_reference/test_get_rev_id_for_revno.py

breezy/tests/per_repository_reference/test_graph.py

breezy/tests/per_repository_reference/test_initialize.py

breezy/tests/per_repository_reference/test_unlock.py

breezy/tests/per_tree/test_annotate_iter.py

breezy/tests/per_tree/test_archive.py

breezy/tests/per_tree/test_export.py

breezy/tests/per_tree/test_get_file_with_stat.py

breezy/tests/per_tree/test_get_root_id.py

breezy/tests/per_tree/test_ids.py

breezy/tests/per_tree/test_is_executable.py

breezy/tests/per_tree/test_iter_search_rules.py

breezy/tests/per_tree/test_locking.py

breezy/tests/per_tree/test_path_content_summary.py

breezy/tests/per_tree/test_transform.py

breezy/tests/per_uifactory

breezy/tests/per_uifactory/__init__.py

breezy/tests/per_workingtree/test_annotate_iter.py

breezy/tests/per_workingtree/test_canonical_path.py

breezy/tests/per_workingtree/test_check.py

breezy/tests/per_workingtree/test_check_state.py

breezy/tests/per_workingtree/test_content_filters.py

breezy/tests/per_workingtree/test_eol_conversion.py

breezy/tests/per_workingtree/test_remove.py

breezy/tests/per_workingtree/test_shelf_manager.py

breezy/tests/per_workingtree/test_symlinks.py

breezy/tests/per_workingtree/test_transform.py

breezy/tests/per_workingtree/test_uncommit.py

breezy/tests/per_workingtree/test_views.py

breezy/tests/scenarios.py

breezy/tests/script.py

breezy/tests/ssl_certs

breezy/tests/ssl_certs/__init__.py

breezy/tests/ssl_certs/ca.crt

breezy/tests/ssl_certs/ca.key

breezy/tests/ssl_certs/create_ssls.py

breezy/tests/ssl_certs/server.crt

breezy/tests/ssl_certs/server.csr

breezy/tests/ssl_certs/server.extensions.cnf

breezy/tests/ssl_certs/server_with_pass.key

breezy/tests/ssl_certs/server_without_pass.key

breezy/tests/test__annotator.py

breezy/tests/test__bencode.py

breezy/tests/test__chunks_to_lines.py

breezy/tests/test__known_graph.py

breezy/tests/test__rio.py

breezy/tests/test__simple_set.py

breezy/tests/test__static_tuple.py

breezy/tests/test__walkdirs_win32.py

breezy/tests/test_bedding.py

breezy/tests/test_bisect.py

breezy/tests/test_bisect_multi.py

breezy/tests/test_branchbuilder.py

breezy/tests/test_bugtracker.py

breezy/tests/test_cethread.py

breezy/tests/test_chunk_writer.py

breezy/tests/test_clean_tree.py

breezy/tests/test_cmdline.py

breezy/tests/test_controldir.py

breezy/tests/test_counted_lock.py

breezy/tests/test_crash.py

breezy/tests/test_debug.py

breezy/tests/test_directory_service.py

breezy/tests/test_dirty_tracker.py

breezy/tests/test_email_message.py

breezy/tests/test_eol_filters.py

breezy/tests/test_estimate_compressed_size.py

breezy/tests/test_export.py

breezy/tests/test_export_pot.py

breezy/tests/test_features.py

breezy/tests/test_fetch_ghosts.py

breezy/tests/test_fifo_cache.py

breezy/tests/test_filter_tree.py

breezy/tests/test_filters.py

breezy/tests/test_fixtures.py

breezy/tests/test_foreign.py

breezy/tests/test_graph.py

breezy/tests/test_grep.py

breezy/tests/test_help.py

breezy/tests/test_hooks.py

breezy/tests/test_https_urllib.py

breezy/tests/test_i18n.py

breezy/tests/test_import_tariff.py

breezy/tests/test_info.py

breezy/tests/test_library_state.py

breezy/tests/test_location.py

breezy/tests/test_lock.py

breezy/tests/test_lru_cache.py

breezy/tests/test_lsprof.py

breezy/tests/test_mail_client.py

breezy/tests/test_matchers.py

breezy/tests/test_memorybranch.py

breezy/tests/test_mergeable.py

breezy/tests/test_mergetools.py

breezy/tests/test_multiparent.py

breezy/tests/test_multiwalker.py

breezy/tests/test_mutabletree.py

breezy/tests/test_patches_data/binary-after-normal.patch

breezy/tests/test_patches_data/binary.patch

breezy/tests/test_patches_data/diff-7

breezy/tests/test_patches_data/mod-7

breezy/tests/test_patches_data/orig-7

breezy/tests/test_propose.py

breezy/tests/test_pyutils.py

breezy/tests/test_reconfigure.py

breezy/tests/test_rename_map.py

breezy/tests/test_rules.py

breezy/tests/test_scenarios.py

breezy/tests/test_script.py

breezy/tests/test_server.py

breezy/tests/test_shelf.py

breezy/tests/test_shelf_ui.py

breezy/tests/test_smtp_connection.py

breezy/tests/test_switch.py

breezy/tests/test_test_server.py

breezy/tests/test_transport_log.py

breezy/tests/test_treeshape.py

breezy/tests/test_uncommit.py

breezy/tests/test_upgrade_stacked.py

breezy/tests/test_upstream_import.py

breezy/tests/test_url_policy_open.py

breezy/tests/test_utextwrap.py

breezy/tests/test_views.py

breezy/tests/test_win32utils.py

breezy/tests/test_workspace.py

breezy/tests/testui.py

breezy/tests/transport_util.py

breezy/tests/ui_testing.py

breezy/transport/brokenrename.py

breezy/transport/gio_transport.py

breezy/transport/log.py

breezy/transport/nosmart.py

breezy/transport/pathfilter.py

breezy/transport/trace.py

breezy/transport/unlistable.py

breezy/upstream_import.py

breezy/url_policy_open.py

breezy/utextwrap.py

breezy/util/simplemapi.py

breezy/util/tests

breezy/util/tests/__init__.py

breezy/version_info_formats/format_custom.py

breezy/views.py

breezy/workspace.py

build.cmd

byov.conf

contrib/bash/brz

contrib/bash/brzbashprompt.sh

contrib/brz_access

contrib/bzr_ssh_path_limiter

contrib/debian

contrib/debian/default

contrib/debian/init.d

doc/developers

doc/developers/_static

doc/developers/_static/brz-doc.css

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/api

doc/developers/api/index.txt

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/branding.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/colocated-branches.txt

doc/developers/conf.py

doc/developers/config-rationale.txt

doc/developers/configuration.txt

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/dirstate.txt

doc/developers/documenting-changes.txt

doc/developers/ec2.txt

doc/developers/feature-flags.txt

doc/developers/fetch.txt

doc/developers/groupcompress-design.txt

doc/developers/implementation-notes.txt

doc/developers/improved_chk_index.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/miscellaneous-notes.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance.dot

doc/developers/plans

doc/developers/plans/index.txt

doc/developers/plans/nested-trees.txt

doc/developers/plans/performance

doc/developers/plans/performance/add.txt

doc/developers/plans/performance/annotate.txt

doc/developers/plans/performance/bundle-creation.txt

doc/developers/plans/performance/commit.txt

doc/developers/plans/performance/diff.txt

doc/developers/plans/performance/directory-fingerprints.txt

doc/developers/plans/performance/gc.txt

doc/developers/plans/performance/incremental-push-pull.txt

doc/developers/plans/performance/initial-push-pull.txt

doc/developers/plans/performance/merge-scaling.txt

doc/developers/plans/performance/missing.txt

doc/developers/plans/performance/performance-use-case-analysis.txt

doc/developers/plans/performance/planned-change-integration.txt

doc/developers/plans/performance/planned-performance-changes.txt

doc/developers/plans/performance/revert.txt

doc/developers/plans/performance/roadmap.txt

doc/developers/plans/performance/status.txt

doc/developers/plans/performance/uncommit.txt

doc/developers/plans/performance/update.txt

doc/developers/plans/tortoise-strategy.txt

doc/developers/plugin-api.txt

doc/developers/plugin-development.txt

doc/developers/ppa.txt

doc/developers/principles.txt

doc/developers/profiling.txt

doc/developers/proposals

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revision-properties.txt

doc/developers/specifications.txt

doc/developers/testing.txt

doc/developers/transports.txt

doc/developers/ui.txt

doc/developers/win32_build_setup.txt

doc/en

doc/en/Makefile

doc/en/_static

doc/en/_static/en

doc/en/_static/en/Makefile

doc/en/_static/en/brz-en-quick-reference.pdf

doc/en/_static/en/brz-en-quick-reference.png

doc/en/_static/en/brz-en-quick-reference.svg

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/admin-guide

doc/en/admin-guide/advanced.txt

doc/en/admin-guide/backup.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/index-plain.txt

doc/en/admin-guide/index.txt

doc/en/admin-guide/integration.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/licence.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/security.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/release-notes/brz-3.0.txt

doc/en/release-notes/brz-3.1.txt

doc/en/release-notes/brz-3.2.txt

doc/en/release-notes/bzr-0.1.txt

doc/en/release-notes/bzr-0.10.txt

doc/en/release-notes/bzr-0.11.txt

doc/en/release-notes/bzr-0.12.txt

doc/en/release-notes/bzr-0.13.txt

doc/en/release-notes/bzr-0.14.txt

doc/en/release-notes/bzr-0.15.txt

doc/en/release-notes/bzr-0.16.txt

doc/en/release-notes/bzr-0.17.txt

doc/en/release-notes/bzr-0.18.txt

doc/en/release-notes/bzr-0.6.txt

doc/en/release-notes/bzr-0.7.txt

doc/en/release-notes/bzr-0.8.txt

doc/en/release-notes/bzr-0.9.txt

doc/en/release-notes/bzr-0.90.txt

doc/en/release-notes/bzr-0.91.txt

doc/en/release-notes/bzr-0.92.txt

doc/en/release-notes/bzr-1.0.txt

doc/en/release-notes/bzr-1.1.txt

doc/en/release-notes/bzr-1.10.txt

doc/en/release-notes/bzr-1.11.txt

doc/en/release-notes/bzr-1.12.txt

doc/en/release-notes/bzr-1.13.txt

doc/en/release-notes/bzr-1.14.txt

doc/en/release-notes/bzr-1.15.txt

doc/en/release-notes/bzr-1.16.txt

doc/en/release-notes/bzr-1.17.txt

doc/en/release-notes/bzr-1.18.txt

doc/en/release-notes/bzr-1.2.txt

doc/en/release-notes/bzr-1.3.txt

doc/en/release-notes/bzr-1.4.txt

doc/en/release-notes/bzr-1.5.txt

doc/en/release-notes/bzr-1.6.txt

doc/en/release-notes/bzr-1.7.txt

doc/en/release-notes/bzr-1.8.txt

doc/en/release-notes/bzr-1.9.txt

doc/en/release-notes/bzr-2.0.txt

doc/en/release-notes/bzr-2.1.txt

doc/en/release-notes/bzr-2.2.txt

doc/en/release-notes/bzr-2.4.txt

doc/en/release-notes/bzr-2.5.txt

doc/en/release-notes/bzr-2.6.txt

doc/en/release-notes/bzr-2.7.txt

doc/en/release-notes/bzr-2.8.txt

doc/en/release-notes/fork.txt

doc/en/release-notes/release-template.txt

doc/en/release-notes/series-template.txt

doc/en/tutorials

doc/en/tutorials/index.txt

doc/en/tutorials/licence.txt

doc/en/tutorials/using_breezy_with_github.txt

doc/en/tutorials/using_breezy_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/licence.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/breezy_workflows.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_breezy.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/git_limitations.txt

doc/en/user-guide/gpg_signatures.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_breezy.txt

doc/en/user-guide/introducing_breezy.txt

doc/en/user-guide/licence.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/switch_store.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/en/whats-new

doc/en/whats-new/template.txt

doc/en/whats-new/whats-new-in-2.1.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/en/whats-new/whats-new-in-2.3.txt

doc/en/whats-new/whats-new-in-2.4.txt

doc/en/whats-new/whats-new-in-2.5.txt

doc/en/whats-new/whats-new-in-2.6.txt

doc/en/whats-new/whats-new-in-2.7.txt

doc/en/whats-new/whats-new-in-2.8.txt

doc/en/whats-new/whats-new-in-3.0.txt

doc/en/whats-new/whats-new-in-3.1.txt

doc/en/whats-new/whats-new-in-3.2.txt

doc/index.txt

doc/news-template.txt

po/ar.po

po/ast.po

po/brz.pot

po/bs.po

po/ca.po

po/cs.po

po/de.po

po/el.po

po/en_AU.po

po/en_GB.po

po/es.po

po/fa.po

po/fo.po

po/fr.po

po/gl.po

po/he.po

po/id.po

po/it.po

po/ja.po

po/ko.po

po/ms.po

po/my.po

po/nb.po

po/nl.po

po/oc.po

po/pl.po

po/pt_BR.po

po/ro.po

po/ru.po

po/sco.po

po/si.po

po/sk.po

po/sr.po

po/sv.po

po/tr.po

po/ug.po

po/uk.po

po/vi.po

po/zh_CN.po

setup.cfg

tools/brz_epydoc

tools/brz_epydoc_uid.py

tools/brzflakes.py

tools/check-newsbugs.py

tools/fixed-in.py

tools/flake8_lazy_import.py

tools/generate_release_notes.py

tools/package_docs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-control.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/subunit-sum

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/py2exe_boot_common.py

tools/win32/run_script.py

files renamed:
doc/developers/HACKING.txt => HACKING

doc/en/release-notes/bzr-2.3.txt => NEWS

README.rst => README

brz => bzr

breezy/ => bzrlib/

breezy/bzr/bundle/__init__.py => bzrlib/bundle/__init__.py

breezy/bzr/bundle/apply_bundle.py => bzrlib/bundle/apply_bundle.py

breezy/bzr/bundle/bundle_data.py => bzrlib/bundle/bundle_data.py

breezy/bzr/bundle/commands.py => bzrlib/bundle/commands.py

breezy/bzr/bundle/serializer/ => bzrlib/bundle/serializer/

breezy/bzr/bzrdir.py => bzrlib/bzrdir.py

breezy/bzr/dirstate.py => bzrlib/dirstate.py

breezy/export.py => bzrlib/export/__init__.py

breezy/archive/tar.py => bzrlib/export/tar_exporter.py

breezy/archive/zip.py => bzrlib/export/zip_exporter.py

breezy/bzr/fetch.py => bzrlib/fetch.py

breezy/bzr/generate_ids.py => bzrlib/generate_ids.py

breezy/help_topics/__init__.py => bzrlib/help_topics.py

breezy/bzr/inventory.py => bzrlib/inventory.py

breezy/bzr/knit.py => bzrlib/knit.py

breezy/plugins/launchpad/lp_directory.py => bzrlib/plugins/launchpad/lp_indirect.py

breezy/plugins/launchpad/test_lp_directory.py => bzrlib/plugins/launchpad/test_lp_indirect.py

breezy/bzr/knitrepo.py => bzrlib/repofmt/knitrepo.py

breezy/plugins/weave_fmt/repository.py => bzrlib/repofmt/weaverepo.py

breezy/commit_signature_commands.py => bzrlib/sign_my_commits.py

breezy/bzr/smart/ => bzrlib/smart/

breezy/plugins/weave_fmt/store/ => bzrlib/store/

breezy/plugins/weave_fmt/store/versioned.py => bzrlib/store/versioned/__init__.py

breezy/bzr/testament.py => bzrlib/testament.py

breezy/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

breezy/tests/http_server.py => bzrlib/tests/HttpServer.py

breezy/tests/blackbox/test_send.py => bzrlib/tests/blackbox/test_bundle.py

breezy/tests/per_branch/ => bzrlib/tests/branch_implementations/

breezy/tests/per_controldir/ => bzrlib/tests/bzrdir_implementations/

breezy/tests/per_controldir/test_controldir.py => bzrlib/tests/bzrdir_implementations/test_bzrdir.py

breezy/tests/per_interrepository/ => bzrlib/tests/interrepository_implementations/

breezy/tests/per_intertree/ => bzrlib/tests/intertree_implementations/

breezy/tests/per_repository/ => bzrlib/tests/repository_implementations/

breezy/bzr/tests/test_bundle.py => bzrlib/tests/test_bundle.py

breezy/bzr/tests/test_bzrdir.py => bzrlib/tests/test_bzrdir.py

breezy/bzr/tests/test_dirstate.py => bzrlib/tests/test_dirstate.py

breezy/bzr/tests/test_generate_ids.py => bzrlib/tests/test_generate_ids.py

breezy/bzr/tests/test_inv.py => bzrlib/tests/test_inv.py

breezy/bzr/tests/test_knit.py => bzrlib/tests/test_knit.py

breezy/bzr/tests/test_read_bundle.py => bzrlib/tests/test_read_bundle.py

breezy/bzr/tests/test_repository.py => bzrlib/tests/test_repository.py

breezy/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

breezy/bzr/tests/test_smart_transport.py => bzrlib/tests/test_smart_transport.py

breezy/plugins/weave_fmt/test_store.py => bzrlib/tests/test_store.py

breezy/bzr/tests/test_testament.py => bzrlib/tests/test_testament.py

breezy/tests/per_transport.py => bzrlib/tests/test_transport_implementations.py

breezy/bzr/tests/per_versionedfile.py => bzrlib/tests/test_versionedfile.py

breezy/bzr/tests/test_weave.py => bzrlib/tests/test_weave.py

breezy/bzr/tests/test_workingtree_4.py => bzrlib/tests/test_workingtree_4.py

breezy/bzr/tests/test_xml.py => bzrlib/tests/test_xml.py

breezy/tests/per_tree/ => bzrlib/tests/tree_implementations/

breezy/tests/per_tree/test_symlinks.py => bzrlib/tests/tree_implementations/test_inv.py

breezy/tests/per_workingtree/ => bzrlib/tests/workingtree_implementations/

breezy/bzr/textinv.py => bzrlib/textinv.py

breezy/util/_bencode_py.py => bzrlib/util/bencode.py

breezy/bzr/versionedfile.py => bzrlib/versionedfile.py

breezy/bzr/weave.py => bzrlib/weave.py

breezy/bzr/weavefile.py => bzrlib/weavefile.py

breezy/bzr/workingtree_4.py => bzrlib/workingtree_4.py

breezy/plugins/weave_fmt/xml4.py => bzrlib/xml4.py

breezy/bzr/xml8.py => bzrlib/xml5.py

breezy/bzr/xml7.py => bzrlib/xml7.py

breezy/bzr/xml_serializer.py => bzrlib/xml_serializer.py

contrib/emacs/brz-mode.el => contrib/emacs/bzr-mode.el

doc/en/tutorials/centralized_workflow.txt => doc/centralized_workflow.txt

breezy/help_topics/en/configuration.txt => doc/configuration.txt

doc/en/user-guide/http_smart_server.txt => doc/http_smart_server.txt

doc/en/user-guide/plugins.txt => doc/plugins.txt

doc/en/user-guide/server.txt => doc/server.txt

doc/en/user-guide/setting_up_email.txt => doc/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt => doc/specifying_revisions.txt

doc/en/tutorials/tutorial.txt => doc/tutorial.txt

doc/en/user-guide/using_aliases.txt => doc/using_aliases.txt

doc/en/user-guide/version_info.txt => doc/version_info.txt

tools/generate_docs.py => generate_docs.py

breezy/doc_generate/ => tools/doc_generate/

tools/win32/breezy.url => tools/win32/bazaar.url

tools/win32/brz-win32-bdist-postinstall.py => tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/brz.iss.cog => tools/win32/bzr.iss.cog

tools/win32/brz_postinstall.py => tools/win32/bzr_postinstall.py

tools/win32/start_brz.bat => tools/win32/start_bzr.bat

files modified:
.bzrignore

COPYING.txt

INSTALL

Makefile

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/smart/__init__.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/win32utils.py

bzrlib/workingtree.py

doc/default.css

profile_imports.py

setup.py *

tools/capture_tree.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". Each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). The delta also ends with an

lifeless: the data file is made up of "delta records". Each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). The delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

from io import BytesIO

import os

from ..lazy_import import lazy_import

lazy_import(globals(), """

import patiencediff

import gzip

from breezy import (

debug,

diff,

static_tuple,

trace,

tsort,

tuned_gzip,

ui,

)

from breezy.bzr import (

index as _mod_index,

pack,

)

from breezy.bzr import pack_repo

from breezy.i18n import gettext

""")

from .. import (

annotate,

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

ui,

)

from ..errors import (

InternalBzrError,

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

NoSuchFile,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from ..osutils import (

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import (

contains_whitespace,

sha_string,

contains_linebreaks,

sha_strings,

split_lines,

)

from ..bzr.versionedfile import (

_KeyRefs,

AbsentContentFactory,

100

adapter_registry,

101

ConstantMapper,

102

ContentFactory,

103

ExistingContent,

104

sort_groupcompress,

105

UnavailableRepresentation,

106

VersionedFilesWithFallbacks,

107

)

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

100

import bzrlib.ui

101

import bzrlib.weave

102

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

108

103

109

104

110

105

# TODO: Split out code specific to this format into an associated object.

122

117

123

118

DATA_SUFFIX = '.knit'

124

119

INDEX_SUFFIX = '.kndx'

125

_STREAM_MIN_BUFFER_SIZE = 5 * 1024 * 1024

126

127

128

class KnitError(InternalBzrError):

129

130

_fmt = "Knit error"

131

132

133

class KnitCorrupt(KnitError):

134

135

_fmt = "Knit %(filename)s corrupt: %(how)s"

136

137

def __init__(self, filename, how):

138

KnitError.__init__(self)

139

self.filename = filename

140

self.how = how

141

142

143

class SHA1KnitCorrupt(KnitCorrupt):

144

145

_fmt = ("Knit %(filename)s corrupt: sha-1 of reconstructed text does not "

146

"match expected sha-1. key %(key)s expected sha %(expected)s actual "

147

"sha %(actual)s")

148

149

def __init__(self, filename, actual, expected, key, content):

150

KnitError.__init__(self)

151

self.filename = filename

152

self.actual = actual

153

self.expected = expected

154

self.key = key

155

self.content = content

156

157

158

class KnitDataStreamIncompatible(KnitError):

159

# Not raised anymore, as we can convert data streams. In future we may

160

# need it again for more exotic cases, so we're keeping it around for now.

161

162

_fmt = "Cannot insert knit data stream of format \"%(stream_format)s\" into knit of format \"%(target_format)s\"."

163

164

def __init__(self, stream_format, target_format):

165

self.stream_format = stream_format

166

self.target_format = target_format

167

168

169

class KnitDataStreamUnknown(KnitError):
    """Indicates a data stream we don't know how to handle.

    :ivar stream_format: Format name of the data stream that was rejected.
    """

    _fmt = "Cannot parse knit data stream of format \"%(stream_format)s\"."

    def __init__(self, stream_format):
        # Initialise the base class like every other KnitError subclass in
        # this module does, so the instance is set up consistently.
        KnitError.__init__(self)
        self.stream_format = stream_format

176

177

178

class KnitHeaderError(KnitError):
    """Error reporting an unexpected header line in a knit file.

    :ivar badline: The offending line (formatted with %r in the message).
    :ivar filename: Name of the file the line came from.
    """

    _fmt = 'Knit header error: %(badline)r unexpected for file "%(filename)s".'

    def __init__(self, badline, filename):
        KnitError.__init__(self)
        # Both attributes are referenced by name from _fmt.
        self.badline, self.filename = badline, filename

186

187

188

class KnitIndexUnknownMethod(KnitError):
    """Raised when we don't understand the storage method.

    Currently only 'fulltext' and 'line-delta' are supported.

    :ivar filename: Name of the knit index concerned.
    :ivar options: The options that contained no known method.
    """

    _fmt = ("Knit index %(filename)s does not have a known method"
            " in options: %(options)r")

    def __init__(self, filename, options):
        KnitError.__init__(self)
        # Both attributes are referenced by name from _fmt.
        self.filename, self.options = filename, options

201

202

203

class KnitAdapter(object):
    """Base class for knit record adaption.

    Subclasses implement ``get_bytes(factory, target_storage_kind)`` and use
    the helpers created here to parse and re-serialise raw knit records.
    """

    def __init__(self, basis_vf):
        """Create an adapter which accesses full texts from basis_vf.

        :param basis_vf: A versioned file to access basis texts of deltas
            from. May be None for adapters that do not need to access basis
            texts.
        """
        # A KnitVersionedFiles with neither index nor data access: subclasses
        # in this module only call its record parsing/serialising helpers.
        self._data = KnitVersionedFiles(None, None)
        # One content factory per flavour so either format can be handled.
        self._annotate_factory = KnitAnnotateFactory()
        self._plain_factory = KnitPlainFactory()
        self._basis_vf = basis_vf

216

217

218

class FTAnnotatedToUnannotated(KnitAdapter):
    """An adapter from FT annotated knits to unannotated ones."""

    def get_bytes(self, factory, target_storage_kind):
        """Return factory's annotated fulltext record as 'knit-ft-gz' bytes.

        :param factory: Record whose ``_raw_record`` holds the annotated
            record bytes.
        :param target_storage_kind: Must be 'knit-ft-gz'.
        :raises UnavailableRepresentation: For any other target kind.
        """
        if target_storage_kind != 'knit-ft-gz':
            raise UnavailableRepresentation(
                factory.key, target_storage_kind, factory.storage_kind)
        rec, contents = self._data._parse_record_unchecked(
            factory._raw_record)
        # rec[1] is what parse_fulltext receives as its version id.
        parsed = self._annotate_factory.parse_fulltext(contents, rec[1])
        # Re-serialise the annotation-free text; only the chunks are needed.
        _size, chunks = self._data._record_to_data(
            (rec[1],), rec[3], parsed.text())
        return b''.join(chunks)

232

233

234

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """An adapter for deltas from annotated to unannotated."""

    def get_bytes(self, factory, target_storage_kind):
        """Return factory's annotated delta record as 'knit-delta-gz' bytes.

        :param factory: Record whose ``_raw_record`` holds the annotated
            delta record bytes.
        :param target_storage_kind: Must be 'knit-delta-gz'.
        :raises UnavailableRepresentation: For any other target kind.
        """
        if target_storage_kind != 'knit-delta-gz':
            raise UnavailableRepresentation(
                factory.key, target_storage_kind, factory.storage_kind)
        rec, annotated_lines = self._data._parse_record_unchecked(
            factory._raw_record)
        # plain=True asks the annotated parser for hunks without origins.
        plain_delta = self._annotate_factory.parse_line_delta(
            annotated_lines, rec[1], plain=True)
        serialised = self._plain_factory.lower_line_delta(plain_delta)
        # Only the chunks of the (size, chunks) result are needed here.
        _size, chunks = self._data._record_to_data(
            (rec[1],), rec[3], serialised)
        return b''.join(chunks)

249

250

251

class FTAnnotatedToFullText(KnitAdapter):
    """An adapter from FT annotated knit records to full texts."""

    def get_bytes(self, factory, target_storage_kind):
        """Return the text of factory's annotated fulltext record.

        :param factory: Record providing ``_raw_record``, ``key`` and
            ``_build_details``.
        :param target_storage_kind: 'fulltext' for one bytestring, or
            'chunked'/'lines' for the list returned by the content's text().
        :raises UnavailableRepresentation: For any other target kind; note
            the record is parsed before the kind is checked.
        """
        rec, contents = self._data._parse_record_unchecked(
            factory._raw_record)
        # No base content is supplied (None): this adapter handles fulltexts.
        content, _delta = self._annotate_factory.parse_record(
            factory.key[-1], contents, factory._build_details, None)
        if target_storage_kind in ('chunked', 'lines'):
            return content.text()
        if target_storage_kind == 'fulltext':
            return b''.join(content.text())
        raise UnavailableRepresentation(
            factory.key, target_storage_kind, factory.storage_kind)

266

267

268

class DeltaAnnotatedToFullText(KnitAdapter):
    """An adapter from annotated delta knit records to full texts."""

    def get_bytes(self, factory, target_storage_kind):
        """Rebuild the text of factory's annotated delta record.

        The basis text is fetched from ``self._basis_vf`` using the first
        parent of the record as the compression parent.

        :param factory: Record providing ``_raw_record``, ``parents``, ``key``
            and ``_build_details``.
        :param target_storage_kind: 'fulltext' for one bytestring, or
            'chunked'/'lines' for a list.
        :raises errors.RevisionNotPresent: If the basis text is absent.
        :raises UnavailableRepresentation: For any other target kind; note
            the text is rebuilt before the kind is checked.
        """
        rec, contents = self._data._parse_record_unchecked(
            factory._raw_record)
        plain_delta = self._annotate_factory.parse_line_delta(
            contents, rec[1], plain=True)
        compression_parent = factory.parents[0]
        basis_stream = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True)
        basis_entry = next(basis_stream)
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        # Manually apply the delta because we have one annotated content and
        # one plain.
        rebuilt = PlainKnitContent(
            basis_entry.get_bytes_as('lines'), compression_parent)
        rebuilt.apply_delta(plain_delta, rec[1])
        rebuilt._should_strip_eol = factory._build_details[1]

        if target_storage_kind in ('chunked', 'lines'):
            return rebuilt.text()
        if target_storage_kind == 'fulltext':
            return b''.join(rebuilt.text())
        raise UnavailableRepresentation(
            factory.key, target_storage_kind, factory.storage_kind)

295

296

297

class FTPlainToFullText(KnitAdapter):
    """An adapter from FT plain knit records to full texts."""

    def get_bytes(self, factory, target_storage_kind):
        """Return the text of factory's plain fulltext record.

        :param factory: Record providing ``_raw_record``, ``key`` and
            ``_build_details``.
        :param target_storage_kind: 'fulltext' for one bytestring, or
            'chunked'/'lines' for the list returned by the content's text().
        :raises UnavailableRepresentation: For any other target kind; note
            the record is parsed before the kind is checked.
        """
        rec, contents = self._data._parse_record_unchecked(
            factory._raw_record)
        # No base content is supplied (None): this adapter handles fulltexts.
        content, _delta = self._plain_factory.parse_record(
            factory.key[-1], contents, factory._build_details, None)
        if target_storage_kind in ('chunked', 'lines'):
            return content.text()
        if target_storage_kind == 'fulltext':
            return b''.join(content.text())
        raise UnavailableRepresentation(
            factory.key, target_storage_kind, factory.storage_kind)

312

313

314

class DeltaPlainToFullText(KnitAdapter):
    """An adapter from plain (unannotated) delta knit records to full texts."""

    def get_bytes(self, factory, target_storage_kind):
        """Rebuild the text of factory's plain delta record.

        The basis text is fetched from ``self._basis_vf`` using the first
        parent of the record as the compression parent, and the delta is
        applied to it by the plain factory's ``parse_record``.

        :param factory: Record providing ``_raw_record``, ``parents``, ``key``
            and ``_build_details``.
        :param target_storage_kind: 'fulltext' for one bytestring, or
            'chunked'/'lines' for a list.
        :raises errors.RevisionNotPresent: If the basis text is absent.
        :raises UnavailableRepresentation: For any other target kind; note
            the text is rebuilt before the kind is checked.
        """
        compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = next(self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True))
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_lines = basis_entry.get_bytes_as('lines')
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # parse_record parses the delta itself and applies it to (a copy of)
        # the basis content, so the delta is not parsed separately here.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
                                                      factory._build_details, basis_content)
        if target_storage_kind == 'fulltext':
            return b''.join(content.text())
        elif target_storage_kind in ('chunked', 'lines'):
            return content.text()
        raise UnavailableRepresentation(
            factory.key, target_storage_kind, factory.storage_kind)

340

341

342

class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
                 annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object used by get_bytes_as to obtain texts
            (through get_lines/get_text) when no direct adapter applies.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # storage_kind is 'knit-[annotated-]{delta,ft}-gz'.
        compression = 'delta' if build_details[0] == 'line-delta' else 'ft'
        annotation = 'annotated-' if annotated else ''
        self.storage_kind = 'knit-%s%s-gz' % (annotation, compression)
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Create a fully serialised network version for transmission."""
        # Layout: storage_kind \n key \n parents \n noeol-flag raw_record
        key_bytes = b'\x00'.join(self.key)
        if self.parents is not None:
            parent_bytes = b'\t'.join(
                b'\x00'.join(parent) for parent in self.parents)
        else:
            parent_bytes = b'None:'
        noeol = b'N' if self._build_details[1] else b' '
        self._network_bytes = b"%s\n%s\n%s\n%s%s" % (
            self.storage_kind.encode('ascii'), key_bytes,
            parent_bytes, noeol, self._raw_record)

    def get_bytes_as(self, storage_kind):
        """Return this record in the representation named by storage_kind.

        :raises UnavailableRepresentation: If no conversion is available.
        """
        if storage_kind == self.storage_kind:
            # Native kind: hand out the (lazily built) network serialisation.
            if self._network_bytes is None:
                self._create_network_bytes()
            return self._network_bytes
        if ('-ft-' in self.storage_kind
                and storage_kind in ('chunked', 'fulltext', 'lines')):
            # Fulltext records convert directly through a registered adapter
            # that needs no basis versioned file.
            make_adapter = adapter_registry.get(
                (self.storage_kind, storage_kind))
            return make_adapter(None).get_bytes(self, storage_kind)
        if self._knit is not None:
            # Not redundant with direct conversion above - that only handles
            # fulltext cases.
            if storage_kind == 'fulltext':
                return self._knit.get_text(self.key[0])
            if storage_kind in ('chunked', 'lines'):
                return self._knit.get_lines(self.key[0])
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        """Return an iterator over the result of get_bytes_as(storage_kind)."""
        return iter(self.get_bytes_as(storage_kind))

421

422

423

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory which can either generate full text or a wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for this
            key.
        :param first: Is this the first content object returned from generator?
            if it is, its storage kind is knit-delta-closure, otherwise it is
            knit-delta-closure-ref
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        self.size = None
        self._generator = generator
        self.storage_kind = "knit-delta-closure"
        if not first:
            self.storage_kind = self.storage_kind + "-ref"
        self._first = first

    def get_bytes_as(self, storage_kind):
        """Return this record in the representation named by storage_kind.

        :param storage_kind: This record's own storage kind (wire form), or
            one of 'chunked', 'lines' (a list) or 'fulltext' (one bytestring).
        :raises UnavailableRepresentation: For any other storage kind.
        """
        if storage_kind == self.storage_kind:
            if self._first:
                return self._generator._wire_bytes()
            else:
                # all the keys etc are contained in the bytes returned in the
                # first record.
                return b''
        if storage_kind in ('chunked', 'fulltext', 'lines'):
            chunks = self._generator._get_one_work(self.key).text()
            if storage_kind in ('chunked', 'lines'):
                return chunks
            else:
                return b''.join(chunks)
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        """Return an iterator over the chunks of this record's text.

        :param storage_kind: 'chunked' or 'lines'.
        :raises UnavailableRepresentation: For any other storage kind.
        """
        if storage_kind in ('chunked', 'lines'):
            chunks = self._generator._get_one_work(self.key).text()
            return iter(chunks)
        # Raise the same exception name as get_bytes_as and the rest of this
        # module rather than looking it up on the errors module.
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)

473

474

475

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to a iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'. (Not inspected by this function.)
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset into bytes, handed to the content map generator
        together with bytes.
    :return: Whatever the generator's get_record_stream() returns.
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

484

485

486

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    The bytes after line_end are laid out as: a key line (elements separated
    by NUL), a parents line (keys separated by tabs, or the literal
    ``None:``), one flag byte (``N`` meaning no final eol) and then the raw
    knit record.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A one-element list holding the KnitContentFactory.
    """
    # Key line.
    key_start = line_end
    key_end = bytes.find(b'\n', key_start)
    key = tuple(bytes[key_start:key_end].split(b'\x00'))
    # Parents line.
    parents_start = key_end + 1
    parents_end = bytes.find(b'\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line != b'None:':
        parents = tuple(
            [tuple(segment.split(b'\x00'))
             for segment in parent_line.split(b'\t') if segment])
    else:
        parents = None
    # Single flag byte, then everything else is the raw record.
    flag_at = parents_end + 1
    noeol = bytes[flag_at:flag_at + 1] == b'N'
    method = 'fulltext' if 'ft' in storage_kind else 'line-delta'
    build_details = (method, noeol)
    raw_record = bytes[flag_at + 1:]
    annotated = 'annotated' in storage_kind
    record = KnitContentFactory(key, parents, build_details, None,
                                raw_record, annotated, network_bytes=bytes)
    return [record]

516

120

517

121

518

122

class KnitContent(object):

519

"""Content of a knit version to which deltas can be applied.

520

521

This is always stored in memory as a list of lines with \\n at the end,

522

plus a flag saying if the final ending is really there or not, because that

523

corresponds to the on-disk knit representation.

524

"""

525

526

def __init__(self):

527

self._should_strip_eol = False

528

529

def apply_delta(self, delta, new_version_id):

530

"""Apply delta to this object to become new_version_id."""

531

raise NotImplementedError(self.apply_delta)

123

"""Content of a knit version to which deltas can be applied."""

124

125

def __init__(self, lines):

126

self._lines = lines

127

128

def annotate_iter(self):

129

"""Yield tuples of (origin, text) for each content line."""

130

return iter(self._lines)

131

132

def annotate(self):

133

"""Return a list of (origin, text) tuples."""

134

return list(self.annotate_iter())

532

135

533

136

def line_delta_iter(self, new_lines):

534

137

"""Generate line-based delta from this content to new_lines."""

535

138

new_texts = new_lines.text()

536

139

old_texts = self.text()

537

s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)

140

s = KnitSequenceMatcher(None, old_texts, new_texts)

538

141

for tag, i1, i2, j1, j2 in s.get_opcodes():

539

142

if tag == 'equal':

540

143

continue

544

147

def line_delta(self, new_lines):

545

148

return list(self.line_delta_iter(new_lines))

546

149

547

@staticmethod

548

def get_line_delta_blocks(knit_delta, source, target):

549

"""Extract SequenceMatcher.get_matching_blocks() from a knit delta"""

550

target_len = len(target)

551

s_pos = 0

552

t_pos = 0

553

for s_begin, s_end, t_len, new_text in knit_delta:

554

true_n = s_begin - s_pos

555

n = true_n

556

if n > 0:

557

# knit deltas do not provide reliable info about whether the

558

# last line of a file matches, due to eol handling.

559

if source[s_pos + n - 1] != target[t_pos + n - 1]:

560

n -= 1

561

if n > 0:

562

yield s_pos, t_pos, n

563

t_pos += t_len + true_n

564

s_pos = s_end

565

n = target_len - t_pos

566

if n > 0:

567

if source[s_pos + n - 1] != target[t_pos + n - 1]:

568

n -= 1

569

if n > 0:

570

yield s_pos, t_pos, n

571

yield s_pos + (target_len - t_pos), target_len, 0

572

573

574

class AnnotatedKnitContent(KnitContent):
    """Annotated content.

    ``_lines`` is a list of (origin, text) pairs.
    """

    def __init__(self, lines):
        KnitContent.__init__(self)
        # Materialise the iterable so the content owns its own list.
        self._lines = list(lines)

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        annotated = list(self._lines)
        if self._should_strip_eol:
            # Only the final line can be missing its newline.
            origin, last_text = annotated[-1]
            annotated[-1] = (origin, last_text.rstrip(b'\n'))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        target = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            target[shift + start:shift + end] = delta_lines
            # Later hunks are addressed in pre-delta coordinates, so track
            # how far earlier replacements have moved them.
            shift = shift + (start - end) + count

    def text(self):
        """Return the content lines without their origins, as a list."""
        try:
            plain = [text for origin, text in self._lines]
        except ValueError as e:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            raise KnitCorrupt(self,
                              "line in annotated knit missing annotation information: %s"
                              % (e,))
        if self._should_strip_eol:
            plain[-1] = plain[-1].rstrip(b'\n')
        return plain

    def copy(self):
        """Return a new AnnotatedKnitContent built from the same lines."""
        return AnnotatedKnitContent(self._lines)

613

614

615

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is reported
    for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
    objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        # The list is stored as given (not copied); apply_delta mutates it.
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        version = self._version_id
        return [(version, line) for line in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        target = self._lines
        shift = 0
        for start, end, count, delta_lines in delta:
            target[shift + start:shift + end] = delta_lines
            # Later hunks are addressed in pre-delta coordinates, so track
            # how far earlier replacements have moved them.
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent holding a copy of the line list."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the content lines as a list.

        When no eol stripping is needed the internal list itself is returned;
        otherwise a copy whose last line has trailing newlines removed.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip(b'\n')
        return stripped

150

def text(self):

151

return [text for origin, text in self._lines]

152

153

def copy(self):

154

return KnitContent(self._lines[:])

650

155

651

156

652

157

class _KnitFactory(object):

653

"""Base class for common Factory functions."""

654

655

def parse_record(self, version_id, record, record_details,

656

base_content, copy_base_content=True):

657

"""Parse a record into a full content object.

658

659

:param version_id: The official version id for this content

660

:param record: The data returned by read_records_iter()

661

:param record_details: Details about the record returned by

662

get_build_details

663

:param base_content: If get_build_details returns a compression_parent,

664

you must return a base_content here, else use None

665

:param copy_base_content: When building from the base_content, decide

666

you can either copy it and return a new object, or modify it in

667

place.

668

:return: (content, delta) A Content object and possibly a line-delta,

669

delta may be None

670

"""

671

method, noeol = record_details

672

if method == 'line-delta':

673

if copy_base_content:

674

content = base_content.copy()

675

else:

676

content = base_content

677

delta = self.parse_line_delta(record, version_id)

678

content.apply_delta(delta, version_id)

679

else:

680

content = self.parse_fulltext(record, version_id)

681

delta = None

682

content._should_strip_eol = noeol

683

return (content, delta)

158

"""Base factory for creating content objects."""

159

160

def make(self, lines, version_id):

161

num_lines = len(lines)

162

return KnitContent(zip([version_id] * num_lines, lines))

684

163

685

164

686

165

class KnitAnnotateFactory(_KnitFactory):

688

167

689

168

annotated = True

690

169

691

def make(self, lines, version_id):

692

num_lines = len(lines)

693

return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

694

695

170

def parse_fulltext(self, content, version_id):

696

171

"""Convert fulltext to internal representation

697

172

704

179

# but the code itself doesn't really depend on that.

705

180

# Figure out a way to not require the overhead of turning the

706

181

# list back into tuples.

707

lines = (tuple(line.split(b' ', 1)) for line in content)

708

return AnnotatedKnitContent(lines)

709

710

def parse_line_delta(self, lines, version_id, plain=False):

182

lines = [tuple(line.split(' ', 1)) for line in content]

183

return KnitContent(lines)

184

185

def parse_line_delta_iter(self, lines):

186

return iter(self.parse_line_delta(lines))

187

188

def parse_line_delta(self, lines, version_id):

711

189

"""Convert a line based delta into internal representation.

712

190

713

191

line delta is in the form of:

716

194

revid(utf8) newline\n

717

195

internal representation is

718

196

(start, end, count, [1..count tuples (revid, newline)])

719

720

:param plain: If True, the lines are returned as a plain

721

list without annotations, not as a list of (origin, content) tuples, i.e.

722

(start, end, count, [1..count newline])

723

197

"""

724

198

result = []

725

199

lines = iter(lines)

200

next = lines.next

726

201

727

202

cache = {}

728

729

203

def cache_and_return(line):

730

origin, text = line.split(b' ', 1)

204

origin, text = line.split(' ', 1)

731

205

return cache.setdefault(origin, origin), text

732

206

733

207

# walk through the lines parsing.

734

# Note that the plain test is explicitly pulled out of the

735

# loop to minimise any performance impact

736

if plain:

737

for header in lines:

738

start, end, count = [int(n) for n in header.split(b',')]

739

contents = [next(lines).split(b' ', 1)[1]

740

for _ in range(count)]

741

result.append((start, end, count, contents))

742

else:

743

for header in lines:

744

start, end, count = [int(n) for n in header.split(b',')]

745

contents = [tuple(next(lines).split(b' ', 1))

746

for _ in range(count)]

747

result.append((start, end, count, contents))

208

for header in lines:

209

start, end, count = [int(n) for n in header.split(',')]

210

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

211

result.append((start, end, count, contents))

748

212

return result

749

213

750

214

def get_fulltext_content(self, lines):

751

215

"""Extract just the content lines from a fulltext."""

752

return (line.split(b' ', 1)[1] for line in lines)

216

return (line.split(' ', 1)[1] for line in lines)

753

217

754

218

def get_linedelta_content(self, lines):

755

219

"""Extract just the content from a line delta.

758

222

Only the actual content lines.

759

223

"""

760

224

lines = iter(lines)

225

next = lines.next

761

226

for header in lines:

762

header = header.split(b',')

227

header = header.split(',')

763

228

count = int(header[2])

764

for _ in range(count):

765

origin, text = next(lines).split(b' ', 1)

229

for i in xrange(count):

230

origin, text = next().split(' ', 1)

766

231

yield text

767

232

768

233

def lower_fulltext(self, content):

770

235

771

236

see parse_fulltext which this inverts.

772

237

"""

773

return [b'%s %s' % (o, t) for o, t in content._lines]

238

# TODO: jam 20070209 We only do the caching thing to make sure that

239

# the origin is a valid utf-8 line, eventually we could remove it

240

return ['%s %s' % (o, t) for o, t in content._lines]

774

241

775

242

def lower_line_delta(self, delta):

776

243

"""convert a delta into a serializable form.

781

248

# the origin is a valid utf-8 line, eventually we could remove it

782

249

out = []

783

250

for start, end, c, lines in delta:

784

out.append(b'%d,%d,%d\n' % (start, end, c))

785

out.extend(origin + b' ' + text

251

out.append('%d,%d,%d\n' % (start, end, c))

252

out.extend(origin + ' ' + text

786

253

for origin, text in lines)

787

254

return out

788

255

789

def annotate(self, knit, key):

790

content = knit._get_content(key)

791

# adjust for the fact that serialised annotations are only key suffixes

792

# for this factory.

793

if isinstance(key, tuple):

794

prefix = key[:-1]

795

origins = content.annotate()

796

result = []

797

for origin, line in origins:

798

result.append((prefix + (origin,), line))

799

return result

800

else:

801

# XXX: This smells a bit. Why would key ever be a non-tuple here?

802

# Aren't keys defined to be tuples? -- spiv 20080618

803

return content.annotate()

804

805

256

806

257

class KnitPlainFactory(_KnitFactory):

807

258

"""Factory for creating plain Content objects."""

808

259

809

260

annotated = False

810

261

811

def make(self, lines, version_id):

812

return PlainKnitContent(lines, version_id)

813

814

262

def parse_fulltext(self, content, version_id):

815

263

"""This parses an unannotated fulltext.

816

264

825

273

while cur < num_lines:

826

274

header = lines[cur]

827

275

cur += 1

828

start, end, c = [int(n) for n in header.split(b',')]

829

yield start, end, c, lines[cur:cur + c]

276

start, end, c = [int(n) for n in header.split(',')]

277

yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

830

278

cur += c

831

279

832

280

def parse_line_delta(self, lines, version_id):

843

291

Only the actual content lines.

844

292

"""

845

293

lines = iter(lines)

294

next = lines.next

846

295

for header in lines:

847

header = header.split(b',')

296

header = header.split(',')

848

297

count = int(header[2])

849

for _ in range(count):

850

yield next(lines)

298

for i in xrange(count):

299

yield next()

851

300

852

301

def lower_fulltext(self, content):

853

302

return content.text()

855

304

def lower_line_delta(self, delta):

856

305

out = []

857

306

for start, end, c, lines in delta:

858

out.append(b'%d,%d,%d\n' % (start, end, c))

859

out.extend(lines)

307

out.append('%d,%d,%d\n' % (start, end, c))

308

out.extend([text for origin, text in lines])

860

309

return out

861

310

862

def annotate(self, knit, key):

863

annotator = _KnitAnnotator(knit)

864

return annotator.annotate_flat(key)

865

866

867

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles.
    """
    def _returns_none():
        return None

    def _returns_true():
        return True

    def factory(transport):
        # The index takes three callbacks; fixed-value stand-ins are
        # supplied for all of them.
        index = _KndxIndex(transport, mapper, _returns_none,
                           _returns_true, _returns_true)
        access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(index, access, annotated=annotated)

    return factory

882

883

884

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles with extra ``stream`` and ``writer`` attributes.
    """
    def factory(transport):
        # One reference list for each of the two features that is enabled.
        ref_length = (1 if graph else 0) + (1 if delta else 0)
        # Delta chains are only permitted when delta compression is wanted.
        max_delta_chain = 200 if delta else 0
        parents = graph or delta
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _KnitGraphIndex(
            graph_index, lambda: True, parents=parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        access = pack_repo._DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(
            index, access, max_delta_chain=max_delta_chain)
        # Kept on the result so the stream and writer can be shut down later.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files

    return factory

919

920

921

def cleanup_pack_knit(versioned_files):
    """Finish the writer and close the stream attached to versioned_files.

    :param versioned_files: An object carrying ``writer`` and ``stream``
        attributes, as set by the factory from make_pack_factory.
    """
    try:
        # The writer emits through the stream's write method, so it has to
        # be ended while the stream is still open.
        versioned_files.writer.end()
    finally:
        # Always release the stream, even if ending the writer failed.
        versioned_files.stream.close()

924

925

926

def _get_total_build_size(self, keys, positions):

927

"""Determine the total bytes to build these keys.

928

929

(helper function because _KnitGraphIndex and _KndxIndex work the same, but

930

don't inherit from a common base.)

931

932

:param keys: Keys that we want to build

933

:param positions: dict of {key, (info, index_memo, comp_parent)} (such

934

as returned by _get_components_positions)

935

:return: Number of bytes to build those keys

936

"""

937

all_build_index_memos = {}

938

build_keys = keys

939

while build_keys:

940

next_keys = set()

941

for key in build_keys:

942

# This is mostly for the 'stacked' case

943

# Where we will be getting the data from a fallback

944

if key not in positions:

945

continue

946

_, index_memo, compression_parent = positions[key]

947

all_build_index_memos[key] = index_memo

948

if compression_parent not in all_build_index_memos:

949

next_keys.add(compression_parent)

950

build_keys = next_keys

951

return sum(index_memo[2]

952

for index_memo in all_build_index_memos.values())

953

954

955

class KnitVersionedFiles(VersionedFilesWithFallbacks):

956

"""Storage for many versioned files using knit compression.

957

958

Backend storage is managed by indices and data objects.

959

960

:ivar _index: A _KnitGraphIndex or similar that can describe the

961

parents, graph, compression and data location of entries in this

962

KnitVersionedFiles. Note that this is only the index for

963

*this* vfs; if there are fallbacks they must be queried separately.

964

"""

965

966

def __init__(self, index, data_access, max_delta_chain=200,

967

annotated=False, reload_func=None):

968

"""Create a KnitVersionedFiles with index and data_access.

969

970

:param index: The index for the knit data.

971

:param data_access: The access object to store and retrieve knit

972

records.

973

:param max_delta_chain: The maximum number of deltas to permit during

974

insertion. Set to 0 to prohibit the use of deltas.

975

:param annotated: Set to True to cause annotations to be calculated and

976

stored during insertion.

977

:param reload_func: An function that can be called if we think we need

978

to reload the pack listing and try again. See

979

'breezy.bzr.pack_repo.AggregateIndex' for the signature.

311

312

def make_empty_knit(transport, relpath):

313

"""Construct a empty knit at the specified location."""

314

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

315

k._data._open_file()

316

317

318

class KnitVersionedFile(VersionedFile):

319

"""Weave-like structure with faster random access.

320

321

A knit stores a number of texts and a summary of the relationships

322

between them. Texts are identified by a string version-id. Texts

323

are normally stored and retrieved as a series of lines, but can

324

also be passed as single strings.

325

326

Lines are stored with the trailing newline (if any) included, to

327

avoid special cases for files with no final newline. Lines are

328

composed of 8-bit characters, not unicode. The combination of

329

these approaches should mean any 'binary' file can be safely

330

stored and retrieved.

331

"""

332

333

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

334

factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,

335

create=False, create_parent_dir=False, delay_create=False,

336

dir_mode=None):

337

"""Construct a knit at location specified by relpath.

338

339

:param create: If not True, only open an existing knit.

340

:param create_parent_dir: If True, create the parent directory if

341

creating the file fails. (This is used for stores with

342

hash-prefixes that may not exist yet)

343

:param delay_create: The calling code is aware that the knit won't

344

actually be created until the first data is stored.

980

345

"""

981

self._index = index

982

self._access = data_access

983

self._max_delta_chain = max_delta_chain

984

if annotated:

985

self._factory = KnitAnnotateFactory()

986

else:

987

self._factory = KnitPlainFactory()

988

self._immediate_fallback_vfs = []

989

self._reload_func = reload_func

346

if deprecated_passed(basis_knit):

347

warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"

348

" deprecated as of bzr 0.9.",

349

DeprecationWarning, stacklevel=2)

350

if access_mode is None:

351

access_mode = 'w'

352

super(KnitVersionedFile, self).__init__(access_mode)

353

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

354

self.transport = transport

355

self.filename = relpath

356

self.factory = factory or KnitAnnotateFactory()

357

self.writable = (access_mode == 'w')

358

self.delta = delta

359

360

self._max_delta_chain = 200

361

362

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

363

access_mode, create=create, file_mode=file_mode,

364

create_parent_dir=create_parent_dir, delay_create=delay_create,

365

dir_mode=dir_mode)

366

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

367

access_mode, create=create and not len(self), file_mode=file_mode,

368

create_parent_dir=create_parent_dir, delay_create=delay_create,

369

dir_mode=dir_mode)

990

370

991

371

def __repr__(self):

992

return "%s(%r, %r)" % (

993

self.__class__.__name__,

994

self._index,

995

self._access)

996

997

def without_fallbacks(self):
    """Return a clone of this object without any fallbacks configured.

    The clone is built from this object's index, data access object,
    delta-chain limit, annotation flag and reload function; none of the
    entries in ``_immediate_fallback_vfs`` are passed on.
    """
    return KnitVersionedFiles(self._index, self._access,
                              self._max_delta_chain,
                              self._factory.annotated,
                              self._reload_func)

1002

1003

def add_fallback_versioned_files(self, a_versioned_files):
    """Add a source of texts for texts not present in this knit.

    :param a_versioned_files: A VersionedFiles object. It is appended to
        the list of immediate fallbacks, after any registered earlier.
    """
    self._immediate_fallback_vfs.append(a_versioned_files)

1009

1010

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines().

    Verifies the index is writable, validates ``key`` (and ``lines`` when
    ``check_content`` is true), then delegates to ``_add`` with both the
    line list and the joined bytes.

    :return: Whatever ``_add`` returns.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    line_bytes = b''.join(lines)
    return self._add(key, lines, parents,
                     parent_texts, left_matching_blocks, nostore_sha,
                     random_id, line_bytes=line_bytes)

1025

1026

def add_content(self, content_factory, parent_texts=None,
                left_matching_blocks=None, nostore_sha=None,
                random_id=False):
    """See VersionedFiles.add_content().

    The key and parents are read from ``content_factory``; its content is
    fetched both as 'lines' and as 'fulltext' and handed to ``_add``.
    Content checks are skipped (``check_content=False``).

    :return: Whatever ``_add`` returns.
    """
    self._index._check_write_ok()
    key = content_factory.key
    parents = content_factory.parents
    self._check_add(key, None, random_id, check_content=False)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    lines = content_factory.get_bytes_as('lines')
    line_bytes = content_factory.get_bytes_as('fulltext')
    return self._add(key, lines, parents,
                     parent_texts, left_matching_blocks, nostore_sha,
                     random_id, line_bytes=line_bytes)

1044

1045

def _add(self, key, lines, parents, parent_texts,
         left_matching_blocks, nostore_sha, random_id,
         line_bytes):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :param lines: A list of strings where each one is a single line (has a
        single newline at the end of the string). This is now optional
        (callers can pass None). It is left in its location for backwards
        compatibility. When given, b''.join(lines) must == line_bytes.
    :param line_bytes: A single string containing the content.
    :return: A (digest, text_length, content) tuple.
    :raises ExistingContent: If the sha of line_bytes equals nostore_sha.
    :raises TypeError: If any element of key is not a bytestring (the last
        element may be None, in which case it is replaced by
        b'sha1:' + digest).

    We pass both lines and line_bytes because different routes bring the
    values to this function. And for memory efficiency, we don't want to
    have to split/join on-demand.
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence; we only compress
    # against parents in the same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    for parent in parents:
        if parent in present_parent_map:
            present_parents.append(parent)

    # Currently we can only compress against the left most present parent.
    if (len(present_parents) == 0
            or present_parents[0] != parents[0]):
        delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents[0])

    text_length = len(line_bytes)
    options = []
    no_eol = False
    # Note: line_bytes is not modified to add a newline, that is tracked
    #       via the no_eol flag. 'lines' *is* modified, because that is the
    #       general values needed by the Content code.
    if line_bytes and not line_bytes.endswith(b'\n'):
        options.append(b'no-eol')
        no_eol = True
        # Copy the existing list, or create a new one
        if lines is None:
            lines = osutils.split_lines(line_bytes)
        else:
            lines = lines[:]
        # Replace the last line with one that ends in a final newline
        lines[-1] = lines[-1] + b'\n'
    if lines is None:
        lines = osutils.split_lines(line_bytes)

    for element in key[:-1]:
        if not isinstance(element, bytes):
            raise TypeError("key contains non-bytestrings: %r" % (key,))
    if key[-1] is None:
        key = key[:-1] + (b'sha1:' + digest,)
    elif not isinstance(key[-1], bytes):
        raise TypeError("key contains non-bytestrings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if no_eol:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(
            content, present_parents, parent_texts, delta,
            self._factory.annotated, left_matching_blocks)

    if delta:
        options.append(b'line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, data = self._record_to_data(key, digest, store_lines)
    else:
        options.append(b'fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ is KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            dense_lines = [line_bytes]
            if no_eol:
                dense_lines.append(b'\n')
            size, data = self._record_to_data(key, digest,
                                              lines, dense_lines)
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self._factory.lower_fulltext(content)
            size, data = self._record_to_data(key, digest, store_lines)

    access_memo = self._access.add_raw_record(key, size, data)
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

1153

1154

def annotate(self, key):
    """See VersionedFiles.annotate.

    Delegates to the content factory, passing this object and ``key``.
    """
    return self._factory.annotate(self, key)

1157

1158

def get_annotator(self):
    """Return a new _KnitAnnotator wrapping this object."""
    return _KnitAnnotator(self)

1160

1161

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    :param progress_bar: Accepted but not used by this implementation.
    :param keys: If None, run the logical check over the whole index.
        Otherwise return an unordered record stream for ``keys`` with
        delta closure requested.
    """
    if keys is None:
        return self._logical_check()
    else:
        # At the moment, check does no extra work over get_record_stream
        return self.get_record_stream(keys, 'unordered', True)

1168

1169

def _logical_check(self):

1170

# This doesn't actually test extraction of everything, but that will

1171

# impact 'bzr check' substantially, and needs to be integrated with

1172

# care. However, it does check for the obvious problem of a delta with

1173

# no basis.

1174

keys = self._index.keys()

1175

parent_map = self.get_parent_map(keys)

1176

for key in keys:

1177

if self._index.get_method(key) != 'fulltext':

1178

compression_parent = parent_map[key][0]

1179

if compression_parent not in parent_map:

1180

raise KnitCorrupt(self,

1181

"Missing basis parent %s for %s" % (

1182

compression_parent, key))

1183

for fallback_vfs in self._immediate_fallback_vfs:

1184

fallback_vfs.check()

1185

1186

def _check_add(self, key, lines, random_id, check_content):

1187

"""check that version_id and lines are safe to add."""

1188

if not all(isinstance(x, bytes) or x is None for x in key):

1189

raise TypeError(key)

1190

version_id = key[-1]

1191

if version_id is not None:

1192

if contains_whitespace(version_id):

1193

raise InvalidRevisionId(version_id, self)

1194

self.check_not_reserved_id(version_id)

1195

# TODO: If random_id==False and the key is already present, we should

1196

# probably check that the existing content is identical to what is

1197

# being inserted, and otherwise raise an exception. This would make

1198

# the bundle code simpler.

1199

if check_content:

1200

self._check_lines_not_unicode(lines)

1201

self._check_lines_are_lines(lines)

1202

1203

def _check_header(self, key, line):

1204

rec = self._split_header(line)

1205

self._check_header_version(rec, key[-1])

1206

return rec

1207

1208

def _check_header_version(self, rec, version_id):

1209

"""Checks the header version on original format knit records.

1210

1211

These have the last component of the key embedded in the record.

1212

"""

1213

if rec[1] != version_id:

1214

raise KnitCorrupt(self,

1215

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1216

1217

def _check_should_delta(self, parent):

372

return '%s(%s)' % (self.__class__.__name__,

373

self.transport.abspath(self.filename))

374

375

def _check_should_delta(self, first_parents):

1218

376

"""Iterate back through the parent listing, looking for a fulltext.

1219

377

1220

378

This is used when we want to decide whether to add a delta or a new

1227

385

"""

1228

386

delta_size = 0

1229

387

fulltext_size = None

1230

for count in range(self._max_delta_chain):

1231

try:

1232

# Note that this only looks in the index of this particular

1233

# KnitVersionedFiles, not in the fallbacks. This ensures that

1234

# we won't store a delta spanning physical repository

1235

# boundaries.

1236

build_details = self._index.get_build_details([parent])

1237

parent_details = build_details[parent]

1238

except (RevisionNotPresent, KeyError) as e:

1239

# Some basis is not locally present: always fulltext

1240

return False

1241

index_memo, compression_parent, _, _ = parent_details

1242

_, _, size = index_memo

1243

if compression_parent is None:

388

delta_parents = first_parents

389

for count in xrange(self._max_delta_chain):

390

parent = delta_parents[0]

391

method = self._index.get_method(parent)

392

pos, size = self._index.get_position(parent)

393

if method == 'fulltext':

1244

394

fulltext_size = size

1245

395

break

1246

396

delta_size += size

1247

# We don't explicitly check for presence because this is in an

1248

# inner loop, and if it's missing it'll fail anyhow.

1249

parent = compression_parent

397

delta_parents = self._index.get_parents(parent)

1250

398

else:

1251

399

# We couldn't find a fulltext, so we must create a new one

1252

400

return False

1253

# Simple heuristic - if the total I/O would be greater as a delta than

1254

# the originally installed fulltext, we create a new fulltext.

401

1255

402

return fulltext_size > delta_size

1256

403

1257

def _build_details_to_components(self, build_details):

1258

"""Convert a build_details tuple to a position tuple."""

1259

# record_details, access_memo, compression_parent

1260

return build_details[3], build_details[0], build_details[1]

1261

1262

def _get_components_positions(self, keys, allow_missing=False):

1263

"""Produce a map of position data for the components of keys.

404

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

405

"""See VersionedFile._add_delta()."""

406

self._check_add(version_id, []) # should we check the lines ?

407

self._check_versions_present(parents)

408

present_parents = []

409

ghosts = []

410

parent_texts = {}

411

for parent in parents:

412

if not self.has_version(parent):

413

ghosts.append(parent)

414

else:

415

present_parents.append(parent)

416

417

if delta_parent is None:

418

# reconstitute as full text.

419

assert len(delta) == 1 or len(delta) == 0

420

if len(delta):

421

assert delta[0][0] == 0

422

assert delta[0][1] == 0, delta[0][1]

423

return super(KnitVersionedFile, self)._add_delta(version_id,

424

parents,

425

delta_parent,

426

sha1,

427

noeol,

428

delta)

429

430

digest = sha1

431

432

options = []

433

if noeol:

434

options.append('no-eol')

435

436

if delta_parent is not None:

437

# determine the current delta chain length.

438

# To speed the extract of texts the delta chain is limited

439

# to a fixed number of deltas. This should minimize both

440

# I/O and the time spend applying deltas.

441

# The window was changed to a maximum of 200 deltas, but also added

442

# was a check that the total compressed size of the deltas is

443

# smaller than the compressed size of the fulltext.

444

if not self._check_should_delta([delta_parent]):

445

# We don't want a delta here, just do a normal insertion.

446

return super(KnitVersionedFile, self)._add_delta(version_id,

447

parents,

448

delta_parent,

449

sha1,

450

noeol,

451

delta)

452

453

options.append('line-delta')

454

store_lines = self.factory.lower_line_delta(delta)

455

456

where, size = self._data.add_record(version_id, digest, store_lines)

457

self._index.add_version(version_id, options, where, size, parents)

458

459

def _add_raw_records(self, records, data):

460

"""Add all the records 'records' with data pre-joined in 'data'.

461

462

:param records: A list of tuples(version_id, options, parents, size).

463

:param data: The data for the records. When it is written, the records

464

are adjusted to have pos pointing into data by the sum of

465

the preceding records sizes.

466

"""

467

# write all the data

468

pos = self._data.add_raw_record(data)

469

offset = 0

470

index_entries = []

471

for (version_id, options, parents, size) in records:

472

index_entries.append((version_id, options, pos+offset,

473

size, parents))

474

if self._data._do_cache:

475

self._data._cache[version_id] = data[offset:offset+size]

476

offset += size

477

self._index.add_versions(index_entries)

478

479

def enable_cache(self):
    """Start caching data for this knit.

    Delegates to the data object's ``enable_cache``.
    """
    self._data.enable_cache()

482

483

def clear_cache(self):
    """Clear the data cache only.

    Delegates to the data object's ``clear_cache``; the index is untouched.
    """
    self._data.clear_cache()

486

487

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; the index and data suffixes are
        appended to it.
    :param transport: Transport to write the copy to.
    """
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                                  self.transport.get(self._index._filename))
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

501

502

def create_empty(self, name, transport, mode=None):
    """Create a new knit at ``name`` sharing this knit's factory and delta flag.

    :param name: Relative path of the new knit.
    :param transport: Transport to create it on.
    :param mode: Accepted for interface compatibility; not used here.
    :return: The new KnitVersionedFile, opened with create=True.
    """
    return KnitVersionedFile(name, transport, factory=self.factory,
                             delta=self.delta, create=True)

505

506

def _fix_parents(self, version_id, new_parents):

507

"""Fix the parents list for version.

508

509

This is done by appending a new version to the index

510

with identical data except for the parents list.

511

the parents list must be a superset of the current

512

list.

513

"""

514

current_values = self._index._cache[version_id]

515

assert set(current_values[4]).difference(set(new_parents)) == set()

516

self._index.add_version(version_id,

517

current_values[1],

518

current_values[2],

519

current_values[3],

520

new_parents)

521

522

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first
        parent of the version, or None if it has no parents.
    :raises RevisionNotPresent: If version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    else:
        parent = None
    data_pos, data_size = self._index.get_position(version_id)
    data, sha1 = self._data.read_records(
        ((version_id, data_pos, data_size),))[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' == self._index.get_method(version_id):
        # Stored as a full text: compute the delta against the first
        # parent (or against an empty text when there is none).
        new_content = self.factory.parse_fulltext(data, version_id)
        if parent is not None:
            reference_content = self._get_content(parent)
            old_texts = reference_content.text()
        else:
            old_texts = []
        new_texts = new_content.text()
        delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
        return parent, sha1, noeol, self._make_line_delta(delta_seq,
                                                           new_content)
    else:
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

550

551

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the index's graph items.
    """
    graph_items = self._index.get_graph()
    return dict(graph_items)

555

556

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    :return: The digest stored in the record map entry for version_id.
    """
    version_id = osutils.safe_revision_id(version_id)
    record_map = self._get_record_map([version_id])
    # Only the digest is needed; the other fields are discarded.
    _method, _content, digest, _next = record_map[version_id]
    return digest

562

563

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A list holding the data suffix followed by the index suffix.
    """
    return [DATA_SUFFIX, INDEX_SUFFIX]

567

568

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version named as a parent in the graph that is itself
    absent from the index cache.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    items = self._index.get_graph()
    for _node, parents in items:
        for parent in parents:
            if parent not in self._index._cache:
                if parent == version_id:
                    return True
    return False

582

583

def versions(self):
    """See VersionedFile.versions.

    :return: Whatever the index's ``get_versions`` returns.
    """
    return self._index.get_versions()

586

587

def has_version(self, version_id):
    """See VersionedFile.has_version.

    The id is passed through osutils.safe_revision_id before the index
    is consulted.
    """
    version_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(version_id)

# ``version_id in knit`` is an alias for has_version.
__contains__ = has_version

593

594

def _merge_annotations(self, content, parents, parent_texts={},

595

delta=None, annotated=None):

596

"""Merge annotations for content. This is done by comparing

597

the annotations based on changed to the text.

598

"""

599

if annotated:

600

delta_seq = None

601

for parent_id in parents:

602

merge_content = self._get_content(parent_id, parent_texts)

603

seq = patiencediff.PatienceSequenceMatcher(

604

None, merge_content.text(), content.text())

605

if delta_seq is None:

606

# setup a delta seq to reuse.

607

delta_seq = seq

608

for i, j, n in seq.get_matching_blocks():

609

if n == 0:

610

continue

611

# this appears to copy (origin, text) pairs across to the new

612

# content for any line that matches the last-checked parent.

613

# FIXME: save the sequence control data for delta compression

614

# against the most relevant parent rather than rediffing.

615

content._lines[j:j+n] = merge_content._lines[i:i+n]

616

if delta:

617

if not annotated:

618

reference_content = self._get_content(parents[0], parent_texts)

619

new_texts = content.text()

620

old_texts = reference_content.text()

621

delta_seq = patiencediff.PatienceSequenceMatcher(

622

None, old_texts, new_texts)

623

return self._make_line_delta(delta_seq, content)

624

625

def _make_line_delta(self, delta_seq, new_content):

626

"""Generate a line delta from delta_seq and new_content."""

627

diff_hunks = []

628

for op in delta_seq.get_opcodes():

629

if op[0] == 'equal':

630

continue

631

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

632

return diff_hunks

633

634

def _get_components_positions(self, version_ids):

635

"""Produce a map of position data for the components of versions.

1264

636

1265

637

This data is intended to be used for retrieving the knit records.

1266

638

1267

A dict of key to (record_details, index_memo, next, parents) is

639

A dict of version_id to (method, data_pos, data_size, next) is

1268

640

returned.

1269

1270

* method is the way referenced data should be applied.

1271

* index_memo is the handle to pass to the data access to actually get

1272

the data

1273

* next is the build-parent of the version, or None for fulltexts.

1274

* parents is the version_ids of the parents of this version

1275

1276

:param allow_missing: If True do not raise an error on a missing

1277

component, just ignore it.

641

method is the way referenced data should be applied.

642

data_pos is the position of the data in the knit.

643

data_size is the size of the data in the knit.

644

next is the build-parent of the version, or None for fulltexts.

1278

645

"""

1279

646

component_data = {}

1280

pending_components = keys

1281

while pending_components:

1282

build_details = self._index.get_build_details(pending_components)

1283

current_components = set(pending_components)

1284

pending_components = set()

1285

for key, details in build_details.items():

1286

(index_memo, compression_parent, parents,

1287

record_details) = details

1288

if compression_parent is not None:

1289

pending_components.add(compression_parent)

1290

component_data[key] = self._build_details_to_components(

1291

details)

1292

missing = current_components.difference(build_details)

1293

if missing and not allow_missing:

1294

raise errors.RevisionNotPresent(missing.pop(), self)

647

for version_id in version_ids:

648

cursor = version_id

649

650

while cursor is not None and cursor not in component_data:

651

method = self._index.get_method(cursor)

652

if method == 'fulltext':

653

next = None

654

else:

655

next = self.get_parents(cursor)[0]

656

data_pos, data_size = self._index.get_position(cursor)

657

component_data[cursor] = (method, data_pos, data_size, next)

658

cursor = next

1295

659

return component_data

1296

1297

def _get_content(self, key, parent_texts={}):

660

661

def _get_content(self, version_id, parent_texts={}):

1298

662

"""Returns a content object that makes up the specified

1299

663

version."""

1300

cached_version = parent_texts.get(key, None)

664

if not self.has_version(version_id):

665

raise RevisionNotPresent(version_id, self.filename)

666

667

cached_version = parent_texts.get(version_id, None)

1301

668

if cached_version is not None:

1302

# Ensure the cache dict is valid.

1303

if not self.get_parent_map([key]):

1304

raise RevisionNotPresent(key, self)

1305

669

return cached_version

1306

generator = _VFContentMapGenerator(self, [key])

1307

return generator._get_content(key)

1308

1309

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the merged mapping is wanted; the per-source list is dropped.
    return self._get_parent_map_with_sources(keys)[0]

1317

1318

def _get_parent_map_with_sources(self, keys):

1319

"""Get a map of the parents of keys.

1320

1321

:param keys: The keys to look up parents for.

1322

:return: A tuple. The first element is a mapping from keys to parents.

1323

Absent keys are absent from the mapping. The second element is a

1324

list with the locations each key was found in. The first element

1325

is the in-this-knit parents, the second the first fallback source,

1326

and so on.

1327

"""

1328

result = {}

1329

sources = [self._index] + self._immediate_fallback_vfs

1330

source_results = []

1331

missing = set(keys)

1332

for source in sources:

1333

if not missing:

1334

break

1335

new_result = source.get_parent_map(missing)

1336

source_results.append(new_result)

1337

result.update(new_result)

1338

missing.difference_update(set(new_result))

1339

return result, source_results

1340

1341

def _get_record_map(self, keys, allow_missing=False):

670

671

text_map, contents_map = self._get_content_maps([version_id])

672

return contents_map[version_id]

673

674

def _check_versions_present(self, version_ids):

675

"""Check that all specified versions are present."""

676

self._index.check_versions_present(version_ids)

677

678

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

679

"""See VersionedFile.add_lines_with_ghosts()."""

680

self._check_add(version_id, lines)

681

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

682

683

def _add_lines(self, version_id, parents, lines, parent_texts):

684

"""See VersionedFile.add_lines."""

685

self._check_add(version_id, lines)

686

self._check_versions_present(parents)

687

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

688

689

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    if self.has_version(version_id):
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

700

701

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: List of line strings; the last entry is modified in
        place when the text lacks a final newline.
    :return: The content object built by the factory for the new text.
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    if parent_texts is None:
        parent_texts = {}
    # Parents that are absent here are ghosts and cannot be diffed against.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not len(present_parents):
        delta = False

    digest = sha_strings(lines)
    options = []
    if lines:
        # endswith() also copes with an empty final line, which the old
        # lines[-1][-1] indexing could not.
        if not lines[-1].endswith('\n'):
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(content, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(content)

    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return content

764

765

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """

767

768

def _clone_text(self, new_version_id, old_version_id, parents):

769

"""See VersionedFile.clone_text()."""

770

# FIXME RBC 20060228 make fast by only inserting an index with null

771

# delta.

772

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

773

774

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    :return: The first (only) entry of ``get_line_list([version_id])``.
    """
    return self.get_line_list([version_id])[0]

777

778

def _get_record_map(self, version_ids):

1342

779

"""Produce a dictionary of knit records.

1343

1344

:return: {key:(record, record_details, digest, next)}

1345

1346

* record: data returned from read_records (a KnitContent object)

1347

* record_details: opaque information to pass to parse_record

1348

* digest: SHA1 digest of the full text after all steps are done

1349

* next: build-parent of the version, i.e. the leftmost ancestor.

1350

Will be None if the record is not a delta.

1351

1352

:param keys: The keys to build a map for

1353

:param allow_missing: If some records are missing, rather than

1354

error, just return the data that could be generated.

1355

"""

1356

raw_map = self._get_record_map_unparsed(keys,

1357

allow_missing=allow_missing)

1358

return self._raw_map_to_record_map(raw_map)

1359

1360

def _raw_map_to_record_map(self, raw_map):

1361

"""Parse the contents of _get_record_map_unparsed.

1362

1363

:return: see _get_record_map.

1364

"""

1365

result = {}

1366

for key in raw_map:

1367

data, record_details, next = raw_map[key]

1368

content, digest = self._parse_record(key[-1], data)

1369

result[key] = content, record_details, digest, next

1370

return result

1371

1372

def _get_record_map_unparsed(self, keys, allow_missing=False):

1373

"""Get the raw data for reconstructing keys without parsing it.

1374

1375

:return: A dict suitable for parsing via _raw_map_to_record_map.

1376

key-> raw_bytes, (method, noeol), compression_parent

1377

"""

1378

# This retries the whole request if anything fails. Potentially we

1379

# could be a bit more selective. We could track the keys whose records

1380

# we have successfully found, and then only request the new records

1381

# from there. However, _get_components_positions grabs the whole build

1382

# chain, which means we'll likely try to grab the same records again

1383

# anyway. Also, can the build chains change as part of a pack

1384

# operation? We wouldn't want to end up with a broken chain.

1385

while True:

1386

try:

1387

position_map = self._get_components_positions(keys,

1388

allow_missing=allow_missing)

1389

# key = component_id, r = record_details, i_m = index_memo,

1390

# n = next

1391

records = [(key, i_m) for key, (r, i_m, n)

1392

in position_map.items()]

1393

# Sort by the index memo, so that we request records from the

1394

# same pack file together, and in forward-sorted order

1395

records.sort(key=operator.itemgetter(1))

1396

raw_record_map = {}

1397

for key, data in self._read_records_iter_unchecked(records):

1398

(record_details, index_memo, next) = position_map[key]

1399

raw_record_map[key] = data, record_details, next

1400

return raw_record_map

1401

except errors.RetryWithNewPacks as e:

1402

self._access.reload_or_raise(e)

1403

1404

@classmethod

1405

def _split_by_prefix(cls, keys):

1406

"""For the given keys, split them up based on their prefix.

1407

1408

To keep memory pressure somewhat under control, split the

1409

requests back into per-file-id requests, otherwise "bzr co"

1410

extracts the full tree into memory before writing it to disk.

1411

This should be revisited if _get_content_maps() can ever cross

1412

file-id boundaries.

1413

1414

The keys for a given file_id are kept in the same relative order.

1415

Ordering between file_ids is not, though prefix_order will return the

1416

order that the key was first seen.

1417

1418

:param keys: An iterable of key tuples

1419

:return: (split_map, prefix_order)

1420

split_map A dictionary mapping prefix => keys

1421

prefix_order The order that we saw the various prefixes

1422

"""

1423

split_by_prefix = {}

1424

prefix_order = []

1425

for key in keys:

1426

if len(key) == 1:

1427

prefix = b''

1428

else:

1429

prefix = key[0]

1430

1431

if prefix in split_by_prefix:

1432

split_by_prefix[prefix].append(key)

1433

else:

1434

split_by_prefix[prefix] = [key]

1435

prefix_order.append(prefix)

1436

return split_by_prefix, prefix_order

1437

1438

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    :param keys: An iterable of key tuples to group.
    :param non_local_keys: An iterable of key tuples; those sharing a
        prefix with a group are returned alongside that group.
    :param positions: Passed through to
        self._index._get_total_build_size() to size each prefix.
    :param _min_buffer_size: A group is closed once its accumulated build
        size exceeds this value.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together. Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys,
                                                      positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            # Big enough: close this group and start a fresh one.
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    if cur_keys:
        # Whatever is left forms the final, possibly undersized, group.
        result.append((cur_keys, cur_non_local))
    return result

1480

1481

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # keys might be a generator, so take a private copy up front.
    pending = set(keys)
    if not pending:
        return
    if not self._index.has_graph:
        # Cannot sort when no graph has been stored.
        ordering = 'unordered'

    while True:
        try:
            # Every attempt works on a snapshot; keys already yielded have
            # been dropped from ``pending`` and are not requested again.
            attempt_keys = set(pending)
            stream = self._get_remaining_record_stream(
                attempt_keys, ordering, include_delta_closure)
            for factory in stream:
                pending.discard(factory.key)
                yield factory
        except errors.RetryWithNewPacks as e:
            self._access.reload_or_raise(e)
        else:
            return

1512

1513

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    :param keys: A set of keys; set operations (``difference`` and ``-``)
        are applied to it directly.
    :param ordering: 'unordered', 'topological' or 'groupcompress'; any
        other value raises AssertionError.
    :param include_delta_closure: If true, records are produced through
        _VFContentMapGenerator; otherwise raw records are yielded as
        KnitContentFactory objects or delegated to a fallback.
    :return: A generator that first yields an AbsentContentFactory for
        every requested key missing from the combined parent map and then
        the records for the keys that are present.
    """
    if include_delta_closure:
        positions = self._get_components_positions(
            keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
                         for key, details in build_details.items())
    # NOTE(review): this value is recomputed from global_map further down
    # before it is read.
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                needed_from_fallback.add(key)
                continue
            result = True
            # Walk compression parents until a fulltext (None parent), an
            # already classified key, or a missing component is reached.
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            # The last chain entry is the terminator, not a member key.
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering in ('topological', 'groupcompress'):
        if ordering == 'topological':
            # Global topological sort
            present_keys = tsort.topo_sort(global_map)
        else:
            present_keys = sort_groupcompress(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            # NOTE(review): if a key is in none of parent_maps, key_source
            # keeps its previous value (or is unbound on the first pass);
            # presumably every key of global_map is in one of parent_maps
            # -- confirm against _get_parent_map_with_sources.
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                                 ' "unordered", "groupcompress" or "topological" not: %r'
                                 % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        # Note: the loop rebinds both ``keys`` and ``non_local_keys`` to
        # the per-group values returned by _group_keys_for_io.
        for keys, non_local_keys in self._group_keys_for_io(present_keys,
                                                            non_local_keys,
                                                            positions):
            generator = _VFContentMapGenerator(self, keys, non_local_keys,
                                               global_map,
                                               ordering=ordering)
            for record in generator.get_record_stream():
                yield record
    else:
        # Note: ``keys`` is rebound here to each source's list of keys.
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data in self._read_records_iter_unchecked(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                                             record_details, None, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] is handled by the branch above, so the
                # remaining maps are offset by one relative to
                # _immediate_fallback_vfs.
                vf = self._immediate_fallback_vfs[parent_maps.index(
                    source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                    yield record

1633

1634

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests are taken from the local record map first; keys still
    unresolved are then asked of each immediate fallback in turn until
    none remain.
    """
    missing = set(keys)
    record_map = self._get_record_map(missing, allow_missing=True)
    # The record map may hold keys that were not requested; keep only the
    # ones that were.  Entry 2 of each record is the 'digest'.
    result = {key: details[2]
              for key, details in record_map.items()
              if key in missing}
    missing.difference_update(result)
    for source in self._immediate_fallback_vfs:
        if not missing:
            break
        fallback_sha1s = source.get_sha1s(missing)
        result.update(fallback_sha1s)
        missing.difference_update(fallback_sha1s)
    return result

1652

1653

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Knit records whose storage kind is usable here are copied across as
    raw bytes (after an adapter conversion when the kind is not native);
    'chunked' and 'file' records, and anything else, are inserted through
    add_lines().  Index entries for deltas whose compression parent is not
    yet in this index are buffered until that parent arrives.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if a record has storage kind 'absent'.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created lazily and cached for the whole insert.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        # A list here and a set in the other branch; it is only consumed
        # by native_types.union() below.
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = {"knit-annotated-ft-gz"}
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        kind = record.storage_kind
        if kind.startswith('knit-') and kind.endswith('-gz'):
            # Check that the ID in the header of the raw knit bytes matches
            # the record metadata.
            raw_data = record._raw_record
            df, rec = self._parse_record_header(record.key, raw_data)
            df.close()
        buffered = False
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types) and
              (compression_parent is None or
               not self._immediate_fallback_vfs or
               compression_parent in self._index or
               compression_parent not in self)):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.__contains__ is somewhat redundant with
            # self._index.__contains__; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind not in native_types:
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                # Note: the local name ``bytes`` shadows the builtin for
                # the remainder of this method.
                bytes = adapter.get_bytes(record, adapter_key[1])
            else:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                bytes = record._raw_record
            options = [record._build_details[0].encode('ascii')]
            if record._build_details[1]:
                options.append(b'no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_record(
                record.key, len(bytes), [bytes])
            index_entry = (record.key, options, access_memo, parents)
            if b'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                #
                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if compression_parent not in self._index:
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind in ('chunked', 'file'):
            self.add_lines(record.key, parents, record.get_bytes_as('lines'))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            self._access.flush()
            try:
                # Try getting a fulltext directly from the record.
                lines = record.get_bytes_as('lines')
            except UnavailableRepresentation:
                adapter_key = record.storage_kind, 'lines'
                adapter = get_adapter(adapter_key)
                lines = adapter.get_bytes(record, 'lines')
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Deliberately ignored: the text is already stored.
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            # Work-list walk: each newly indexed key may in turn release
            # entries that were buffered waiting for it.
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_records(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # So these need to be added (if the index supports holding such
        # entries for later insertion)
        all_entries = []
        for key in buffered_index_entries:
            index_entries = buffered_index_entries[key]
            all_entries.extend(index_entries)
        self._index.add_records(
            all_entries, missing_compression_parents=True)

1820

1821

def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Check this after calling insert_record_stream to find out if there are
    any missing compression parents.  If there are, the records that
    depend on them are not able to be inserted safely. For atomic
    KnitVersionedFiles built on packs, the transaction should be aborted or
    suspended - commit will fail at this point. Nonatomic knits will error
    earlier because they have no staging area to put pending entries into.
    """
    # The index is the single source of truth for this information.
    index = self._index
    return index.get_missing_compression_parents()

1832

1833

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: The keys to walk; they are copied into a set that is
        consumed as records are yielded.
    :param pb: Progress bar supplied by caller.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: if keys remain unfound locally and there
        are no immediate fallbacks to consult.
    """
    if pb is None:
        pb = ui.ui_factory.nested_progress_bar()
    keys = set(keys)
    total = len(keys)
    done = False
    while not done:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            key_records = []
            build_details = self._index.get_build_details(keys)
            for key, details in build_details.items():
                if key in keys:
                    key_records.append((key, details[0]))
            records_iter = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in records_iter:
                pb.update(gettext('Walking content'), key_idx, total)
                compression_parent = build_details[key][1]
                if compression_parent is None:
                    # fulltext
                    line_iterator = self._factory.get_fulltext_content(
                        data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(
                        data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                # (a retry after RetryWithNewPacks therefore only asks for
                # the keys that have not been yielded yet).
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            done = True
        except errors.RetryWithNewPacks as e:
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._immediate_fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._immediate_fallback_vfs:
        if not keys:
            break
        source_keys = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            source_keys.add(key)
            yield line, key
        # Only drop the keys once the fallback's iterator is exhausted.
        keys.difference_update(source_keys)
    pb.update(gettext('Walking content'), total, total)

1913

1914

def _make_line_delta(self, delta_seq, new_content):

1915

"""Generate a line delta from delta_seq and new_content."""

1916

diff_hunks = []

1917

for op in delta_seq.get_opcodes():

1918

if op[0] == 'equal':

1919

continue

1920

diff_hunks.append(

1921

(op[1], op[2], op[4] - op[3], new_content._lines[op[3]:op[4]]))

1922

return diff_hunks

1923

1924

def _merge_annotations(self, content, parents, parent_texts={},

1925

delta=None, annotated=None,

1926

left_matching_blocks=None):

1927

"""Merge annotations for content and generate deltas.

1928

1929

This is done by comparing the annotations based on changes to the text

1930

and generating a delta on the resulting full texts. If annotations are

1931

not being created then a simple delta is created.

1932

"""

1933

if left_matching_blocks is not None:

1934

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1935

else:

1936

delta_seq = None

1937

if annotated:

1938

for parent_key in parents:

1939

merge_content = self._get_content(parent_key, parent_texts)

1940

if (parent_key == parents[0] and delta_seq is not None):

1941

seq = delta_seq

1942

else:

1943

seq = patiencediff.PatienceSequenceMatcher(

1944

None, merge_content.text(), content.text())

1945

for i, j, n in seq.get_matching_blocks():

1946

if n == 0:

1947

continue

1948

# this copies (origin, text) pairs across to the new

1949

# content for any line that matches the last-checked

1950

# parent.

1951

content._lines[j:j + n] = merge_content._lines[i:i + n]

1952

# XXX: Robert says the following block is a workaround for a

1953

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1954

if content._lines and not content._lines[-1][1].endswith(b'\n'):

1955

# The copied annotation was from a line without a trailing EOL,

1956

# reinstate one for the content object, to ensure correct

1957

# serialization.

1958

line = content._lines[-1][1] + b'\n'

1959

content._lines[-1] = (content._lines[-1][0], line)

1960

if delta:

1961

if delta_seq is None:

1962

reference_content = self._get_content(parents[0], parent_texts)

1963

new_texts = content.text()

1964

old_texts = reference_content.text()

1965

delta_seq = patiencediff.PatienceSequenceMatcher(

1966

None, old_texts, new_texts)

1967

return self._make_line_delta(delta_seq, content)

1968

1969

def _parse_record(self, version_id, data):

1970

"""Parse an original format knit record.

1971

1972

These have the last element of the key only present in the stored data.

1973

"""

1974

rec, record_contents = self._parse_record_unchecked(data)

1975

self._check_header_version(rec, version_id)

1976

return record_contents, rec[3]

1977

1978

def _parse_record_header(self, key, raw_data):

1979

"""Parse a record header for consistency.

1980

1981

:return: the header and the decompressor stream.

1982

as (stream, header_record)

1983

"""

1984

df = gzip.GzipFile(mode='rb', fileobj=BytesIO(raw_data))

1985

try:

1986

# Current serialise

1987

rec = self._check_header(key, df.readline())

1988

except Exception as e:

1989

raise KnitCorrupt(self,

1990

"While reading {%s} got %s(%s)"

1991

% (key, e.__class__.__name__, str(e)))

1992

return df, rec

1993

1994

def _parse_record_unchecked(self, data):

1995

# profiling notes:

1996

# 4168 calls in 2880 217 internal

1997

# 4168 calls to _parse_record_header in 2121

1998

# 4168 calls to readlines in 330

1999

with gzip.GzipFile(mode='rb', fileobj=BytesIO(data)) as df:

2000

try:

2001

record_contents = df.readlines()

2002

except Exception as e:

2003

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

2004

(data, e.__class__.__name__, str(e)))

2005

header = record_contents.pop(0)

2006

rec = self._split_header(header)

2007

last_line = record_contents.pop()

2008

if len(record_contents) != int(rec[2]):

2009

raise KnitCorrupt(self,

2010

'incorrect number of lines %s != %s'

2011

' for version {%s} %s'

2012

% (len(record_contents), int(rec[2]),

2013

rec[1], record_contents))

2014

if last_line != b'end %s\n' % rec[1]:

2015

raise KnitCorrupt(self,

2016

'unexpected version end line %r, wanted %r'

2017

% (last_line, rec[1]))

2018

return rec, record_contents

2019

2020

def _read_records_iter(self, records):

2021

"""Read text records from data file and yield result.

2022

2023

The result will be returned in whatever is the fastest to read.

2024

Not by the order requested. Also, multiple requests for the same

2025

record will only yield 1 response.

2026

2027

:param records: A list of (key, access_memo) entries

2028

:return: Yields (key, contents, digest) in the order

2029

read, not the order requested

2030

"""

2031

if not records:

2032

return

2033

2034

# XXX: This smells wrong, IO may not be getting ordered right.

2035

needed_records = sorted(set(records), key=operator.itemgetter(1))

2036

if not needed_records:

2037

return

2038

2039

# The transport optimizes the fetching as well

2040

# (ie, reads continuous ranges.)

2041

raw_data = self._access.get_raw_records(

2042

[index_memo for key, index_memo in needed_records])

2043

2044

for (key, index_memo), data in zip(needed_records, raw_data):

2045

content, digest = self._parse_record(key[-1], data)

2046

yield key, content, digest

2047

2048

def _read_records_iter_raw(self, records):

2049

"""Read text records from data file and yield raw data.

2050

2051

This unpacks enough of the text record to validate the id is

2052

as expected but thats all.

2053

2054

Each item the iterator yields is (key, bytes,

2055

expected_sha1_of_full_text).

2056

"""

2057

for key, data in self._read_records_iter_unchecked(records):

2058

# validate the header (note that we can only use the suffix in

2059

# current knit records).

2060

df, rec = self._parse_record_header(key, data)

2061

df.close()

2062

yield key, data, rec[3]

2063

2064

def _read_records_iter_unchecked(self, records):

2065

"""Read text records from data file and yield raw data.

2066

2067

No validation is done.

2068

2069

Yields tuples of (key, data).

2070

"""

2071

# setup an iterator of the external records:

2072

# uses readv so nice and fast we hope.

2073

if len(records):

2074

# grab the disk data needed.

2075

needed_offsets = [index_memo for key, index_memo

2076

in records]

2077

raw_records = self._access.get_raw_records(needed_offsets)

2078

2079

for key, index_memo in records:

2080

data = next(raw_records)

2081

yield key, data

2082

2083

def _record_to_data(self, key, digest, lines, dense_lines=None):

2084

"""Convert key, digest, lines into a raw data block.

2085

2086

:param key: The key of the record. Currently keys are always serialised

2087

using just the trailing component.

2088

:param dense_lines: The bytes of lines but in a denser form. For

2089

instance, if lines is a list of 1000 bytestrings each ending in

2090

\\n, dense_lines may be a list with one line in it, containing all

2091

the 1000's lines and their \\n's. Using dense_lines if it is

2092

already known is a win because the string join to create bytes in

2093

this function spends less time resizing the final string.

2094

:return: (len, chunked bytestring with compressed data)

2095

"""

2096

chunks = [b"version %s %d %s\n" % (key[-1], len(lines), digest)]

2097

chunks.extend(dense_lines or lines)

2098

chunks.append(b"end " + key[-1] + b"\n")

2099

for chunk in chunks:

2100

if not isinstance(chunk, bytes):

2101

raise AssertionError(

2102

'data must be plain bytes was %s' % type(chunk))

2103

if lines and not lines[-1].endswith(b'\n'):

2104

raise ValueError('corrupt lines value %r' % lines)

2105

compressed_chunks = tuned_gzip.chunks_to_gzip(chunks)

2106

return sum(map(len, compressed_chunks)), compressed_chunks

2107

2108

def _split_header(self, line):

2109

rec = line.split()

2110

if len(rec) != 4:

2111

raise KnitCorrupt(self,

2112

'unexpected number of elements in record header')

2113

return rec

2114

2115

def keys(self):
    """See VersionedFiles.keys.

    The result is the union of the keys reported by this object's index
    and by every immediate fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    sources = [self._index] + self._immediate_fallback_vfs
    return {key for source in sources for key in source.keys()}

2124

2125

2126

class _ContentMapGenerator(object):

2127

"""Generate texts or expose raw deltas for a set of texts."""

2128

2129

def __init__(self, ordering='unordered'):
    """Remember the requested ordering.

    :param ordering: Stored unchanged as ``self._ordering``; defaults to
        'unordered'.
    """
    self._ordering = ordering

2131

2132

def _get_content(self, key):

2133

"""Get the content object for key."""

2134

# Note that _get_content is only called when the _ContentMapGenerator

2135

# has been constructed with just one key requested for reconstruction.

2136

if key in self.nonlocal_keys:

2137

record = next(self.get_record_stream())

2138

# Create a content object on the fly

2139

lines = record.get_bytes_as('lines')

2140

return PlainKnitContent(lines, record.key)

2141

else:

2142

# local keys we can ask for directly

2143

return self._get_one_work(key)

2144

2145

def get_record_stream(self):
    """Get a record stream for the keys requested during __init__.

    :return: A generator over the records produced by self._work().
    """
    yield from self._work()

2149

2150

def _work(self):

2151

"""Produce maps of text and KnitContents as dicts.

2152

780

781

The keys are version_ids, the values are tuples of (method, content,

782

digest, next).

783

method is the way the content should be applied.

784

content is a KnitContent object.

785

digest is the SHA1 digest of this version id after all steps are done

786

next is the build-parent of the version, i.e. the leftmost ancestor.

787

If the method is fulltext, next will be None.

788

"""

789

position_map = self._get_components_positions(version_ids)

790

# c = component_id, m = method, p = position, s = size, n = next

791

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

792

record_map = {}

793

for component_id, content, digest in \

794

self._data.read_records_iter(records):

795

method, position, size, next = position_map[component_id]

796

record_map[component_id] = method, content, digest, next

797

798

return record_map

799

800

def get_text(self, version_id):

801

"""See VersionedFile.get_text"""

802

return self.get_texts([version_id])[0]

803

804

def get_texts(self, version_ids):

805

return [''.join(l) for l in self.get_line_list(version_ids)]

806

807

def get_line_list(self, version_ids):

808

"""Return the texts of listed versions as a list of strings."""

809

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

810

for version_id in version_ids:

811

self.check_not_reserved_id(version_id)

812

text_map, content_map = self._get_content_maps(version_ids)

813

return [text_map[v] for v in version_ids]

814

815

def _get_content_maps(self, version_ids):

816

"""Produce maps of text and KnitContents

817

2153

818

:return: (text_map, content_map) where text_map contains the texts for

2154

the requested versions and content_map contains the KnitContents.

819

the requested versions and content_map contains the KnitContents.

820

Both dicts take version_ids as their keys.

2155

821

"""

2156

# NB: By definition we never need to read remote sources unless texts

2157

# are requested from them: we don't delta across stores - and we

2158

# explicitly do not want to, to prevent data loss situations.

2159

if self.global_map is None:

2160

self.global_map = self.vf.get_parent_map(self.keys)

2161

nonlocal_keys = self.nonlocal_keys

2162

2163

missing_keys = set(nonlocal_keys)

2164

# Read from remote versioned file instances and provide to our caller.

2165

for source in self.vf._immediate_fallback_vfs:

2166

if not missing_keys:

2167

break

2168

# Loop over fallback repositories asking them for texts - ignore

2169

# any missing from a particular fallback.

2170

for record in source.get_record_stream(missing_keys,

2171

self._ordering, True):

2172

if record.storage_kind == 'absent':

2173

# Not in this particular stream, may be in one of the

2174

# other fallback vfs objects.

2175

continue

2176

missing_keys.remove(record.key)

2177

yield record

2178

2179

if self._raw_record_map is None:

2180

raise AssertionError('_raw_record_map should have been filled')

2181

first = True

2182

for key in self.keys:

2183

if key in self.nonlocal_keys:

2184

continue

2185

yield LazyKnitContentFactory(key, self.global_map[key], self, first)

2186

first = False

2187

2188

def _get_one_work(self, requested_key):

2189

# Now, if we have calculated everything already, just return the

2190

# desired text.

2191

if requested_key in self._contents_map:

2192

return self._contents_map[requested_key]

2193

# To simplify things, parse everything at once - code that wants one text

2194

# probably wants them all.

2195

# FUTURE: This function could be improved for the 'extract many' case

2196

# by tracking each component and only doing the copy when the number of

2197

# children than need to apply delta's to it is > 1 or it is part of the

2198

# final output.

2199

multiple_versions = len(self.keys) != 1

2200

if self._record_map is None:

2201

self._record_map = self.vf._raw_map_to_record_map(

2202

self._raw_record_map)

2203

record_map = self._record_map

2204

# raw_record_map is key:

2205

# Have read and parsed records at this point.

2206

for key in self.keys:

2207

if key in self.nonlocal_keys:

2208

# already handled

2209

continue

822

for version_id in version_ids:

823

if not self.has_version(version_id):

824

raise RevisionNotPresent(version_id, self.filename)

825

record_map = self._get_record_map(version_ids)

826

827

text_map = {}

828

content_map = {}

829

final_content = {}

830

for version_id in version_ids:

2210

831

components = []

2211

cursor = key

832

cursor = version_id

2212

833

while cursor is not None:

2213

try:

2214

record, record_details, digest, next = record_map[cursor]

2215

except KeyError:

2216

raise RevisionNotPresent(cursor, self)

2217

components.append((cursor, record, record_details, digest))

2218

cursor = next

2219

if cursor in self._contents_map:

2220

# no need to plan further back

2221

components.append((cursor, None, None, None))

834

method, data, digest, next = record_map[cursor]

835

components.append((cursor, method, data, digest))

836

if cursor in content_map:

2222

837

break

838

cursor = next

2223

839

2224

840

content = None

2225

for (component_id, record, record_details,

2226

digest) in reversed(components):

2227

if component_id in self._contents_map:

2228

content = self._contents_map[component_id]

841

for component_id, method, data, digest in reversed(components):

842

if component_id in content_map:

843

content = content_map[component_id]

2229

844

else:

2230

content, delta = self._factory.parse_record(

2231

key[-1], record, record_details, content,

2232

copy_base_content=multiple_versions)

2233

if multiple_versions:

2234

self._contents_map[component_id] = content

845

if method == 'fulltext':

846

assert content is None

847

content = self.factory.parse_fulltext(data, version_id)

848

elif method == 'line-delta':

849

delta = self.factory.parse_line_delta(data, version_id)

850

content = content.copy()

851

content._lines = self._apply_delta(content._lines,

852

delta)

853

content_map[component_id] = content

854

855

if 'no-eol' in self._index.get_options(version_id):

856

content = content.copy()

857

line = content._lines[-1][1].rstrip('\n')

858

content._lines[-1] = (content._lines[-1][0], line)

859

final_content[version_id] = content

2235

860

2236

861

# digest here is the digest from the last applied component.

2237

862

text = content.text()

2238

actual_sha = sha_strings(text)

2239

if actual_sha != digest:

2240

raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)

2241

if multiple_versions:

2242

return self._contents_map[requested_key]

2243

else:

2244

return content

2245

2246

def _wire_bytes(self):
    """Serialise this generator's raw record map for the wire.

    The result is four newline-joined parts:

    * the kind marker ``knit-delta-closure``, for dispatch on the far side;
    * ``annotated`` when self.vf._factory.annotated is true, else empty;
    * the keys of self.keys that are not in self.nonlocal_keys, tab
      separated, with the elements of each key joined by NUL;
    * every item of self._raw_record_map, concatenated with no separator.

    Each map item is serialised as newline-joined fields:

    * the key;
    * its parents from self.global_map (``None:`` when there is no entry,
      empty for ``()``);
    * the method;
    * ``T`` or ``F`` for noeol;
    * the next key (empty for None);
    * the byte count of the record bytes;
    * the record bytes themselves.

    :return: Bytes to put on the wire.
    """
    if self.vf._factory.annotated:
        annotation_marker = b'annotated'
    else:
        annotation_marker = b''
    local_keys_line = b'\t'.join(
        b'\x00'.join(key) for key in self.keys
        if key not in self.nonlocal_keys)
    serialised_items = []
    for key, (record_bytes, (method, noeol), next_key) in (
            self._raw_record_map.items()):
        key_bytes = b'\x00'.join(key)
        parents = self.global_map.get(key, None)
        if parents is None:
            parent_bytes = b'None:'
        else:
            parent_bytes = b'\t'.join(
                b'\x00'.join(parent) for parent in parents)
        fields = [
            key_bytes,
            parent_bytes,
            method.encode('ascii'),
            b"T" if noeol else b"F",
            b'\x00'.join(next_key) if next_key else b'',
            b'%d' % len(record_bytes),
            record_bytes,
        ]
        serialised_items.append(b'\n'.join(fields))
    return b'\n'.join([
        b'knit-delta-closure',
        annotation_marker,
        local_keys_line,
        b''.join(serialised_items),
    ])

2303

2304

2305

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
                 global_map=None, raw_record_map=None, ordering='unordered'):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for. Any iterable is
            accepted, including a one-shot iterator.
        :param nonlocal_keys: An iterable of keys (possibly intersecting
            keys) which are known to not be in this knit, but rather in one
            of the fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a
            supermap). This is required if get_record_stream() is to be used.
        :param raw_record_map: An unparsed raw record map to use for
            answering contents. When None it is read from versioned_files.
        :param ordering: Passed through to _ContentMapGenerator.__init__.
        """
        _ContentMapGenerator.__init__(self, ordering=ordering)
        # Materialise the keys once. They are needed both for self.keys and
        # for the raw record lookup below, and a one-shot iterator would be
        # empty by the time of the second use.
        keys = list(keys)
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired (a private copy, independent of the lookup below)
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            self._raw_record_map = self.vf._get_record_map_unparsed(
                keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2349

2350

2351

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised block; it is kept verbatim and returned
            unchanged by _wire_bytes().
        :param line_end: Offset into bytes at which parsing starts (the
            'annotated or not' line).
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)

        def read_line(offset):
            # Return the text from offset up to the next newline, plus the
            # offset just past that newline.
            newline = bytes.find(b'\n', offset)
            return bytes[offset:newline], newline + 1

        def split_keys(text):
            # Tab-separated keys whose elements are NUL-separated; empty
            # segments are dropped.
            return [tuple(segment.split(b'\x00'))
                    for segment in text.split(b'\t') if segment]

        # Annotated or not
        flavour, cursor = read_line(line_end)
        if flavour == b'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        keys_text, cursor = read_line(cursor)
        self.keys = split_keys(keys_text)
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives,
        # but there is a decent sized gap stopping that at the moment.
        total = len(bytes)
        while cursor < total:
            # 1 line with key
            key_text, cursor = read_line(cursor)
            key = tuple(key_text.split(b'\x00'))
            # 1 line with parents (None: for None, '' for ())
            parents_text, cursor = read_line(cursor)
            if parents_text == b'None:':
                parents = None
            else:
                parents = tuple(split_keys(parents_text))
            self.global_map[key] = parents
            # one line with method
            method_text, cursor = read_line(cursor)
            method = method_text.decode('ascii')
            # one line with noeol
            noeol_text, cursor = read_line(cursor)
            noeol = noeol_text == b"T"
            # one line with next (b'' for None)
            next_text, cursor = read_line(cursor)
            if next_text:
                build_parent = tuple(next_text.split(b'\x00'))
            else:
                build_parent = None
            # one line with byte count of the record bytes
            count_text, cursor = read_line(cursor)
            count = int(count_text)
            # the record bytes
            record_bytes = bytes[cursor:cursor + count]
            cursor += count
            # put it in the map
            self._raw_record_map[key] = (
                record_bytes, (method, noeol), build_parent)

    def get_record_stream(self):
        """Get a record stream for the keys requested by the bytestream."""
        for position, key in enumerate(self.keys):
            # Only the first factory is flagged as first.
            yield LazyKnitContentFactory(
                key, self.global_map[key], self, position == 0)

    def _wire_bytes(self):
        """Return the bytes block this generator was constructed from."""
        return self._bytes

2438

2439

2440

class _KndxIndex(object):

2441

"""Manages knit index files

2442

2443

The index is kept in memory and read on startup, to enable

863

if sha_strings(text) != digest:

864

raise KnitCorrupt(self.filename,

865

'sha-1 does not match %s' % version_id)

866

867

text_map[version_id] = text

868

return text_map, final_content

869

870

def iter_lines_added_or_present_in_versions(self, version_ids=None,

871

pb=None):

872

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

873

if version_ids is None:

874

version_ids = self.versions()

875

else:

876

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

877

if pb is None:

878

pb = progress.DummyProgress()

879

# we don't care about inclusions, the caller cares.

880

# but we need to setup a list of records to visit.

881

# we need version_id, position, length

882

version_id_records = []

883

requested_versions = set(version_ids)

884

# filter for available versions

885

for version_id in requested_versions:

886

if not self.has_version(version_id):

887

raise RevisionNotPresent(version_id, self.filename)

888

# get a in-component-order queue:

889

for version_id in self.versions():

890

if version_id in requested_versions:

891

data_pos, length = self._index.get_position(version_id)

892

version_id_records.append((version_id, data_pos, length))

893

894

total = len(version_id_records)

895

for version_idx, (version_id, data, sha_value) in \

896

enumerate(self._data.read_records_iter(version_id_records)):

897

pb.update('Walking content.', version_idx, total)

898

method = self._index.get_method(version_id)

899

900

assert method in ('fulltext', 'line-delta')

901

if method == 'fulltext':

902

line_iterator = self.factory.get_fulltext_content(data)

903

else:

904

line_iterator = self.factory.get_linedelta_content(data)

905

for line in line_iterator:

906

yield line

907

908

pb.update('Walking content.', total, total)

909

910

def num_versions(self):

911

"""See VersionedFile.num_versions()."""

912

return self._index.num_versions()

913

914

__len__ = num_versions

915

916

def annotate_iter(self, version_id):

917

"""See VersionedFile.annotate_iter."""

918

version_id = osutils.safe_revision_id(version_id)

919

content = self._get_content(version_id)

920

for origin, text in content.annotate_iter():

921

yield origin, text

922

923

def get_parents(self, version_id):

924

"""See VersionedFile.get_parents."""

925

# perf notes:

926

# optimism counts!

927

# 52554 calls in 1264 872 internal down from 3674

928

version_id = osutils.safe_revision_id(version_id)

929

try:

930

return self._index.get_parents(version_id)

931

except KeyError:

932

raise RevisionNotPresent(version_id, self.filename)

933

934

def get_parents_with_ghosts(self, version_id):

935

"""See VersionedFile.get_parents."""

936

version_id = osutils.safe_revision_id(version_id)

937

try:

938

return self._index.get_parents_with_ghosts(version_id)

939

except KeyError:

940

raise RevisionNotPresent(version_id, self.filename)

941

942

def get_ancestry(self, versions):

943

"""See VersionedFile.get_ancestry."""

944

if isinstance(versions, basestring):

945

versions = [versions]

946

if not versions:

947

return []

948

versions = [osutils.safe_revision_id(v) for v in versions]

949

return self._index.get_ancestry(versions)

950

951

def get_ancestry_with_ghosts(self, versions):

952

"""See VersionedFile.get_ancestry_with_ghosts."""

953

if isinstance(versions, basestring):

954

versions = [versions]

955

if not versions:

956

return []

957

versions = [osutils.safe_revision_id(v) for v in versions]

958

return self._index.get_ancestry_with_ghosts(versions)

959

960

#@deprecated_method(zero_eight)

961

def walk(self, version_ids):

962

"""See VersionedFile.walk."""

963

# We take the short path here, and extract all relevant texts

964

# and put them in a weave and let that do all the work. Far

965

# from optimal, but is much simpler.

966

# FIXME RB 20060228 this really is inefficient!

967

from bzrlib.weave import Weave

968

969

w = Weave(self.filename)

970

ancestry = self.get_ancestry(version_ids)

971

sorted_graph = topo_sort(self._index.get_graph())

972

version_list = [vid for vid in sorted_graph if vid in ancestry]

973

974

for version_id in version_list:

975

lines = self.get_lines(version_id)

976

w.add_lines(version_id, self.get_parents(version_id), lines)

977

978

for lineno, insert_id, dset, line in w.walk(version_ids):

979

yield lineno, insert_id, dset, line

980

981

def plan_merge(self, ver_a, ver_b):

982

"""See VersionedFile.plan_merge."""

983

ver_a = osutils.safe_revision_id(ver_a)

984

ver_b = osutils.safe_revision_id(ver_b)

985

ancestors_b = set(self.get_ancestry(ver_b))

986

def status_a(revision, text):

987

if revision in ancestors_b:

988

return 'killed-b', text

989

else:

990

return 'new-a', text

991

992

ancestors_a = set(self.get_ancestry(ver_a))

993

def status_b(revision, text):

994

if revision in ancestors_a:

995

return 'killed-a', text

996

else:

997

return 'new-b', text

998

999

annotated_a = self.annotate(ver_a)

1000

annotated_b = self.annotate(ver_b)

1001

plain_a = [t for (a, t) in annotated_a]

1002

plain_b = [t for (a, t) in annotated_b]

1003

blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

1004

a_cur = 0

1005

b_cur = 0

1006

for ai, bi, l in blocks:

1007

# process all mismatched sections

1008

# (last mismatched section is handled because blocks always

1009

# includes a 0-length last block)

1010

for revision, text in annotated_a[a_cur:ai]:

1011

yield status_a(revision, text)

1012

for revision, text in annotated_b[b_cur:bi]:

1013

yield status_b(revision, text)

1014

1015

# and now the matched section

1016

a_cur = ai + l

1017

b_cur = bi + l

1018

for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):

1019

assert text_a == text_b

1020

yield "unchanged", text_a

1021

1022

1023

class _KnitComponentFile(object):

1024

"""One of the files used to implement a knit database"""

1025

1026

def __init__(self, transport, filename, mode, file_mode=None,

1027

create_parent_dir=False, dir_mode=None):

1028

self._transport = transport

1029

self._filename = filename

1030

self._mode = mode

1031

self._file_mode = file_mode

1032

self._dir_mode = dir_mode

1033

self._create_parent_dir = create_parent_dir

1034

self._need_to_create = False

1035

1036

def _full_path(self):

1037

"""Return the full path to this file."""

1038

return self._transport.base + self._filename

1039

1040

def check_header(self, fp):

1041

line = fp.readline()

1042

if line == '':

1043

# An empty file can actually be treated as though the file doesn't

1044

# exist yet.

1045

raise errors.NoSuchFile(self._full_path())

1046

if line != self.HEADER:

1047

raise KnitHeaderError(badline=line,

1048

filename=self._transport.abspath(self._filename))

1049

1050

def commit(self):

1051

"""Commit is a nop."""

1052

1053

def __repr__(self):

1054

return '%s(%s)' % (self.__class__.__name__, self._filename)

1055

1056

1057

class _KnitIndex(_KnitComponentFile):

1058

"""Manages knit index file.

1059

1060

The index is already kept in memory and read on startup, to enable

2444

1061

fast lookups of revision information. The cursor of the index

2445

1062

file is always pointing to the end, making it easy to append

2446

1063

entries.

2456

1073

2457

1074

Duplicate entries may be written to the index for a single version id

2458

1075

if this is done then the latter one completely replaces the former:

2459

this allows updates to correct version and parent information.

1076

this allows updates to correct version and parent information.

2460

1077

Note that the two entries may share the delta, and that successive

2461

1078

annotations and references MUST point to the first entry.

2462

1079

2463

1080

The index file on disc contains a header, followed by one line per knit

2464

1081

record. The same revision can be present in an index file more than once.

2465

The first occurrence gets assigned a sequence number starting from 0.

2466

1082

The first occurrence gets assigned a sequence number starting from 0.

1083

2467

1084

The format of a single line is

2468

1085

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

2469

1086

REVISION_ID is a utf8-encoded revision id

2470

FLAGS is a comma separated list of flags about the record. Values include

1087

FLAGS is a comma separated list of flags about the record. Values include

2471

1088

no-eol, line-delta, fulltext.

2472

1089

BYTE_OFFSET is the ascii representation of the byte offset in the data file

2473

that the compressed data starts at.

1090

that the the compressed data starts at.

2474

1091

LENGTH is the ascii representation of the length of the data file.

2475

1092

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

2476

1093

REVISION_ID.

2477

1094

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

2478

1095

revision id already in the knit that is a parent of REVISION_ID.

2479

1096

The ' :' marker is the end of record marker.

2480

1097

2481

1098

partial writes:

2482

1099

when a write is interrupted to the index file, it will result in a line

2483

1100

that does not end in ' :'. If the ' :' is not present at the end of a line,

2488

1105

to ensure that records always start on new lines even if the last write was

2489

1106

interrupted. As a result its normal for the last line in the index to be

2490

1107

missing a trailing newline. One can be added with no harmful effects.

2491

2492

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2493

where prefix is e.g. the (fileid,) for .texts instances or () for

2494

constant-mapped things like .revisions, and the old state is

2495

tuple(history_vector, cache_dict). This is used to prevent having an

2496

ABI change with the C extension that reads .kndx files.

2497

1108

"""

2498

1109

2499

HEADER = b"# bzr knit index 8\n"

2500

2501

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    :param transport: Transport the .kndx files are written to.
    :param mapper: Maps a key to the path of its index file; '.kndx' is
        appended to the result of mapper.map(key).
    :param get_scope: Callable whose result is compared against the cached
        scope to decide when the cache must be reset.
    :param allow_writes: Stored as self._allow_writes.
        NOTE(review): presumably a callable like is_locked -- confirm
        against callers.
    :param is_locked: Callable returning whether the index is locked.
    """
    self._transport, self._mapper = transport, mapper
    self._get_scope = get_scope
    self._allow_writes = allow_writes
    self._is_locked = is_locked
    # Reset only once every collaborator above is in place.
    self._reset_cache()
    self.has_graph = True

2510

2511

def add_records(self, records, random_id=False,
                missing_compression_parents=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file their key maps to, and the files
    are written in sorted path order. If anything fails while a file is
    being processed, the in-memory cache for that file's prefix is put back
    to its previous state and the error is re-raised.

    :param records: a list of tuples:
        (key, options, access_memo, parents).
    :param random_id: Not consulted by this implementation.
    :param missing_compression_parents: If true, RevisionNotPresent is
        raised for the sorted record keys before anything is written.
    :raises TypeError: If any option of a record is not bytes.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    records_by_path = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        index_path = self._mapper.map(key) + '.kndx'
        records_by_path.setdefault(index_path, (prefix, []))[1].append(record)
    for index_path in sorted(records_by_path):
        prefix, path_records = records_by_path[index_path]
        self._load_prefixes([prefix])
        new_lines = []
        # Snapshot the cached state so a failure can be rolled back.
        saved_history = self._kndx_cache[prefix][1][:]
        saved_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_records:
                if any(not isinstance(option, bytes) for option in options):
                    raise TypeError(options)
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                fields = [
                    b'\n' + key[-1],
                    b','.join(options),
                    b'%d' % pos,
                    b'%d' % size,
                    self._dictionary_compress(parents),
                    b':',
                ]
                line = b' '.join(fields)
                if not isinstance(line, bytes):
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                new_lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if saved_history:
                # The index already has entries: append to it.
                self._transport.append_bytes(index_path, b''.join(new_lines))
            else:
                self._init_index(index_path, new_lines)
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._kndx_cache[prefix] = (saved_cache, saved_history)
            raise

2566

2567

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: Always.
    """
    # kndx files have no atomic insertion via separate index files, so there
    # is never an unvalidated index to scan.
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)

2572

2573

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    :raises NotImplementedError: Always.
    """
    # kndx files have no atomic insertion via separate index files, so this
    # query is not supported.
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)

2578

2579

def _cache_key(self, key, options, pos, size, parent_keys):

1110

HEADER = "# bzr knit index 8\n"

1111

1112

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1113

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1114

1115

def _cache_version(self, version_id, options, pos, size, parents):

2580

1116

"""Cache a version record in the history array and index cache.

2581

1117

2582

1118

This is inlined into _load_data for performance. KEEP IN SYNC.

2583

1119

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

2584

1120

indexes).

2585

1121

"""

2586

prefix = key[:-1]

2587

version_id = key[-1]

2588

# last-element only for compatibility with the C load_data.

2589

parents = tuple(parent[-1] for parent in parent_keys)

2590

for parent in parent_keys:

2591

if parent[:-1] != prefix:

2592

raise ValueError("mismatched prefixes for %r, %r" % (

2593

key, parent_keys))

2594

cache, history = self._kndx_cache[prefix]

2595

1122

# only want the _history index to reference the 1st index entry

2596

1123

# for version_id

2597

if version_id not in cache:

2598

index = len(history)

2599

history.append(version_id)

2600

else:

2601

index = cache[version_id][5]

2602

cache[version_id] = (version_id,

2603

options,

2604

pos,

2605

size,

2606

parents,

2607

index)

2608

2609

def check_header(self, fp):
    """Check that fp starts with the expected index header line.

    Exactly one line is read from fp.

    :param fp: A file-like object opened for reading bytes.
    :raises errors.NoSuchFile: If the first line is empty.
    :raises KnitHeaderError: If the first line differs from self.HEADER.
    """
    first_line = fp.readline()
    if first_line == b'':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    if first_line == self.HEADER:
        return
    raise KnitHeaderError(badline=first_line, filename=self)

2617

2618

def _check_read(self):
    """Check the index is locked and drop the cache if the scope changed.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        self._reset_cache()

2623

2624

def _check_write_ok(self):
    """Raise if writes are not permitted.

    The cache is reset first when the current scope differs from the
    cached one.

    :raises errors.ObjectNotLocked: If self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: If self._mode is not 'w'.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        self._reset_cache()
    # NOTE(review): the visible __init__ stores allow_writes as
    # self._allow_writes and never assigns self._mode -- confirm _mode is
    # set elsewhere, otherwise this lookup raises AttributeError.
    writable = self._mode == 'w'
    if not writable:
        raise errors.ReadOnlyObjectDirtiedError(self)

2632

2633

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts (keys absent from get_parent_map(keys)) are omitted from the
    result.

    :param keys: An iterable of keys.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    :raises TypeError: If get_method() returns something other than a str.
    """
    known_parents = self.get_parent_map(keys)
    details = {}
    for key in keys:
        if key not in known_parents:
            continue  # Ghost
        method = self.get_method(key)
        if not isinstance(method, str):
            raise TypeError(method)
        parents = known_parents[key]
        # A fulltext stands alone; anything else builds on its first parent.
        compression_parent = None if method == 'fulltext' else parents[0]
        noeol = b'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        details[key] = (
            index_memo, compression_parent, parents, (method, noeol))
    return details

2670

2671

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both flags are
        present in the key's options.
    :raises KnitIndexUnknownMethod: If neither flag is in the options.
    """
    options = self.get_options(key)
    known_methods = (
        (b'fulltext', 'fulltext'),
        (b'line-delta', 'line-delta'),
    )
    for flag, method in known_methods:
        if flag in options:
            return method
    raise KnitIndexUnknownMethod(self, options)

2680

2681

def get_options(self, key):

2682

"""Return a list representing options.

2683

2684

e.g. ['foo', 'bar']

2685

"""

2686

prefix, suffix = self._split_key(key)

2687

self._load_prefixes([prefix])

1124

if version_id not in self._cache:

1125

index = len(self._history)

1126

self._history.append(version_id)

1127

else:

1128

index = self._cache[version_id][5]

1129

self._cache[version_id] = (version_id,

1130

options,

1131

pos,

1132

size,

1133

parents,

1134

index)

1135

1136

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1137

create_parent_dir=False, delay_create=False, dir_mode=None):

1138

_KnitComponentFile.__init__(self, transport, filename, mode,

1139

file_mode=file_mode,

1140

create_parent_dir=create_parent_dir,

1141

dir_mode=dir_mode)

1142

self._cache = {}

1143

# position in _history is the 'official' index for a revision

1144

# but the values may have come from a newer entry.

1145

# so - wc -l of a knit index is != the number of unique names

1146

# in the knit.

1147

self._history = []

2688

1148

try:

2689

return self._kndx_cache[prefix][0][suffix][1]

2690

except KeyError:

2691

raise RevisionNotPresent(key, self)

2692

2693

def find_ancestry(self, keys):

2694

"""See CombinedGraphIndex.find_ancestry()"""

2695

prefixes = set(key[:-1] for key in keys)

2696

self._load_prefixes(prefixes)

2697

result = {}

2698

parent_map = {}

2699

missing_keys = set()

2700

pending_keys = list(keys)

2701

# This assumes that keys will not reference parents in a different

2702

# prefix, which is accurate so far.

2703

while pending_keys:

2704

key = pending_keys.pop()

2705

if key in parent_map:

1149

fp = self._transport.get(self._filename)

1150

try:

1151

# _load_data may raise NoSuchFile if the target knit is

1152

# completely empty.

1153

self._load_data(fp)

1154

finally:

1155

fp.close()

1156

except NoSuchFile:

1157

if mode != 'w' or not create:

1158

raise

1159

elif delay_create:

1160

self._need_to_create = True

1161

else:

1162

self._transport.put_bytes_non_atomic(

1163

self._filename, self.HEADER, mode=self._file_mode)

1164

1165

def _load_data(self, fp):

1166

cache = self._cache

1167

history = self._history

1168

1169

self.check_header(fp)

1170

# readlines reads the whole file at once:

1171

# bad for transports like http, good for local disk

1172

# we save 60 ms doing this one change (

1173

# from calling readline each time to calling

1174

# readlines once.

1175

# probably what we want for nice behaviour on

1176

# http is an incremental readlines that yields, or

1177

# a check for local vs non local indexes,

1178

history_top = len(history) - 1

1179

for line in fp.readlines():

1180

rec = line.split()

1181

if len(rec) < 5 or rec[-1] != ':':

1182

# corrupt line.

1183

# FIXME: in the future we should determine if its a

1184

# short write - and ignore it

1185

# or a different failure, and raise. RBC 20060407

2706

1186

continue

2707

prefix = key[:-1]

2708

try:

2709

suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]

2710

except KeyError:

2711

missing_keys.add(key)

2712

else:

2713

parent_keys = tuple([prefix + (suffix,)

2714

for suffix in suffix_parents])

2715

parent_map[key] = parent_keys

2716

pending_keys.extend([p for p in parent_keys

2717

if p not in parent_map])

2718

return parent_map, missing_keys

2719

2720

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for. The iterable is
        materialised once, so a one-shot iterator is accepted.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice below (prefix collection, then lookup).
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            # Element 4 of a cached index entry is its list of parent
            # suffixes.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Unknown prefix or suffix: the key is simply not reported.
            pass
        else:
            result[key] = tuple(prefix + (suffix,) for
                                suffix in suffix_parents)
    return result

2743

2744

def get_position(self, key):
    """Return details needed to access the version.

    :param key: The key to look up; split with ``_split_key``.
    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    :raises KeyError: if the prefix or suffix is not in the loaded cache.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    entry = self._kndx_cache[prefix][0][suffix]
    # Entry elements 2 and 3 are the position and size.
    return key, entry[2], entry[3]

2754

2755

__contains__ = _mod_index._has_key_from_parent_map

2756

2757

def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes ``self.HEADER`` followed by ``extra_lines`` to ``path`` through
    the transport, creating parent directories as needed.

    :param path: Transport-relative path of the index file to write.
    :param extra_lines: Iterable of byte lines appended after the header.
        The default is an immutable empty tuple rather than a shared list.
    """
    sio = BytesIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)
    # self._create_parent_dir)
    # mode=self._file_mode,
    # dir_mode=self._dir_mode)

2768

2769

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples, each a prefix plus one suffix taken from
        the history list cached for that prefix.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if isinstance(self._mapper, ConstantMapper):
        prefixes = [()]
    else:
        # Strip the extension so files sharing a stem collapse to one
        # relpath.
        relpaths = set()
        for quoted_relpath in self._transport.iter_files_recursive():
            relpaths.add(os.path.splitext(quoted_relpath)[0])
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    result = set()
    for prefix in prefixes:
        # Element 1 of the cached pair is the history list of suffixes.
        for suffix in self._kndx_cache[prefix][1]:
            result.add(prefix + (suffix,))
    return result

2790

2791

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Each prefix not yet in ``self._kndx_cache`` is mapped to a ``.kndx``
    path, read through the transport and parsed by ``_load_data``; the
    resulting ``(cache, history)`` pair is stored under the prefix. A
    missing file is recorded as an empty ``({}, [])`` pair.

    :param prefixes: An iterable of key prefixes.
    """
    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # the load_data interface writes to these variables.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            path = self._mapper.map(prefix) + '.kndx'
            with self._transport.get(path) as fp:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            self._kndx_cache[prefix] = (self._cache, self._history)
        except NoSuchFile:
            self._kndx_cache[prefix] = ({}, [])
            if isinstance(self._mapper, ConstantMapper):
                # preserve behaviour for revisions.kndx etc.
                self._init_index(path)
        finally:
            # The scratch attributes only exist for _load_data's benefit;
            # drop them on every exit path, including unexpected errors.
            del self._cache
            del self._filename
            del self._history

2818

2819

missing_keys = _mod_index._missing_keys_from_parent_map

2820

2821

def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    :param keys: An iterable of key tuples.
    :return: A dict mapping ``key[:-1]`` to the list of ``key[-1]`` values
        seen for that prefix, in input order.
    """
    result = {}
    for key in keys:
        result.setdefault(key[:-1], []).append(key[-1])
    return result

2828

2829

def _dictionary_compress(self, keys):

2830

"""Dictionary compress keys.

2831

2832

:param keys: The keys to generate references to.

2833

:return: A string representation of keys. keys which are present are

2834

dictionary compressed, and others are emitted as fulltext with a

2835

'.' prefix.

2836

"""

2837

if not keys:

2838

return b''

1187

1188

parents = []

1189

for value in rec[4:-1]:

1190

if value[0] == '.':

1191

# uncompressed reference

1192

parent_id = value[1:]

1193

else:

1194

parent_id = history[int(value)]

1195

parents.append(parent_id)

1196

1197

version_id, options, pos, size = rec[:4]

1198

version_id = version_id

1199

1200

# See self._cache_version

1201

# only want the _history index to reference the 1st

1202

# index entry for version_id

1203

if version_id not in cache:

1204

history_top += 1

1205

index = history_top

1206

history.append(version_id)

1207

else:

1208

index = cache[version_id][5]

1209

cache[version_id] = (version_id,

1210

options.split(','),

1211

int(pos),

1212

int(size),

1213

parents,

1214

index)

1215

# end self._cache_version

1216

1217

def get_graph(self):

1218

return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

1219

1220

def get_ancestry(self, versions):

1221

"""See VersionedFile.get_ancestry."""

1222

# get a graph of all the mentioned versions:

1223

graph = {}

1224

pending = set(versions)

1225

cache = self._cache

1226

while pending:

1227

version = pending.pop()

1228

# trim ghosts

1229

try:

1230

parents = [p for p in cache[version][4] if p in cache]

1231

except KeyError:

1232

raise RevisionNotPresent(version, self._filename)

1233

# if not completed and not a ghost

1234

pending.update([p for p in parents if p not in graph])

1235

graph[version] = parents

1236

return topo_sort(graph.items())

1237

1238

def get_ancestry_with_ghosts(self, versions):

1239

"""See VersionedFile.get_ancestry_with_ghosts."""

1240

# get a graph of all the mentioned versions:

1241

self.check_versions_present(versions)

1242

cache = self._cache

1243

graph = {}

1244

pending = set(versions)

1245

while pending:

1246

version = pending.pop()

1247

try:

1248

parents = cache[version][4]

1249

except KeyError:

1250

# ghost, fake it

1251

graph[version] = []

1252

else:

1253

# if not completed

1254

pending.update([p for p in parents if p not in graph])

1255

graph[version] = parents

1256

return topo_sort(graph.items())

1257

1258

def num_versions(self):

1259

return len(self._history)

1260

1261

__len__ = num_versions

1262

1263

def get_versions(self):

1264

return self._history

1265

1266

def idx_to_name(self, idx):

1267

return self._history[idx]

1268

1269

def lookup(self, version_id):

1270

assert version_id in self._cache

1271

return self._cache[version_id][5]

1272

1273

def _version_list_to_index(self, versions):

2839

1274

result_list = []

2840

prefix = keys[0][:-1]

2841

cache = self._kndx_cache[prefix][0]

2842

for key in keys:

2843

if key[:-1] != prefix:

2844

# kndx indices cannot refer across partitioned storage.

2845

raise ValueError("mismatched prefixes for %r" % keys)

2846

if key[-1] in cache:

1275

cache = self._cache

1276

for version in versions:

1277

if version in cache:

2847

1278

# -- inlined lookup() --

2848

result_list.append(b'%d' % cache[key[-1]][5])

1279

result_list.append(str(cache[version][5]))

2849

1280

# -- end lookup () --

2850

1281

else:

2851

result_list.append(b'.' + key[-1])

2852

return b' '.join(result_list)

2853

2854

def _reset_cache(self):
    """Drop all parsed kndx data and re-read the scope and write mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    # 'w' when the _allow_writes callback says writes are allowed, else 'r'.
    if self._allow_writes():
        self._mode = 'w'
    else:
        self._mode = 'r'

2864

2865

def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def get_sort_key(key):
        index_memo = positions[key][1]
        # Group by prefix and position. index_memo[0] is the key, so it is
        # (file_id, revision_id) and we don't want to sort on revision_id,
        # index_memo[1] is the position, and index_memo[2] is the size,
        # which doesn't matter for the sort
        return index_memo[0][:-1], index_memo[1]
    # list.sort() works in place and returns None, matching the documented
    # return value.
    keys.sort(key=get_sort_key)

2884

2885

_get_total_build_size = _get_total_build_size

2886

2887

def _split_key(self, key):
    """Split key into a prefix and suffix.

    :param key: A key sequence, or a bytes object.
    :return: ``(key[:-1], last)``. For bytes the suffix is the one-byte
        slice ``key[-1:]``, because indexing bytes yields an int on
        Python 3; for other sequences it is the last element.
    """
    # GZ 2018-07-03: This is intentionally either a sequence or bytes?
    if isinstance(key, bytes):
        return key[:-1], key[-1:]
    return key[:-1], key[-1]

2893

2894

2895

class _KnitGraphIndex(object):

2896

"""A KnitVersionedFiles index layered on GraphIndex."""

2897

2898

def __init__(self, graph_index, is_locked, deltas=False, parents=True,
             add_callback=None, track_external_parent_refs=False):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of breezy.index.GraphIndex.
    :param is_locked: A callback, returns True if the index is locked and
        thus usable (i.e. whether the object should answer queries).
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :param track_external_parent_refs: If True, record all external parent
        references parents from added records.  These can be retrieved
        later by calling get_missing_parents().
    :raises KnitCorrupt: if deltas is requested without parents.
    """
    self._add_callback = add_callback
    self._graph_index = graph_index
    self._deltas = deltas
    self._parents = parents
    if deltas and not parents:
        # XXX: TODO: Delta tree and parent graph should be conceptually
        # separate.
        raise KnitCorrupt(self, "Cannot do delta compression without "
                          "parent tracking.")
    self.has_graph = parents
    self._is_locked = is_locked
    self._missing_compression_parents = set()
    if track_external_parent_refs:
        self._key_dependencies = _KeyRefs()
    else:
        self._key_dependencies = None

2933

2934

def __repr__(self):
    """Return ``ClassName(<repr of the wrapped graph index>)``."""
    return "%s(%r)" % (self.__class__.__name__, self._graph_index)

2936

2937

def add_records(self, records, random_id=False,

2938

missing_compression_parents=False):

2939

"""Add multiple records to the index.

2940

2941

This function does not insert data into the Immutable GraphIndex

2942

backing the KnitGraphIndex, instead it prepares data for insertion by

2943

the caller and checks that it is safe to insert then calls

2944

self._add_callback with the prepared GraphIndex nodes.

2945

2946

:param records: a list of tuples:

2947

(key, options, access_memo, parents).

2948

:param random_id: If True the ids being added were randomly generated

2949

and no check for existence will be performed.

2950

:param missing_compression_parents: If True the records being added are

2951

only compressed against texts already in the index (or inside

2952

records). If False the records all refer to unavailable texts (or

2953

texts inside records) as compression parents.

2954

"""

2955

if not self._add_callback:

2956

raise errors.ReadOnlyError(self)

2957

# we hope there are no repositories with inconsistent parentage

2958

# anymore.

2959

2960

keys = {}

2961

compression_parents = set()

2962

key_dependencies = self._key_dependencies

2963

for (key, options, access_memo, parents) in records:

2964

if self._parents:

2965

parents = tuple(parents)

2966

if key_dependencies is not None:

2967

key_dependencies.add_references(key, parents)

2968

index, pos, size = access_memo

2969

if b'no-eol' in options:

2970

value = b'N'

2971

else:

2972

value = b' '

2973

value += b"%d %d" % (pos, size)

2974

if not self._deltas:

2975

if b'line-delta' in options:

2976

raise KnitCorrupt(

2977

self, "attempt to add line-delta in non-delta knit")

2978

if self._parents:

2979

if self._deltas:

2980

if b'line-delta' in options:

2981

node_refs = (parents, (parents[0],))

2982

if missing_compression_parents:

2983

compression_parents.add(parents[0])

2984

else:

2985

node_refs = (parents, ())

1282

result_list.append('.' + version)

1283

return ' '.join(result_list)

1284

1285

def add_version(self, version_id, options, pos, size, parents):

1286

"""Add a version record to the index."""

1287

self.add_versions(((version_id, options, pos, size, parents),))

1288

1289

def add_versions(self, versions):

1290

"""Add multiple versions to the index.

1291

1292

:param versions: a list of tuples:

1293

(version_id, options, pos, size, parents).

1294

"""

1295

lines = []

1296

orig_history = self._history[:]

1297

orig_cache = self._cache.copy()

1298

1299

try:

1300

for version_id, options, pos, size, parents in versions:

1301

line = "\n%s %s %s %s %s :" % (version_id,

1302

','.join(options),

1303

pos,

1304

size,

1305

self._version_list_to_index(parents))

1306

assert isinstance(line, str), \

1307

'content must be utf-8 encoded: %r' % (line,)

1308

lines.append(line)

1309

self._cache_version(version_id, options, pos, size, parents)

1310

if not self._need_to_create:

1311

self._transport.append_bytes(self._filename, ''.join(lines))

1312

else:

1313

sio = StringIO()

1314

sio.write(self.HEADER)

1315

sio.writelines(lines)

1316

sio.seek(0)

1317

self._transport.put_file_non_atomic(self._filename, sio,

1318

create_parent_dir=self._create_parent_dir,

1319

mode=self._file_mode,

1320

dir_mode=self._dir_mode)

1321

self._need_to_create = False

1322

except:

1323

# If any problems happen, restore the original values and re-raise

1324

self._history = orig_history

1325

self._cache = orig_cache

1326

raise

1327

1328

def has_version(self, version_id):

1329

"""True if the version is in the index."""

1330

return version_id in self._cache

1331

1332

def get_position(self, version_id):

1333

"""Return data position and size of specified version."""

1334

entry = self._cache[version_id]

1335

return entry[2], entry[3]

1336

1337

def get_method(self, version_id):

1338

"""Return compression method of specified version."""

1339

options = self._cache[version_id][1]

1340

if 'fulltext' in options:

1341

return 'fulltext'

1342

else:

1343

if 'line-delta' not in options:

1344

raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1345

return 'line-delta'

1346

1347

def get_options(self, version_id):

1348

return self._cache[version_id][1]

1349

1350

def get_parents(self, version_id):

1351

"""Return parents of specified version ignoring ghosts."""

1352

return [parent for parent in self._cache[version_id][4]

1353

if parent in self._cache]

1354

1355

def get_parents_with_ghosts(self, version_id):

1356

"""Return parents of specified version with ghosts."""

1357

return self._cache[version_id][4]

1358

1359

def check_versions_present(self, version_ids):

1360

"""Check that all specified versions are present."""

1361

cache = self._cache

1362

for version_id in version_ids:

1363

if version_id not in cache:

1364

raise RevisionNotPresent(version_id, self._filename)

1365

1366

1367

class _KnitData(_KnitComponentFile):

1368

"""Contents of the knit data file"""

1369

1370

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1371

create_parent_dir=False, delay_create=False,

1372

dir_mode=None):

1373

_KnitComponentFile.__init__(self, transport, filename, mode,

1374

file_mode=file_mode,

1375

create_parent_dir=create_parent_dir,

1376

dir_mode=dir_mode)

1377

self._checked = False

1378

# TODO: jam 20060713 conceptually, this could spill to disk

1379

# if the cached size gets larger than a certain amount

1380

# but it complicates the model a bit, so for now just use

1381

# a simple dictionary

1382

self._cache = {}

1383

self._do_cache = False

1384

if create:

1385

if delay_create:

1386

self._need_to_create = create

1387

else:

1388

self._transport.put_bytes_non_atomic(self._filename, '',

1389

mode=self._file_mode)

1390

1391

def enable_cache(self):

1392

"""Enable caching of reads."""

1393

self._do_cache = True

1394

1395

def clear_cache(self):

1396

"""Clear the record cache."""

1397

self._do_cache = False

1398

self._cache = {}

1399

1400

def _open_file(self):

1401

try:

1402

return self._transport.get(self._filename)

1403

except NoSuchFile:

1404

pass

1405

return None

1406

1407

def _record_to_data(self, version_id, digest, lines):

1408

"""Convert version_id, digest, lines into a raw data block.

1409

1410

:return: (len, a StringIO instance with the raw data ready to read.)

1411

"""

1412

sio = StringIO()

1413

data_file = GzipFile(None, mode='wb', fileobj=sio)

1414

1415

assert isinstance(version_id, str)

1416

data_file.writelines(chain(

1417

["version %s %d %s\n" % (version_id,

1418

len(lines),

1419

digest)],

1420

lines,

1421

["end %s\n" % version_id]))

1422

data_file.close()

1423

length= sio.tell()

1424

1425

sio.seek(0)

1426

return length, sio

1427

1428

def add_raw_record(self, raw_data):

1429

"""Append a prepared record to the data file.

1430

1431

:return: the offset in the data file raw_data was written.

1432

"""

1433

assert isinstance(raw_data, str), 'data must be plain bytes'

1434

if not self._need_to_create:

1435

return self._transport.append_bytes(self._filename, raw_data)

1436

else:

1437

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1438

create_parent_dir=self._create_parent_dir,

1439

mode=self._file_mode,

1440

dir_mode=self._dir_mode)

1441

self._need_to_create = False

1442

return 0

1443

1444

def add_record(self, version_id, digest, lines):

1445

"""Write new text record to disk. Returns the position in the

1446

file where it was written."""

1447

size, sio = self._record_to_data(version_id, digest, lines)

1448

# write to disk

1449

if not self._need_to_create:

1450

start_pos = self._transport.append_file(self._filename, sio)

1451

else:

1452

self._transport.put_file_non_atomic(self._filename, sio,

1453

create_parent_dir=self._create_parent_dir,

1454

mode=self._file_mode,

1455

dir_mode=self._dir_mode)

1456

self._need_to_create = False

1457

start_pos = 0

1458

if self._do_cache:

1459

self._cache[version_id] = sio.getvalue()

1460

return start_pos, size

1461

1462

def _parse_record_header(self, version_id, raw_data):

1463

"""Parse a record header for consistency.

1464

1465

:return: the header and the decompressor stream.

1466

as (stream, header_record)

1467

"""

1468

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1469

try:

1470

rec = self._check_header(version_id, df.readline())

1471

except Exception, e:

1472

raise KnitCorrupt(self._filename,

1473

"While reading {%s} got %s(%s)"

1474

% (version_id, e.__class__.__name__, str(e)))

1475

return df, rec

1476

1477

def _check_header(self, version_id, line):

1478

rec = line.split()

1479

if len(rec) != 4:

1480

raise KnitCorrupt(self._filename,

1481

'unexpected number of elements in record header')

1482

if rec[1] != version_id:

1483

raise KnitCorrupt(self._filename,

1484

'unexpected version, wanted %r, got %r'

1485

% (version_id, rec[1]))

1486

return rec

1487

1488

def _parse_record(self, version_id, data):

1489

# profiling notes:

1490

# 4168 calls in 2880 217 internal

1491

# 4168 calls to _parse_record_header in 2121

1492

# 4168 calls to readlines in 330

1493

df = GzipFile(mode='rb', fileobj=StringIO(data))

1494

1495

try:

1496

record_contents = df.readlines()

1497

except Exception, e:

1498

raise KnitCorrupt(self._filename,

1499

"While reading {%s} got %s(%s)"

1500

% (version_id, e.__class__.__name__, str(e)))

1501

header = record_contents.pop(0)

1502

rec = self._check_header(version_id, header)

1503

1504

last_line = record_contents.pop()

1505

if len(record_contents) != int(rec[2]):

1506

raise KnitCorrupt(self._filename,

1507

'incorrect number of lines %s != %s'

1508

' for version {%s}'

1509

% (len(record_contents), int(rec[2]),

1510

version_id))

1511

if last_line != 'end %s\n' % rec[1]:

1512

raise KnitCorrupt(self._filename,

1513

'unexpected version end line %r, wanted %r'

1514

% (last_line, version_id))

1515

df.close()

1516

return record_contents, rec[3]

1517

1518

def read_records_iter_raw(self, records):

1519

"""Read text records from data file and yield raw data.

1520

1521

This unpacks enough of the text record to validate the id is

1522

as expected but that's all.

1523

"""

1524

# setup an iterator of the external records:

1525

# uses readv so nice and fast we hope.

1526

if len(records):

1527

# grab the disk data needed.

1528

if self._cache:

1529

# Don't check _cache if it is empty

1530

needed_offsets = [(pos, size) for version_id, pos, size

1531

in records

1532

if version_id not in self._cache]

1533

else:

1534

needed_offsets = [(pos, size) for version_id, pos, size

1535

in records]

1536

1537

raw_records = self._transport.readv(self._filename, needed_offsets)

1538

1539

for version_id, pos, size in records:

1540

if version_id in self._cache:

1541

# This data has already been validated

1542

data = self._cache[version_id]

1543

else:

1544

pos, data = raw_records.next()

1545

if self._do_cache:

1546

self._cache[version_id] = data

1547

1548

# validate the header

1549

df, rec = self._parse_record_header(version_id, data)

1550

df.close()

1551

yield version_id, data

1552

1553

def read_records_iter(self, records):

1554

"""Read text records from data file and yield result.

1555

1556

The result will be returned in whatever is the fastest to read.

1557

Not by the order requested. Also, multiple requests for the same

1558

record will only yield 1 response.

1559

:param records: A list of (version_id, pos, len) entries

1560

:return: Yields (version_id, contents, digest) in the order

1561

read, not the order requested

1562

"""

1563

if not records:

1564

return

1565

1566

if self._cache:

1567

# Skip records we have already seen

1568

yielded_records = set()

1569

needed_records = set()

1570

for record in records:

1571

if record[0] in self._cache:

1572

if record[0] in yielded_records:

1573

continue

1574

yielded_records.add(record[0])

1575

data = self._cache[record[0]]

1576

content, digest = self._parse_record(record[0], data)

1577

yield (record[0], content, digest)

2986

1578

else:

2987

node_refs = (parents, )

2988

else:

2989

if parents:

2990

raise KnitCorrupt(self, "attempt to add node with parents "

2991

"in parentless index.")

2992

node_refs = ()

2993

keys[key] = (value, node_refs)

2994

# check for dups

2995

if not random_id:

2996

present_nodes = self._get_entries(keys)

2997

for (index, key, value, node_refs) in present_nodes:

2998

parents = node_refs[:1]

2999

# Sometimes these are passed as a list rather than a tuple

3000

passed = static_tuple.as_tuples(keys[key])

3001

passed_parents = passed[1][:1]

3002

if (value[0:1] != keys[key][0][0:1]

3003

or parents != passed_parents):

3004

node_refs = static_tuple.as_tuples(node_refs)

3005

raise KnitCorrupt(self, "inconsistent details in add_records"

3006

": %s %s" % ((value, node_refs), passed))

3007

del keys[key]

3008

result = []

3009

if self._parents:

3010

for key, (value, node_refs) in keys.items():

3011

result.append((key, value, node_refs))

3012

else:

3013

for key, (value, node_refs) in keys.items():

3014

result.append((key, value))

3015

self._add_callback(result)

3016

if missing_compression_parents:

3017

# This may appear to be incorrect (it does not check for

3018

# compression parents that are in the existing graph index),

3019

# but such records won't have been buffered, so this is

3020

# actually correct: every entry when

3021

# missing_compression_parents==True either has a missing parent, or

3022

# a parent that is one of the keys in records.

3023

compression_parents.difference_update(keys)

3024

self._missing_compression_parents.update(compression_parents)

3025

# Adding records may have satisfied missing compression parents.

3026

self._missing_compression_parents.difference_update(keys)

3027

3028

def scan_unvalidated_index(self, graph_index):
    """Inform this _KnitGraphIndex that there is an unvalidated index.

    This allows this _KnitGraphIndex to keep track of any missing
    compression parents we may want to have filled in to make those
    indices valid.

    :param graph_index: A GraphIndex
    """
    if self._deltas:
        # Reference list 1 holds compression parents; anything this index
        # can already resolve is not missing.
        new_missing = graph_index.external_references(ref_list_num=1)
        new_missing.difference_update(self.get_parent_map(new_missing))
        self._missing_compression_parents.update(new_missing)
    if self._key_dependencies is not None:
        # Add parent refs from graph_index (and discard parent refs that
        # the graph_index has).
        for node in graph_index.iter_all_entries():
            self._key_dependencies.add_references(node[1], node[3][0])

3046

3047

def get_missing_compression_parents(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or a index was scanned that had missing basis texts.

    :return: A frozenset snapshot of the currently tracked keys.
    """
    return frozenset(self._missing_compression_parents)

3054

3055

def get_missing_parents(self):
    """Return the keys of missing parents.

    :return: A frozenset of the references still unsatisfied after
        discarding those this index can resolve.
    """
    # If updating this, you should also update
    # groupcompress._GCGraphIndex.get_missing_parents
    # We may have false positives, so filter those out.
    key_dependencies = self._key_dependencies
    present = self.get_parent_map(key_dependencies.get_unsatisfied_refs())
    key_dependencies.satisfy_refs_for_keys(present)
    return frozenset(key_dependencies.get_unsatisfied_refs())

3063

3064

def _check_read(self):
    """Raise if reads are not permitted.

    :raises errors.ObjectNotLocked: if the ``is_locked`` callback is false.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)

3068

3069

def _check_write_ok(self):
    """Raise if writes are not permitted.

    Only the lock state is checked here.

    :raises errors.ObjectNotLocked: if the ``is_locked`` callback is false.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)

3073

3074

def _compression_parent(self, an_entry):
    """Return the key that an_entry is compressed against, or None.

    :param an_entry: An index node whose element 3 holds the reference
        lists; the second list carries the compression parent.
    :raises AssertionError: if more than one compression parent is listed.
    """
    # Grab the second parent list (as deltas implies parents currently)
    compression_parents = an_entry[3][1]
    if not compression_parents:
        return None
    if len(compression_parents) != 1:
        raise AssertionError(
            "Too many compression parents: %r" % compression_parents)
    return compression_parents[0]

3084

3085

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    self._check_read()
    result = {}
    for entry in self._get_entries(keys, False):
        key = entry[1]
        if self._parents:
            parents = entry[3][0]
        else:
            parents = ()
        if self._deltas:
            compression_parent_key = self._compression_parent(entry)
        else:
            compression_parent_key = None
        # A leading b'N' in the node value marks a text without a final
        # newline.
        noeol = (entry[2][0:1] == b'N')
        if compression_parent_key:
            method = 'line-delta'
        else:
            method = 'fulltext'
        result[key] = (self._node_to_position(entry),
                       compression_parent_key, parents,
                       (method, noeol))
    return result

3126

3127

def _get_entries(self, keys, check_present=False):

3128

"""Get the entries for keys.

3129

3130

:param keys: An iterable of index key tuples.

3131

"""

3132

keys = set(keys)

3133

found_keys = set()

3134

if self._parents:

3135

for node in self._graph_index.iter_entries(keys):

3136

yield node

3137

found_keys.add(node[1])

3138

else:

3139

# adapt parentless index to the rest of the code.

3140

for node in self._graph_index.iter_entries(keys):

3141

yield node[0], node[1], node[2], ()

3142

found_keys.add(node[1])

3143

if check_present:

3144

missing_keys = keys.difference(found_keys)

3145

if missing_keys:

3146

raise RevisionNotPresent(missing_keys.pop(), self)

3147

3148

def get_method(self, key):
    """Look up the node for key and report its compression method."""
    node = self._get_node(key)
    return self._get_method(node)

3151

3152

def _get_method(self, node):

3153

if not self._deltas:

3154

return 'fulltext'

3155

if self._compression_parent(node):

3156

return 'line-delta'

3157

else:

3158

return 'fulltext'

3159

3160

def _get_node(self, key):

3161

try:

3162

return list(self._get_entries([key]))[0]

3163

except IndexError:

3164

raise RevisionNotPresent(key, self)

3165

3166

def get_options(self, key):
    """Return the option list for key as bytes.

    The first item is the compression method encoded as ASCII; b'no-eol'
    follows when the index value starts with b'N'.

    e.g. [b'line-delta', b'no-eol']
    """
    node = self._get_node(key)
    method = self._get_method(node).encode('ascii')
    if node[2][0:1] == b'N':
        return [method, b'no-eol']
    return [method]

3176

3177

def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()"""
    # Delegate to the underlying graph index using reference list 0.
    graph_index = self._graph_index
    return graph_index.find_ancestry(keys, 0)

3180

3181

def get_parent_map(self, keys):
    """Build a mapping from each present key to its parents.

    :param keys: The keys to look up parents for.
    :return: A dict of key -> parents. Keys the index does not know are
        left out. When the index does not track parents every present key
        maps to None.
    """
    self._check_read()
    nodes = self._get_entries(keys)
    if self._parents:
        return dict((node[1], node[3][0]) for node in nodes)
    return dict((node[1], None) for node in nodes)

3198

3199

def get_position(self, key):
    """Return the details needed to read the record for key.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))

3207

3208

# Membership testing is delegated to a helper shared via _mod_index.
# NOTE(review): going by its name it is presumably implemented on top of
# get_parent_map() -- confirm in _mod_index.
__contains__ = _mod_index._has_key_from_parent_map

3209

3210

def keys(self):
    """Return a list of every key in the collection.

    No particular ordering is applied; keys come back in whatever order
    the graph index iterates its entries.
    """
    self._check_read()
    found = []
    for node in self._graph_index.iter_all_entries():
        found.append(node[1])
    return found

3217

3218

# missing_keys is provided by a helper shared via _mod_index.
# NOTE(review): going by its name it is presumably implemented on top of
# get_parent_map() -- confirm in _mod_index.
missing_keys = _mod_index._missing_keys_from_parent_map

3219

3220

def _node_to_position(self, node):

3221

"""Convert an index value to position details."""

3222

bits = node[2][1:].split(b' ')

3223

return node[0], int(bits[0]), int(bits[1])

3224

3225

def _sort_keys_by_io(self, keys, positions):

3226

"""Figure out an optimal order to read the records for the given keys.

3227

3228

Sort keys, grouped by index and sorted by position.

3229

3230

:param keys: A list of keys whose records we want to read. This will be

3231

sorted 'in-place'.

3232

:param positions: A dict, such as the one returned by

3233

_get_components_positions()

3234

:return: None

3235

"""

3236

def get_index_memo(key):

3237

# index_memo is at offset [1]. It is made up of (GraphIndex,

3238

# position, size). GI is an object, which will be unique for each

3239

# pack file. This causes us to group by pack file, then sort by

3240

# position. Size doesn't matter, but it isn't worth breaking up the

3241

# tuple.

3242

return positions[key][1]

3243

return keys.sort(key=get_index_memo)

3244

3245

# Rebind the _get_total_build_size from the enclosing scope under the same
# name here (presumably a class body, which would expose the module-level
# function as a method -- TODO confirm against the full file).
_get_total_build_size = _get_total_build_size

3246

3247

3248

class _KnitKeyAccess(object):

3249

"""Access to records in .knit files."""

3250

3251

def __init__(self, transport, mapper):

3252

"""Create a _KnitKeyAccess with transport and mapper.

3253

3254

:param transport: The transport the access object is rooted at.

3255

:param mapper: The mapper used to map keys to .knit files.

3256

"""

3257

self._transport = transport

3258

self._mapper = mapper

3259

3260

def add_raw_record(self, key, size, raw_data):

3261

"""Add raw knit bytes to a storage area.

3262

3263

The data is spooled to the container writer in one bytes-record per

3264

raw data item.

3265

3266

:param key: The key of the raw data segment

3267

:param size: The size of the raw data segment

3268

:param raw_data: A chunked bytestring containing the data.

3269

:return: opaque index memo to retrieve the record later.

3270

For _KnitKeyAccess the memo is (key, pos, length), where the key is

3271

the record key.

3272

"""

3273

path = self._mapper.map(key)

3274

try:

3275

base = self._transport.append_bytes(path + '.knit', b''.join(raw_data))

3276

except errors.NoSuchFile:

3277

self._transport.mkdir(osutils.dirname(path))

3278

base = self._transport.append_bytes(path + '.knit', b''.join(raw_data))

3279

# if base == 0:

3280

# chmod.

3281

return (key, base, size)

3282

3283

def add_raw_records(self, key_sizes, raw_data):

3284

"""Add raw knit bytes to a storage area.

3285

3286

The data is spooled to the container writer in one bytes-record per

3287

raw data item.

3288

3289

:param sizes: An iterable of tuples containing the key and size of each

3290

raw data segment.

3291

:param raw_data: A chunked bytestring containing the data.

3292

:return: A list of memos to retrieve the record later. Each memo is an

3293

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

3294

length), where the key is the record key.

3295

"""

3296

raw_data = b''.join(raw_data)

3297

if not isinstance(raw_data, bytes):

3298

raise AssertionError(

3299

'data must be plain bytes was %s' % type(raw_data))

3300

result = []

3301

offset = 0

3302

# TODO: This can be tuned for writing to sftp and other servers where

3303

# append() is relatively expensive by grouping the writes to each key

3304

# prefix.

3305

for key, size in key_sizes:

3306

record_bytes = [raw_data[offset:offset + size]]

3307

result.append(self.add_raw_record(key, size, record_bytes))

3308

offset += size

3309

return result

3310

3311

def flush(self):

3312

"""Flush pending writes on this access object.

3313

3314

For .knit files this is a no-op.

3315

"""

3316

pass

3317

3318

def get_raw_records(self, memos_for_retrieval):

3319

"""Get the raw bytes for a records.

3320

3321

:param memos_for_retrieval: An iterable containing the access memo for

3322

retrieving the bytes.

3323

:return: An iterator over the bytes of the records.

3324

"""

3325

# first pass, group into same-index request to minimise readv's issued.

3326

request_lists = []

3327

current_prefix = None

3328

for (key, offset, length) in memos_for_retrieval:

3329

if current_prefix == key[:-1]:

3330

current_list.append((offset, length))

3331

else:

3332

if current_prefix is not None:

3333

request_lists.append((current_prefix, current_list))

3334

current_prefix = key[:-1]

3335

current_list = [(offset, length)]

3336

# handle the last entry

3337

if current_prefix is not None:

3338

request_lists.append((current_prefix, current_list))

3339

for prefix, read_vector in request_lists:

3340

path = self._mapper.map(prefix) + '.knit'

3341

for pos, data in self._transport.readv(path, read_vector):

3342

yield data

3343

3344

3345

def annotate_knit(knit, revision_id):

3346

"""Annotate a knit with no cached annotations.

3347

3348

This implementation is for knits with no cached annotations.

3349

It will work for knits with cached annotations, but this is not

3350

recommended.

1579

needed_records.add(record)

1580

needed_records = sorted(needed_records, key=operator.itemgetter(1))

1581

else:

1582

needed_records = sorted(set(records), key=operator.itemgetter(1))

1583

1584

if not needed_records:

1585

return

1586

1587

# The transport optimizes the fetching as well

1588

# (ie, reads continuous ranges.)

1589

readv_response = self._transport.readv(self._filename,

1590

[(pos, size) for version_id, pos, size in needed_records])

1591

1592

for (version_id, pos, size), (pos, data) in \

1593

izip(iter(needed_records), readv_response):

1594

content, digest = self._parse_record(version_id, data)

1595

if self._do_cache:

1596

self._cache[version_id] = data

1597

yield version_id, content, digest

1598

1599

def read_records(self, records):
    """Read records and return them as a dict.

    :return: A dict mapping record id to (content, digest), built from
        everything read_records_iter yields for records.
    """
    return dict(
        (record_id, (content, digest))
        for record_id, content, digest in self.read_records_iter(records))

1606

1607

1608

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    # Both ends of this optimiser are knits.
    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        # NOTE(review): it is not obvious what could raise AttributeError
        # here; the guard is kept as found.
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of versions the target lacks from the source
        knit, then reconciles parent lists of versions present on both sides
        whose parents differ.

        :param pb: Not used as passed; it is replaced below by a nested
            progress bar from ui.ui_factory.
        :param msg: Not referenced in this method.
        :param version_ids: Passed, with ignore_missing, to
            _get_source_version_ids to select what to join.
        :return: The number of records copied (0 when there is nothing to
            do).
        :raises errors.GraphCycleError: if taking a parent from the source
            would make a version its own ancestor.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep topological order, restricted to versions that are needed
            # and not already in the target.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                data_pos, data_size = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            # copy_queue_records and copy_queue were built in lockstep, so the
            # raw reads are paired with their options/parents by position.
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            # All copied records are handed to the target in a single call.
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

1723

1724

1725

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    # Source side is a weave file, target side is a knit.
    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a Weave and target a KnitVersionedFile."""
        # NOTE(review): it is not obvious what could raise AttributeError
        # here; the guard is kept as found.
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds each version the target lacks by re-adding its lines from the
        source weave, then reconciles parent lists of versions present on
        both sides where the source has parents the target does not.

        :param pb: Not used as passed; it is replaced below by a nested
            progress bar from ui.ui_factory.
        :param msg: Not referenced in this method.
        :param version_ids: Passed, with ignore_missing, to
            _get_source_version_ids to select what to join.
        :return: The number of versions added (0 when there is nothing to
            do).
        :raises errors.GraphCycleError: if taking a parent from the source
            would make a version its own ancestor.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep topological order, restricted to versions that are needed
            # and not already in the target.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

1816

1817

1818

class KnitSequenceMatcher(difflib.SequenceMatcher):

1819

"""Knit tuned sequence matcher.

1820

1821

This is based on profiling of difflib which indicated some improvements

1822

for our usage pattern.

3351

1823

"""

3352

annotator = _KnitAnnotator(knit)

3353

return iter(annotator.annotate_flat(revision_id))

3354

3355

3356

class _KnitAnnotator(annotate.Annotator):

3357

"""Build up the annotations for a text."""

3358

3359

def __init__(self, vf):
    """Set up the annotator's bookkeeping on top of annotate.Annotator.

    :param vf: Passed through unchanged to annotate.Annotator.__init__.
    """
    annotate.Annotator.__init__(self, vf)

    # TODO: handle Nodes which cannot be extracted
    # self._ghosts = set()

    # (key, parent_key) => matching_blocks; entries are meant to be
    # consumed once.
    self._matching_blocks = dict()

    # key => KnitContent object
    self._content_objects = dict()
    # key => number of children still depending on that fulltext content
    # object as their compression parent
    self._num_compression_children = dict()
    # compression parent key => delta records that cannot be expanded until
    # that parent is available
    self._pending_deltas = dict()
    # parent key => fulltext records that cannot be yielded for annotation
    # until that parent's fulltext is ready
    self._pending_annotation = dict()

    # Starts empty; filled and cleared by other methods of the class.
    self._all_build_details = dict()

3380

3381

def _get_build_graph(self, key):

3382

"""Get the graphs for building texts and annotations.

3383

3384

The data you need for creating a full text may be different than the

3385

data you need to annotate that text. (At a minimum, you need both

3386

parents to create an annotation, but only need 1 parent to generate the

3387

fulltext.)

3388

3389

:return: A list of (key, index_memo) records, suitable for

3390

passing to read_records_iter to start reading in the raw data from

3391

the pack file.

1824

1825

def find_longest_match(self, alo, ahi, blo, bhi):

1826

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

1827

1828

If isjunk is not defined:

1829

1830

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

1831

alo <= i <= i+k <= ahi

1832

blo <= j <= j+k <= bhi

1833

and for all (i',j',k') meeting those conditions,

1834

k >= k'

1835

i <= i'

1836

and if i == i', j <= j'

1837

1838

In other words, of all maximal matching blocks, return one that

1839

starts earliest in a, and of all those maximal matching blocks that

1840

start earliest in a, return the one that starts earliest in b.

1841

1842

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

1843

>>> s.find_longest_match(0, 5, 0, 9)

1844

(0, 4, 5)

1845

1846

If isjunk is defined, first the longest matching block is

1847

determined as above, but with the additional restriction that no

1848

junk element appears in the block. Then that block is extended as

1849

far as possible by matching (only) junk elements on both sides. So

1850

the resulting block never matches on junk except as identical junk

1851

happens to be adjacent to an "interesting" match.

1852

1853

Here's the same example as before, but considering blanks to be

1854

junk. That prevents " abcd" from matching the " abcd" at the tail

1855

end of the second sequence directly. Instead only the "abcd" can

1856

match, and matches the leftmost "abcd" in the second sequence:

1857

1858

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

1859

>>> s.find_longest_match(0, 5, 0, 9)

1860

(1, 0, 4)

1861

1862

If no blocks match, return (alo, blo, 0).

1863

1864

>>> s = SequenceMatcher(None, "ab", "c")

1865

>>> s.find_longest_match(0, 2, 0, 1)

1866

(0, 0, 0)

3392

1867

"""

3393

pending = {key}

3394

records = []

3395

ann_keys = set()

3396

self._num_needed_children[key] = 1

3397

while pending:

3398

# get all pending nodes

3399

this_iteration = pending

3400

build_details = self._vf._index.get_build_details(this_iteration)

3401

self._all_build_details.update(build_details)

3402

# new_nodes = self._vf._index._get_entries(this_iteration)

3403

pending = set()

3404

for key, details in build_details.items():

3405

(index_memo, compression_parent, parent_keys,

3406

record_details) = details

3407

self._parent_map[key] = parent_keys

3408

self._heads_provider = None

3409

records.append((key, index_memo))

3410

# Do we actually need to check _annotated_lines?

3411

pending.update([p for p in parent_keys

3412

if p not in self._all_build_details])

3413

if parent_keys:

3414

for parent_key in parent_keys:

3415

if parent_key in self._num_needed_children:

3416

self._num_needed_children[parent_key] += 1

3417

else:

3418

self._num_needed_children[parent_key] = 1

3419

if compression_parent:

3420

if compression_parent in self._num_compression_children:

3421

self._num_compression_children[compression_parent] += 1

3422

else:

3423

self._num_compression_children[compression_parent] = 1

3424

3425

missing_versions = this_iteration.difference(build_details)

3426

if missing_versions:

3427

for key in missing_versions:

3428

if key in self._parent_map and key in self._text_cache:

3429

# We already have this text ready, we just need to

3430

# yield it later so we get it annotated

3431

ann_keys.add(key)

3432

parent_keys = self._parent_map[key]

3433

for parent_key in parent_keys:

3434

if parent_key in self._num_needed_children:

3435

self._num_needed_children[parent_key] += 1

3436

else:

3437

self._num_needed_children[parent_key] = 1

3438

pending.update([p for p in parent_keys

3439

if p not in self._all_build_details])

3440

else:

3441

raise errors.RevisionNotPresent(key, self._vf)

3442

# Generally we will want to read the records in reverse order, because

3443

# we find the parent nodes after the children

3444

records.reverse()

3445

return records, ann_keys

3446

3447

def _get_needed_texts(self, key, pb=None):

3448

# if True or len(self._vf._immediate_fallback_vfs) > 0:

3449

if len(self._vf._immediate_fallback_vfs) > 0:

3450

# If we have fallbacks, go to the generic path

3451

for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):

3452

yield v

3453

return

3454

while True:

1868

1869

# CAUTION: stripping common prefix or suffix would be incorrect.

1870

# E.g.,

1871

# ab

1872

# acab

1873

# Longest matching block is "ab", but if common prefix is

1874

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

1875

# strip, so ends up claiming that ab is changed to acab by

1876

# inserting "ca" in the middle. That's minimal but unintuitive:

1877

# "it's obvious" that someone inserted "ac" at the front.

1878

# Windiff ends up at the same place as diff, but by pairing up

1879

# the unique 'b's and then matching the first two 'a's.

1880

1881

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

1882

besti, bestj, bestsize = alo, blo, 0

1883

# find longest junk-free match

1884

# during an iteration of the loop, j2len[j] = length of longest

1885

# junk-free match ending with a[i-1] and b[j]

1886

j2len = {}

1887

# nothing = []

1888

b2jget = b2j.get

1889

for i in xrange(alo, ahi):

1890

# look at all instances of a[i] in b; note that because

1891

# b2j has no junk keys, the loop is skipped if a[i] is junk

1892

j2lenget = j2len.get

1893

newj2len = {}

1894

1895

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1896

# following improvement

1897

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1898

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1899

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

1900

# to

1901

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

1902

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

1903

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

1904

3455

1905

try:

3456

records, ann_keys = self._get_build_graph(key)

3457

for idx, (sub_key, text, num_lines) in enumerate(

3458

self._extract_texts(records)):

3459

if pb is not None:

3460

pb.update(gettext('annotating'), idx, len(records))

3461

yield sub_key, text, num_lines

3462

for sub_key in ann_keys:

3463

text = self._text_cache[sub_key]

3464

num_lines = len(text) # bad assumption

3465

yield sub_key, text, num_lines

3466

return

3467

except errors.RetryWithNewPacks as e:

3468

self._vf._access.reload_or_raise(e)

3469

# The cached build_details are no longer valid

3470

self._all_build_details.clear()

3471

3472

def _cache_delta_blocks(self, key, compression_parent, delta, lines):
    """Record the matching blocks between key and its compression parent.

    The blocks are computed by KnitContent.get_line_delta_blocks from the
    delta, the cached parent lines and the child's lines, and stored in
    self._matching_blocks under (key, compression_parent).
    """
    basis_lines = self._text_cache[compression_parent]
    block_iter = KnitContent.get_line_delta_blocks(delta, basis_lines, lines)
    self._matching_blocks[(key, compression_parent)] = list(block_iter)

3477

3478

def _expand_record(self, key, parent_keys, compression_parent, record,

3479

record_details):

3480

delta = None

3481

if compression_parent:

3482

if compression_parent not in self._content_objects:

3483

# Waiting for the parent

3484

self._pending_deltas.setdefault(compression_parent, []).append(

3485

(key, parent_keys, record, record_details))

3486

return None

3487

# We have the basis parent, so expand the delta

3488

num = self._num_compression_children[compression_parent]

3489

num -= 1

3490

if num == 0:

3491

base_content = self._content_objects.pop(compression_parent)

3492

self._num_compression_children.pop(compression_parent)

1906

js = b2j[a[i]]

1907

except KeyError:

1908

pass

3493

1909

else:

3494

self._num_compression_children[compression_parent] = num

3495

base_content = self._content_objects[compression_parent]

3496

# It is tempting to want to copy_base_content=False for the last

3497

# child object. However, whenever noeol=False,

3498

# self._text_cache[parent_key] is content._lines. So mutating it

3499

# gives very bad results.

3500

# The alternative is to copy the lines into text cache, but then we

3501

# are copying anyway, so just do it here.

3502

content, delta = self._vf._factory.parse_record(

3503

key, record, record_details, base_content,

3504

copy_base_content=True)

3505

else:

3506

# Fulltext record

3507

content, _ = self._vf._factory.parse_record(

3508

key, record, record_details, None)

3509

if self._num_compression_children.get(key, 0) > 0:

3510

self._content_objects[key] = content

3511

lines = content.text()

3512

self._text_cache[key] = lines

3513

if delta is not None:

3514

self._cache_delta_blocks(key, compression_parent, delta, lines)

3515

return lines

3516

3517

def _get_parent_annotations_and_matches(self, key, text, parent_key):

3518

"""Get the list of annotations for the parent, and the matching lines.

3519

3520

:param text: The opaque value given by _get_needed_texts

3521

:param parent_key: The key for the parent text

3522

:return: (parent_annotations, matching_blocks)

3523

parent_annotations is a list as long as the number of lines in

3524

parent

3525

matching_blocks is a list of (parent_idx, text_idx, len) tuples

3526

indicating which lines match between the two texts

3527

"""

3528

block_key = (key, parent_key)

3529

if block_key in self._matching_blocks:

3530

blocks = self._matching_blocks.pop(block_key)

3531

parent_annotations = self._annotations_cache[parent_key]

3532

return parent_annotations, blocks

3533

return annotate.Annotator._get_parent_annotations_and_matches(self,

3534

key, text, parent_key)

3535

3536

def _process_pending(self, key):

3537

"""The content for 'key' was just processed.

3538

3539

Determine if there is any more pending work to be processed.

3540

"""

3541

to_return = []

3542

if key in self._pending_deltas:

3543

compression_parent = key

3544

children = self._pending_deltas.pop(key)

3545

for child_key, parent_keys, record, record_details in children:

3546

lines = self._expand_record(child_key, parent_keys,

3547

compression_parent,

3548

record, record_details)

3549

if self._check_ready_for_annotations(child_key, parent_keys):

3550

to_return.append(child_key)

3551

# Also check any children that are waiting for this parent to be

3552

# annotation ready

3553

if key in self._pending_annotation:

3554

children = self._pending_annotation.pop(key)

3555

to_return.extend([c for c, p_keys in children

3556

if self._check_ready_for_annotations(c, p_keys)])

3557

return to_return

3558

3559

def _check_ready_for_annotations(self, key, parent_keys):

3560

"""return true if this text is ready to be yielded.

3561

3562

Otherwise, this will return False, and queue the text into

3563

self._pending_annotation

3564

"""

3565

for parent_key in parent_keys:

3566

if parent_key not in self._annotations_cache:

3567

# still waiting on at least one parent text, so queue it up

3568

# Note that if there are multiple parents, we need to wait

3569

# for all of them.

3570

self._pending_annotation.setdefault(parent_key,

3571

[]).append((key, parent_keys))

3572

return False

3573

return True

3574

3575

def _extract_texts(self, records):
    """Extract the various texts needed based on records.

    This is a generator. Texts are produced in the order they become
    usable, which is not necessarily the order they were requested in.

    :param records: Passed through to self._vf._read_records_iter().
    :return: An iterator of (key, lines, len(lines)) tuples.
    """
    # We iterate in the order read, rather than a strict order requested.
    # We process what we can, and set aside anything that still needs
    # parents, cleaning it up when those parents are processed.
    #
    # Basic data flow:
    #   1) As 'records' are read, see if we can expand these records into
    #      Content objects (and thus lines).
    #   2) If a given line-delta is waiting on its compression parent, it
    #      gets queued up into self._pending_deltas, otherwise we expand
    #      it, and put it into self._text_cache and self._content_objects.
    #   3) If we expanded the text, we then check whether all parents have
    #      also been processed. If so, this text gets yielded, else this
    #      record gets set aside into self._pending_annotation.
    #   4) Further, if we expanded the text in (2), we then check whether
    #      there are any children in self._pending_deltas waiting to also
    #      be processed. If so, we go back to (2) for those.
    #   5) Further again, if we yielded the text, we can then check if that
    #      'unlocks' any of the texts in self._pending_annotation, which
    #      should then get yielded as well.
    #
    # Note that both steps 4 and 5 are 'recursive' in that unlocking one
    # compression child could unlock yet another, and yielding a fulltext
    # will also 'unlock' the children that are waiting on that annotation.
    # (Though unlocking one parent's fulltext does not unlock a child if
    # other parents are also waiting.)
    #
    # We want to yield content before expanding child content objects, so
    # that we know when we can re-use the content lines, and the annotation
    # code can know when it can stop caching fulltexts, as well.
    for key, record, _digest in self._vf._read_records_iter(records):
        # ghosts?
        build_details = self._all_build_details[key]
        _, compression_parent, parent_keys, record_details = build_details
        lines = self._expand_record(key, parent_keys, compression_parent,
                                    record, record_details)
        if lines is None:
            # Pending delta should be queued up
            continue
        # At this point, we may be able to yield this content, if all
        # parents are also finished
        if self._check_ready_for_annotations(key, parent_keys):
            # All parents present
            yield key, lines, len(lines)
        # Work through everything this text unlocked, one batch at a time;
        # each yielded text may in turn unlock further texts.
        unlocked = self._process_pending(key)
        while unlocked:
            batch, unlocked = unlocked, []
            for ready_key in batch:
                ready_lines = self._text_cache[ready_key]
                yield ready_key, ready_lines, len(ready_lines)
                unlocked.extend(self._process_pending(ready_key))

3631

3632

3633

try:

3634

from ._knit_load_data_pyx import _load_data_c as _load_data

3635

except ImportError as e:

3636

osutils.failed_to_load_extension(e)

3637

from ._knit_load_data_py import _load_data_py as _load_data

1910

for j in js:

1911

# a[i] matches b[j]

1912

if j >= blo:

1913

if j >= bhi:

1914

break

1915

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

1916

if k > bestsize:

1917

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

1918

j2len = newj2len

1919

1920

# Extend the best by non-junk elements on each end. In particular,

1921

# "popular" non-junk elements aren't in b2j, which greatly speeds

1922

# the inner loop above, but also means "the best" match so far

1923

# doesn't contain any junk *or* popular non-junk elements.

1924

while besti > alo and bestj > blo and \

1925

not isbjunk(b[bestj-1]) and \

1926

a[besti-1] == b[bestj-1]:

1927

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1928

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1929

not isbjunk(b[bestj+bestsize]) and \

1930

a[besti+bestsize] == b[bestj+bestsize]:

1931

bestsize += 1

1932

1933

# Now that we have a wholly interesting match (albeit possibly

1934

# empty!), we may as well suck up the matching junk on each

1935

# side of it too. Can't think of a good reason not to, and it

1936

# saves post-processing the (possibly considerable) expense of

1937

# figuring out what to do with it. In the case of an empty

1938

# interesting match, this is clearly the right thing to do,

1939

# because no other kind of match is possible in the regions.

1940

while besti > alo and bestj > blo and \

1941

isbjunk(b[bestj-1]) and \

1942

a[besti-1] == b[bestj-1]:

1943

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1944

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1945

isbjunk(b[bestj+bestsize]) and \

1946

a[besti+bestsize] == b[bestj+bestsize]:

1947

bestsize = bestsize + 1

1948

1949

return besti, bestj, bestsize

Older »