/brz/remove-bazaar : revision 2018.5.139

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Andrew Bennetts
Date: 2007-04-10 15:54:15 UTC
mfrom: (2402 +trunk)
mto: (2018.18.11 hpss-faster-copy)
mto: This revision was merged to the branch mainline in revision 2435.
Revision ID: andrew.bennetts@canonical.com-20070410155415-hyzlzwevu3ud0dny

Merge from bzr.dev, resolving conflicts.

files added:
build-api

bzr.ico

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/export

bzrlib/export/dir_exporter.py

bzrlib/graph.py

bzrlib/inspect_for_copy.py

bzrlib/plugins/__init__.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_api.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_graph.py

bzrlib/textui.py

bzrlib/transport/ftp.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/weave_commands.py

bzrlib/xml6.py

contrib/add-bzr-to-baz

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/index.txt

tools/biobench.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

files removed:
.coveragerc

.mailmap

.testr.conf

.travis.yml

MANIFEST.in

NEWS

README_BDIST_RPM

apport

apport/README

apport/brz-crashdb.conf

apport/source_brz.py

breezy/_annotator_py.py

breezy/_annotator_pyx.pyx

breezy/_bencode_pyx.h

breezy/_bencode_pyx.pyx

breezy/_chunks_to_lines_py.py

breezy/_chunks_to_lines_pyx.pyx

breezy/_export_c_api.h

breezy/_import_c_api.h

breezy/_known_graph_py.py

breezy/_known_graph_pyx.pyx

breezy/_patiencediff_c.c

breezy/_readdir_py.py

breezy/_readdir_pyx.pyx

breezy/_rio_py.py

breezy/_rio_pyx.pyx

breezy/_simple_set_pyx.pxd

breezy/_simple_set_pyx.pyx

breezy/_static_tuple_c.c

breezy/_static_tuple_c.h

breezy/_static_tuple_c.pxd

breezy/_static_tuple_py.py

breezy/_termcolor.py

breezy/_walkdirs_win32.pyx

breezy/archive

breezy/archive/__init__.py

breezy/bencode.py

breezy/bisect.py

breezy/bisect_multi.py

breezy/branchbuilder.py

breezy/breakin.py

breezy/bugtracker.py

breezy/bundle/serializer/v4.py

breezy/bzr

breezy/bzr/__init__.py

breezy/bzr/_btree_serializer_py.py

breezy/bzr/_btree_serializer_pyx.pyx

breezy/bzr/_chk_map_py.py

breezy/bzr/_chk_map_pyx.pyx

breezy/bzr/_dirstate_helpers_py.py

breezy/bzr/_dirstate_helpers_pyx.h

breezy/bzr/_dirstate_helpers_pyx.pyx

breezy/bzr/_groupcompress_py.py

breezy/bzr/_groupcompress_pyx.pyx

breezy/bzr/_knit_load_data_py.py

breezy/bzr/_knit_load_data_pyx.pyx

breezy/bzr/branch.py

breezy/bzr/btree_index.py

breezy/bzr/check.py

breezy/bzr/chk_map.py

breezy/bzr/chk_serializer.py

breezy/bzr/debug_commands.py

breezy/bzr/delta.h

breezy/bzr/diff-delta.c

breezy/bzr/fullhistory.py

breezy/bzr/groupcompress.py

breezy/bzr/groupcompress_repo.py

breezy/bzr/index.py

breezy/bzr/inventory_delta.py

breezy/bzr/inventorytree.py

breezy/bzr/knitpack_repo.py

breezy/bzr/pack.py

breezy/bzr/pack_repo.py

breezy/bzr/repository.py

breezy/bzr/serializer.py

breezy/bzr/smart/message.py

breezy/bzr/smart/packrepository.py

breezy/bzr/smart/ping.py

breezy/bzr/smart/signals.py

breezy/bzr/vf_repository.py

breezy/bzr/vf_search.py

breezy/bzr/workingtree.py

breezy/bzr/workingtree_3.py

breezy/bzr/xml5.py

breezy/bzr/xml6.py

breezy/bzr_distutils.py

breezy/cethread.py

breezy/chunk_writer.py

breezy/clean_tree.py

breezy/cleanup.py

breezy/cmd_test_script.py

breezy/cmdline.py

breezy/controldir.py

breezy/counted_lock.py

breezy/crash.py

breezy/directory_service.py

breezy/doc_generate/conf.py

breezy/email_message.py

breezy/estimate_compressed_size.py

breezy/export_pot.py

breezy/fetch_ghosts.py

breezy/fifo_cache.py

breezy/filter_tree.py

breezy/filters

breezy/filters/__init__.py

breezy/filters/eol.py

breezy/foreign.py

breezy/graph.py

breezy/help_topics

breezy/help_topics/en

breezy/help_topics/en/authentication.txt

breezy/help_topics/en/conflict-types.txt

breezy/help_topics/en/content-filters.txt

breezy/help_topics/en/debug-flags.txt

breezy/help_topics/en/diverged-branches.txt

breezy/help_topics/en/eol.txt

breezy/help_topics/en/log-formats.txt

breezy/help_topics/en/missing-extensions.txt

breezy/help_topics/en/patterns.txt

breezy/help_topics/en/rules.txt

breezy/help_topics/en/url-special-chars.txt

breezy/help_topics/es

breezy/help_topics/es/conflict-types.txt

breezy/i18n.py

breezy/library_state.py

breezy/lru_cache.py

breezy/mail_client.py

breezy/mergetools.py

breezy/multiparent.py

breezy/patiencediff.py

breezy/plugins/__init__.py

breezy/plugins/bash_completion

breezy/plugins/bash_completion/README.txt

breezy/plugins/bash_completion/__init__.py

breezy/plugins/bash_completion/bashcomp.py

breezy/plugins/bash_completion/tests

breezy/plugins/bash_completion/tests/__init__.py

breezy/plugins/bash_completion/tests/test_bashcomp.py

breezy/plugins/changelog_merge

breezy/plugins/changelog_merge/__init__.py

breezy/plugins/changelog_merge/changelog_merge.py

breezy/plugins/changelog_merge/tests

breezy/plugins/changelog_merge/tests/__init__.py

breezy/plugins/changelog_merge/tests/test_changelog_merge.py

breezy/plugins/commitfromnews

breezy/plugins/commitfromnews/__init__.py

breezy/plugins/commitfromnews/committemplate.py

breezy/plugins/commitfromnews/tests

breezy/plugins/commitfromnews/tests/__init__.py

breezy/plugins/commitfromnews/tests/test_committemplate.py

breezy/plugins/commitfromnews/tests/test_msgeditor.py

breezy/plugins/cvs

breezy/plugins/cvs/__init__.py

breezy/plugins/darcs

breezy/plugins/darcs/__init__.py

breezy/plugins/email

breezy/plugins/email/__init__.py

breezy/plugins/email/emailer.py

breezy/plugins/email/tests

breezy/plugins/email/tests/__init__.py

breezy/plugins/email/tests/testemail.py

breezy/plugins/fastimport

breezy/plugins/fastimport/NEWS

breezy/plugins/fastimport/__init__.py

breezy/plugins/fastimport/branch_mapper.py

breezy/plugins/fastimport/branch_updater.py

breezy/plugins/fastimport/bzr_commit_handler.py

breezy/plugins/fastimport/cache_manager.py

breezy/plugins/fastimport/cmds.py

breezy/plugins/fastimport/doc

breezy/plugins/fastimport/doc/notes.txt

breezy/plugins/fastimport/exporter.py

breezy/plugins/fastimport/helpers.py

breezy/plugins/fastimport/idmapfile.py

breezy/plugins/fastimport/marks_file.py

breezy/plugins/fastimport/processors

breezy/plugins/fastimport/processors/__init__.py

breezy/plugins/fastimport/processors/generic_processor.py

breezy/plugins/fastimport/revision_store.py

breezy/plugins/fastimport/tests

breezy/plugins/fastimport/tests/__init__.py

breezy/plugins/fastimport/tests/test_branch_mapper.py

breezy/plugins/fastimport/tests/test_commands.py

breezy/plugins/fastimport/tests/test_exporter.py

breezy/plugins/fastimport/tests/test_generic_processor.py

breezy/plugins/fastimport/tests/test_head_tracking.py

breezy/plugins/fastimport/tests/test_revision_store.py

breezy/plugins/fastimport/user_mapper.py

breezy/plugins/git

breezy/plugins/git/.testr.conf

breezy/plugins/git/Makefile

breezy/plugins/git/TODO

breezy/plugins/git/__init__.py

breezy/plugins/git/annotate.py

breezy/plugins/git/branch.py

breezy/plugins/git/bzr-receive-pack

breezy/plugins/git/bzr-upload-pack

breezy/plugins/git/cache.py

breezy/plugins/git/commands.py

breezy/plugins/git/commit.py

breezy/plugins/git/config.py

breezy/plugins/git/dir.py

breezy/plugins/git/directory.py

breezy/plugins/git/errors.py

breezy/plugins/git/fetch.py

breezy/plugins/git/filegraph.py

breezy/plugins/git/git-remote-bzr

breezy/plugins/git/git-remote-bzr.1

breezy/plugins/git/git_remote_helper.py

breezy/plugins/git/help.py

breezy/plugins/git/hg.py

breezy/plugins/git/interrepo.py

breezy/plugins/git/mapping.py

breezy/plugins/git/memorytree.py

breezy/plugins/git/notes

breezy/plugins/git/notes/git-serve.txt

breezy/plugins/git/notes/mapping.txt

breezy/plugins/git/notes/roundtripping.txt

breezy/plugins/git/object_store.py

breezy/plugins/git/pristine_tar.py

breezy/plugins/git/push.py

breezy/plugins/git/refs.py

breezy/plugins/git/remote.py

breezy/plugins/git/repository.py

breezy/plugins/git/revspec.py

breezy/plugins/git/roundtrip.py

breezy/plugins/git/send.py

breezy/plugins/git/server.py

breezy/plugins/git/tests

breezy/plugins/git/tests/__init__.py

breezy/plugins/git/tests/test_blackbox.py

breezy/plugins/git/tests/test_branch.py

breezy/plugins/git/tests/test_builder.py

breezy/plugins/git/tests/test_cache.py

breezy/plugins/git/tests/test_dir.py

breezy/plugins/git/tests/test_fetch.py

breezy/plugins/git/tests/test_git_remote_helper.py

breezy/plugins/git/tests/test_mapping.py

breezy/plugins/git/tests/test_memorytree.py

breezy/plugins/git/tests/test_object_store.py

breezy/plugins/git/tests/test_pristine_tar.py

breezy/plugins/git/tests/test_push.py

breezy/plugins/git/tests/test_refs.py

breezy/plugins/git/tests/test_remote.py

breezy/plugins/git/tests/test_repository.py

breezy/plugins/git/tests/test_revspec.py

breezy/plugins/git/tests/test_roundtrip.py

breezy/plugins/git/tests/test_server.py

breezy/plugins/git/tests/test_transportgit.py

breezy/plugins/git/tests/test_unpeel_map.py

breezy/plugins/git/tests/test_urls.py

breezy/plugins/git/tests/test_workingtree.py

breezy/plugins/git/transportgit.py

breezy/plugins/git/tree.py

breezy/plugins/git/unpeel_map.py

breezy/plugins/git/urls.py

breezy/plugins/git/workingtree.py

breezy/plugins/grep

breezy/plugins/grep/.bzrignore

breezy/plugins/grep/NEWS

breezy/plugins/grep/__init__.py

breezy/plugins/grep/cmds.py

breezy/plugins/grep/grep.py

breezy/plugins/grep/test_grep.py

breezy/plugins/launchpad/account.py

breezy/plugins/launchpad/cmds.py

breezy/plugins/launchpad/lp_api.py

breezy/plugins/launchpad/lp_api_lite.py

breezy/plugins/launchpad/lp_propose.py

breezy/plugins/launchpad/test_account.py

breezy/plugins/launchpad/test_lp_api.py

breezy/plugins/launchpad/test_lp_api_lite.py

breezy/plugins/launchpad/test_lp_login.py

breezy/plugins/launchpad/test_lp_open.py

breezy/plugins/launchpad/test_lp_service.py

breezy/plugins/mtn

breezy/plugins/mtn/__init__.py

breezy/plugins/netrc_credential_store

breezy/plugins/netrc_credential_store/__init__.py

breezy/plugins/netrc_credential_store/tests

breezy/plugins/netrc_credential_store/tests/__init__.py

breezy/plugins/netrc_credential_store/tests/test_netrc.py

breezy/plugins/news_merge

breezy/plugins/news_merge/README

breezy/plugins/news_merge/__init__.py

breezy/plugins/news_merge/news_merge.py

breezy/plugins/news_merge/parser.py

breezy/plugins/news_merge/tests

breezy/plugins/news_merge/tests/__init__.py

breezy/plugins/news_merge/tests/test_news_merge.py

breezy/plugins/po_merge

breezy/plugins/po_merge/README

breezy/plugins/po_merge/__init__.py

breezy/plugins/po_merge/po_merge.py

breezy/plugins/po_merge/tests

breezy/plugins/po_merge/tests/__init__.py

breezy/plugins/po_merge/tests/test_po_merge.py

breezy/plugins/repodebug

breezy/plugins/repodebug/__init__.py

breezy/plugins/repodebug/check_chk.py

breezy/plugins/repodebug/chk_used_by.py

breezy/plugins/repodebug/fetch_all_records.py

breezy/plugins/repodebug/file_refs.py

breezy/plugins/repodebug/missing_keys_for_stacking_fixer.py

breezy/plugins/repodebug/repo_has_key.py

breezy/plugins/repodebug/repo_keys.py

breezy/plugins/repodebug/tests

breezy/plugins/repodebug/tests/__init__.py

breezy/plugins/stats

breezy/plugins/stats/__init__.py

breezy/plugins/stats/classify.py

breezy/plugins/stats/cmds.py

breezy/plugins/stats/test_classify.py

breezy/plugins/stats/test_stats.py

breezy/plugins/upload

breezy/plugins/upload/.bzrignore

breezy/plugins/upload/NEWS

breezy/plugins/upload/README

breezy/plugins/upload/__init__.py

breezy/plugins/upload/cmds.py

breezy/plugins/upload/tests

breezy/plugins/upload/tests/__init__.py

breezy/plugins/upload/tests/test_auto_upload_hook.py

breezy/plugins/upload/tests/test_upload.py

breezy/plugins/weave_fmt

breezy/plugins/weave_fmt/__init__.py

breezy/plugins/weave_fmt/branch.py

breezy/plugins/weave_fmt/bzrdir.py

breezy/plugins/weave_fmt/test_bzrdir.py

breezy/plugins/weave_fmt/test_repository.py

breezy/plugins/weave_fmt/test_workingtree.py

breezy/plugins/weave_fmt/workingtree.py

breezy/push.py

breezy/python-compat.h

breezy/pyutils.py

breezy/readdir.h

breezy/reconfigure.py

breezy/recordcounter.py

breezy/rename_map.py

breezy/rules.py

breezy/send.py

breezy/shelf.py

breezy/shelf_ui.py

breezy/sixish.py

breezy/smtp_connection.py

breezy/static_tuple.py

breezy/switch.py

breezy/tests/blackbox/test_alias.py

breezy/tests/blackbox/test_bisect.py

breezy/tests/blackbox/test_branches.py

breezy/tests/blackbox/test_bundle_info.py

breezy/tests/blackbox/test_cat_revision.py

breezy/tests/blackbox/test_check.py

breezy/tests/blackbox/test_clean_tree.py

breezy/tests/blackbox/test_config.py

breezy/tests/blackbox/test_cp.py

breezy/tests/blackbox/test_deleted.py

breezy/tests/blackbox/test_dump_btree.py

breezy/tests/blackbox/test_export_pot.py

breezy/tests/blackbox/test_fetch_ghosts.py

breezy/tests/blackbox/test_filesystem_cicp.py

breezy/tests/blackbox/test_filtered_view_ops.py

breezy/tests/blackbox/test_hooks.py

breezy/tests/blackbox/test_import.py

breezy/tests/blackbox/test_link_tree.py

breezy/tests/blackbox/test_lookup_revision.py

breezy/tests/blackbox/test_lsprof.py

breezy/tests/blackbox/test_mkdir.py

breezy/tests/blackbox/test_modified.py

breezy/tests/blackbox/test_pack.py

breezy/tests/blackbox/test_ping.py

breezy/tests/blackbox/test_plugins.py

breezy/tests/blackbox/test_reconfigure.py

breezy/tests/blackbox/test_reference.py

breezy/tests/blackbox/test_remember_option.py

breezy/tests/blackbox/test_repair_workingtree.py

breezy/tests/blackbox/test_resolve.py

breezy/tests/blackbox/test_rmbranch.py

breezy/tests/blackbox/test_script.py

breezy/tests/blackbox/test_shell_complete.py

breezy/tests/blackbox/test_shelve.py

breezy/tests/blackbox/test_switch.py

breezy/tests/blackbox/test_unknowns.py

breezy/tests/blackbox/test_verify_signatures.py

breezy/tests/blackbox/test_view.py

breezy/tests/commands

breezy/tests/commands/__init__.py

breezy/tests/commands/test_branch.py

breezy/tests/commands/test_cat.py

breezy/tests/commands/test_checkout.py

breezy/tests/commands/test_commit.py

breezy/tests/commands/test_init.py

breezy/tests/commands/test_init_repository.py

breezy/tests/commands/test_merge.py

breezy/tests/commands/test_missing.py

breezy/tests/commands/test_pull.py

breezy/tests/commands/test_push.py

breezy/tests/commands/test_revert.py

breezy/tests/commands/test_update.py

breezy/tests/fake_command.py

breezy/tests/features.py

breezy/tests/file_utils.py

breezy/tests/fixtures.py

breezy/tests/https_server.py

breezy/tests/matchers.py

breezy/tests/per_branch/test_check.py

breezy/tests/per_branch/test_config.py

breezy/tests/per_branch/test_create_clone.py

breezy/tests/per_branch/test_dotted_revno_to_revision_id.py

breezy/tests/per_branch/test_get_revision_id_to_revno_map.py

breezy/tests/per_branch/test_iter_merge_sorted_revisions.py

breezy/tests/per_branch/test_reconcile.py

breezy/tests/per_branch/test_revision_id_to_dotted_revno.py

breezy/tests/per_branch/test_revision_id_to_revno.py

breezy/tests/per_branch/test_sprout.py

breezy/tests/per_branch/test_stacking.py

breezy/tests/per_bzrdir

breezy/tests/per_bzrdir/__init__.py

breezy/tests/per_bzrdir/test_bzrdir.py

breezy/tests/per_controldir/test_format.py

breezy/tests/per_controldir/test_push.py

breezy/tests/per_controldir_colo

breezy/tests/per_controldir_colo/__init__.py

breezy/tests/per_controldir_colo/test_supported.py

breezy/tests/per_controldir_colo/test_unsupported.py

breezy/tests/per_foreign_vcs

breezy/tests/per_foreign_vcs/__init__.py

breezy/tests/per_foreign_vcs/test_branch.py

breezy/tests/per_foreign_vcs/test_repository.py

breezy/tests/per_interbranch

breezy/tests/per_interbranch/__init__.py

breezy/tests/per_interbranch/test_copy_content_into.py

breezy/tests/per_interbranch/test_fetch.py

breezy/tests/per_interbranch/test_get.py

breezy/tests/per_interbranch/test_pull.py

breezy/tests/per_interbranch/test_push.py

breezy/tests/per_interrepository/test_fetch.py

breezy/tests/per_intertree/test_file_content_matches.py

breezy/tests/per_inventory

breezy/tests/per_inventory/__init__.py

breezy/tests/per_inventory/basics.py

breezy/tests/per_merger.py

breezy/tests/per_pack_repository.py

breezy/tests/per_repository/test_add_fallback_repository.py

breezy/tests/per_repository/test_check.py

breezy/tests/per_repository/test_fetch.py

breezy/tests/per_repository/test_file_graph.py

breezy/tests/per_repository/test_get_parent_map.py

breezy/tests/per_repository/test_has_revisions.py

breezy/tests/per_repository/test_has_same_location.py

breezy/tests/per_repository/test_locking.py

breezy/tests/per_repository/test_pack.py

breezy/tests/per_repository/test_refresh_data.py

breezy/tests/per_repository/test_signatures.py

breezy/tests/per_repository/test_write_group.py

breezy/tests/per_repository_chk

breezy/tests/per_repository_chk/__init__.py

breezy/tests/per_repository_chk/test_supported.py

breezy/tests/per_repository_chk/test_unsupported.py

breezy/tests/per_repository_reference

breezy/tests/per_repository_reference/__init__.py

breezy/tests/per_repository_reference/test__make_parents_provider.py

breezy/tests/per_repository_reference/test_add_inventory.py

breezy/tests/per_repository_reference/test_add_revision.py

breezy/tests/per_repository_reference/test_add_signature_text.py

breezy/tests/per_repository_reference/test_all_revision_ids.py

breezy/tests/per_repository_reference/test_break_lock.py

breezy/tests/per_repository_reference/test_check.py

breezy/tests/per_repository_reference/test_commit_with_stacking.py

breezy/tests/per_repository_reference/test_default_stacking.py

breezy/tests/per_repository_reference/test_fetch.py

breezy/tests/per_repository_reference/test_get_record_stream.py

breezy/tests/per_repository_reference/test_get_rev_id_for_revno.py

breezy/tests/per_repository_reference/test_graph.py

breezy/tests/per_repository_reference/test_initialize.py

breezy/tests/per_repository_reference/test_unlock.py

breezy/tests/per_repository_vf

breezy/tests/per_repository_vf/__init__.py

breezy/tests/per_repository_vf/helpers.py

breezy/tests/per_repository_vf/test__generate_text_key_index.py

breezy/tests/per_repository_vf/test_add_inventory_by_delta.py

breezy/tests/per_repository_vf/test_check.py

breezy/tests/per_repository_vf/test_check_reconcile.py

breezy/tests/per_repository_vf/test_fetch.py

breezy/tests/per_repository_vf/test_fileid_involved.py

breezy/tests/per_repository_vf/test_find_text_key_references.py

breezy/tests/per_repository_vf/test_merge_directive.py

breezy/tests/per_repository_vf/test_reconcile.py

breezy/tests/per_repository_vf/test_refresh_data.py

breezy/tests/per_repository_vf/test_repository.py

breezy/tests/per_repository_vf/test_write_group.py

breezy/tests/per_tree/test_annotate_iter.py

breezy/tests/per_tree/test_archive.py

breezy/tests/per_tree/test_export.py

breezy/tests/per_tree/test_get_file_with_stat.py

breezy/tests/per_tree/test_get_root_id.py

breezy/tests/per_tree/test_ids.py

breezy/tests/per_tree/test_is_executable.py

breezy/tests/per_tree/test_iter_search_rules.py

breezy/tests/per_tree/test_locking.py

breezy/tests/per_tree/test_path_content_summary.py

breezy/tests/per_uifactory

breezy/tests/per_uifactory/__init__.py

breezy/tests/per_workingtree/test_annotate_iter.py

breezy/tests/per_workingtree/test_check.py

breezy/tests/per_workingtree/test_check_state.py

breezy/tests/per_workingtree/test_content_filters.py

breezy/tests/per_workingtree/test_eol_conversion.py

breezy/tests/per_workingtree/test_get_file_mtime.py

breezy/tests/per_workingtree/test_remove.py

breezy/tests/per_workingtree/test_shelf_manager.py

breezy/tests/per_workingtree/test_symlinks.py

breezy/tests/per_workingtree/test_uncommit.py

breezy/tests/per_workingtree/test_views.py

breezy/tests/scenarios.py

breezy/tests/script.py

breezy/tests/ssl_certs

breezy/tests/ssl_certs/__init__.py

breezy/tests/ssl_certs/ca.crt

breezy/tests/ssl_certs/ca.key

breezy/tests/ssl_certs/create_ssls.py

breezy/tests/ssl_certs/server.crt

breezy/tests/ssl_certs/server.csr

breezy/tests/ssl_certs/server_with_pass.key

breezy/tests/ssl_certs/server_without_pass.key

breezy/tests/test__annotator.py

breezy/tests/test__bencode.py

breezy/tests/test__btree_serializer.py

breezy/tests/test__chk_map.py

breezy/tests/test__chunks_to_lines.py

breezy/tests/test__dirstate_helpers.py

breezy/tests/test__groupcompress.py

breezy/tests/test__known_graph.py

breezy/tests/test__rio.py

breezy/tests/test__simple_set.py

breezy/tests/test__static_tuple.py

breezy/tests/test__walkdirs_win32.py

breezy/tests/test_bisect.py

breezy/tests/test_bisect_multi.py

breezy/tests/test_branchbuilder.py

breezy/tests/test_btree_index.py

breezy/tests/test_bugtracker.py

breezy/tests/test_cethread.py

breezy/tests/test_chk_map.py

breezy/tests/test_chk_serializer.py

breezy/tests/test_chunk_writer.py

breezy/tests/test_clean_tree.py

breezy/tests/test_cleanup.py

breezy/tests/test_cmdline.py

breezy/tests/test_controldir.py

breezy/tests/test_counted_lock.py

breezy/tests/test_crash.py

breezy/tests/test_debug.py

breezy/tests/test_directory_service.py

breezy/tests/test_email_message.py

breezy/tests/test_eol_filters.py

breezy/tests/test_estimate_compressed_size.py

breezy/tests/test_export.py

breezy/tests/test_export_pot.py

breezy/tests/test_features.py

breezy/tests/test_fetch_ghosts.py

breezy/tests/test_fifo_cache.py

breezy/tests/test_filter_tree.py

breezy/tests/test_filters.py

breezy/tests/test_fixtures.py

breezy/tests/test_foreign.py

breezy/tests/test_graph.py

breezy/tests/test_groupcompress.py

breezy/tests/test_help.py

breezy/tests/test_hooks.py

breezy/tests/test_https_urllib.py

breezy/tests/test_i18n.py

breezy/tests/test_import_tariff.py

breezy/tests/test_index.py

breezy/tests/test_info.py

breezy/tests/test_inventory_delta.py

breezy/tests/test_library_state.py

breezy/tests/test_lock.py

breezy/tests/test_lru_cache.py

breezy/tests/test_lsprof.py

breezy/tests/test_mail_client.py

breezy/tests/test_matchers.py

breezy/tests/test_mergetools.py

breezy/tests/test_multiparent.py

breezy/tests/test_mutabletree.py

breezy/tests/test_pack.py

breezy/tests/test_patches_data/binary-after-normal.patch

breezy/tests/test_patches_data/binary.patch

breezy/tests/test_patches_data/diff-7

breezy/tests/test_patches_data/mod-7

breezy/tests/test_patches_data/orig-7

breezy/tests/test_pyutils.py

breezy/tests/test_reconfigure.py

breezy/tests/test_rename_map.py

breezy/tests/test_rules.py

breezy/tests/test_scenarios.py

breezy/tests/test_script.py

breezy/tests/test_serializer.py

breezy/tests/test_server.py

breezy/tests/test_shelf.py

breezy/tests/test_shelf_ui.py

breezy/tests/test_smart_request.py

breezy/tests/test_smart_signals.py

breezy/tests/test_smtp_connection.py

breezy/tests/test_switch.py

breezy/tests/test_test_server.py

breezy/tests/test_transport_log.py

breezy/tests/test_treeshape.py

breezy/tests/test_uncommit.py

breezy/tests/test_upgrade_stacked.py

breezy/tests/test_upstream_import.py

breezy/tests/test_url_policy_open.py

breezy/tests/test_utextwrap.py

breezy/tests/test_versionedfile.py

breezy/tests/test_vf_search.py

breezy/tests/test_views.py

breezy/tests/test_win32utils.py

breezy/tests/testui.py

breezy/tests/transport_util.py

breezy/tests/ui_testing.py

breezy/transport/brokenrename.py

breezy/transport/gio_transport.py

breezy/transport/log.py

breezy/transport/nosmart.py

breezy/transport/pathfilter.py

breezy/transport/trace.py

breezy/transport/unlistable.py

breezy/upstream_import.py

breezy/url_policy_open.py

breezy/utextwrap.py

breezy/util/simplemapi.py

breezy/util/tests

breezy/util/tests/__init__.py

breezy/version_info_formats/format_custom.py

breezy/views.py

byov.conf

contrib/bash/brz

contrib/bash/brzbashprompt.sh

contrib/brz_access

contrib/bzr_ssh_path_limiter

contrib/debian

contrib/debian/default

contrib/debian/init.d

doc/developers

doc/developers/_static

doc/developers/_static/brz-doc.css

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/branding.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/colocated-branches.txt

doc/developers/conf.py

doc/developers/config-rationale.txt

doc/developers/configuration.txt

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/dirstate.txt

doc/developers/documenting-changes.txt

doc/developers/ec2.txt

doc/developers/feature-flags.txt

doc/developers/fetch.txt

doc/developers/groupcompress-design.txt

doc/developers/implementation-notes.txt

doc/developers/improved_chk_index.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/miscellaneous-notes.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance.dot

doc/developers/plans

doc/developers/plans/index.txt

doc/developers/plans/nested-trees.txt

doc/developers/plans/performance

doc/developers/plans/performance/add.txt

doc/developers/plans/performance/annotate.txt

doc/developers/plans/performance/bundle-creation.txt

doc/developers/plans/performance/commit.txt

doc/developers/plans/performance/diff.txt

doc/developers/plans/performance/directory-fingerprints.txt

doc/developers/plans/performance/gc.txt

doc/developers/plans/performance/incremental-push-pull.txt

doc/developers/plans/performance/initial-push-pull.txt

doc/developers/plans/performance/merge-scaling.txt

doc/developers/plans/performance/missing.txt

doc/developers/plans/performance/performance-use-case-analysis.txt

doc/developers/plans/performance/planned-change-integration.txt

doc/developers/plans/performance/planned-performance-changes.txt

doc/developers/plans/performance/revert.txt

doc/developers/plans/performance/roadmap.txt

doc/developers/plans/performance/status.txt

doc/developers/plans/performance/uncommit.txt

doc/developers/plans/performance/update.txt

doc/developers/plans/tortoise-strategy.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/principles.txt

doc/developers/profiling.txt

doc/developers/proposals

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revision-properties.txt

doc/developers/specifications.txt

doc/developers/testing.txt

doc/developers/transports.txt

doc/developers/ui.txt

doc/developers/win32_build_setup.txt

doc/en

doc/en/Makefile

doc/en/_static

doc/en/_static/en

doc/en/_static/en/Makefile

doc/en/_static/en/brz-en-quick-reference.pdf

doc/en/_static/en/brz-en-quick-reference.png

doc/en/_static/en/brz-en-quick-reference.svg

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/admin-guide

doc/en/admin-guide/advanced.txt

doc/en/admin-guide/backup.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/index-plain.txt

doc/en/admin-guide/index.txt

doc/en/admin-guide/integration.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/licence.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/security.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/release-notes/brz-3.0.txt

doc/en/release-notes/bzr-0.1.txt

doc/en/release-notes/bzr-0.10.txt

doc/en/release-notes/bzr-0.11.txt

doc/en/release-notes/bzr-0.12.txt

doc/en/release-notes/bzr-0.13.txt

doc/en/release-notes/bzr-0.14.txt

doc/en/release-notes/bzr-0.15.txt

doc/en/release-notes/bzr-0.16.txt

doc/en/release-notes/bzr-0.17.txt

doc/en/release-notes/bzr-0.18.txt

doc/en/release-notes/bzr-0.6.txt

doc/en/release-notes/bzr-0.7.txt

doc/en/release-notes/bzr-0.8.txt

doc/en/release-notes/bzr-0.9.txt

doc/en/release-notes/bzr-0.90.txt

doc/en/release-notes/bzr-0.91.txt

doc/en/release-notes/bzr-0.92.txt

doc/en/release-notes/bzr-1.0.txt

doc/en/release-notes/bzr-1.1.txt

doc/en/release-notes/bzr-1.10.txt

doc/en/release-notes/bzr-1.11.txt

doc/en/release-notes/bzr-1.12.txt

doc/en/release-notes/bzr-1.13.txt

doc/en/release-notes/bzr-1.14.txt

doc/en/release-notes/bzr-1.15.txt

doc/en/release-notes/bzr-1.16.txt

doc/en/release-notes/bzr-1.17.txt

doc/en/release-notes/bzr-1.18.txt

doc/en/release-notes/bzr-1.2.txt

doc/en/release-notes/bzr-1.3.txt

doc/en/release-notes/bzr-1.4.txt

doc/en/release-notes/bzr-1.5.txt

doc/en/release-notes/bzr-1.6.txt

doc/en/release-notes/bzr-1.7.txt

doc/en/release-notes/bzr-1.8.txt

doc/en/release-notes/bzr-1.9.txt

doc/en/release-notes/bzr-2.0.txt

doc/en/release-notes/bzr-2.1.txt

doc/en/release-notes/bzr-2.2.txt

doc/en/release-notes/bzr-2.4.txt

doc/en/release-notes/bzr-2.5.txt

doc/en/release-notes/bzr-2.6.txt

doc/en/release-notes/bzr-2.7.txt

doc/en/release-notes/bzr-2.8.txt

doc/en/release-notes/fork.txt

doc/en/release-notes/release-template.txt

doc/en/release-notes/series-template.txt

doc/en/tutorials

doc/en/tutorials/index.txt

doc/en/tutorials/licence.txt

doc/en/tutorials/using_breezy_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/licence.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/breezy_workflows.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_breezy.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/gpg_signatures.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_breezy.txt

doc/en/user-guide/introducing_breezy.txt

doc/en/user-guide/licence.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/switch_store.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/en/whats-new

doc/en/whats-new/template.txt

doc/en/whats-new/whats-new-in-2.1.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/en/whats-new/whats-new-in-2.3.txt

doc/en/whats-new/whats-new-in-2.4.txt

doc/en/whats-new/whats-new-in-2.5.txt

doc/en/whats-new/whats-new-in-2.6.txt

doc/en/whats-new/whats-new-in-2.7.txt

doc/en/whats-new/whats-new-in-2.8.txt

doc/en/whats-new/whats-new-in-3.0.txt

doc/index.txt

doc/news-template.txt

man1

po/ar.po

po/ast.po

po/brz.pot

po/bs.po

po/ca.po

po/cs.po

po/de.po

po/el.po

po/en_AU.po

po/en_GB.po

po/es.po

po/fa.po

po/fo.po

po/fr.po

po/gl.po

po/he.po

po/id.po

po/it.po

po/ja.po

po/ko.po

po/ms.po

po/my.po

po/nb.po

po/nl.po

po/oc.po

po/pl.po

po/pt_BR.po

po/ro.po

po/ru.po

po/sco.po

po/si.po

po/sk.po

po/sr.po

po/sv.po

po/tr.po

po/ug.po

po/uk.po

po/vi.po

po/zh_CN.po

python3.passing

tools/brz_epydoc

tools/brz_epydoc_uid.py

tools/check-newsbugs.py

tools/fixed-in.py

tools/generate_release_notes.py

tools/package_docs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-control.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/subunit-sum

tools/testr-run.py

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/py2exe_boot_common.py

tools/win32/run_script.py

files renamed:
doc/developers/HACKING.txt => HACKING

doc/en/release-notes/bzr-2.3.txt => NEWS

README.rst => README

brz => bzr

breezy/ => bzrlib/

breezy/bzr/bzrdir.py => bzrlib/bzrdir.py

breezy/bzr/dirstate.py => bzrlib/dirstate.py

breezy/export.py => bzrlib/export/__init__.py

breezy/archive/tar.py => bzrlib/export/tar_exporter.py

breezy/archive/zip.py => bzrlib/export/zip_exporter.py

breezy/help_topics/__init__.py => bzrlib/help_topics.py

breezy/bzr/inventory.py => bzrlib/inventory.py

breezy/bzr/knit.py => bzrlib/knit.py

breezy/_patiencediff_py.py => bzrlib/patiencediff.py

breezy/plugins/launchpad/lp_directory.py => bzrlib/plugins/launchpad/lp_indirect.py

breezy/plugins/launchpad/test_lp_directory.py => bzrlib/plugins/launchpad/test_lp_indirect.py

breezy/bzr/remote.py => bzrlib/remote.py

breezy/bzr/knitrepo.py => bzrlib/repofmt/knitrepo.py

breezy/plugins/weave_fmt/repository.py => bzrlib/repofmt/weaverepo.py

breezy/commit_signature_commands.py => bzrlib/sign_my_commits.py

breezy/bzr/smart/ => bzrlib/smart/

breezy/plugins/weave_fmt/store/ => bzrlib/store/

breezy/plugins/weave_fmt/store/versioned.py => bzrlib/store/versioned/__init__.py

breezy/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

breezy/tests/http_server.py => bzrlib/tests/HttpServer.py

breezy/tests/blackbox/test_send.py => bzrlib/tests/blackbox/test_bundle.py

breezy/tests/per_branch/ => bzrlib/tests/branch_implementations/

breezy/tests/per_controldir/ => bzrlib/tests/bzrdir_implementations/

breezy/tests/per_controldir/test_controldir.py => bzrlib/tests/bzrdir_implementations/test_bzrdir.py

breezy/tests/per_interrepository/ => bzrlib/tests/interrepository_implementations/

breezy/tests/per_intertree/ => bzrlib/tests/intertree_implementations/

breezy/tests/per_repository/ => bzrlib/tests/repository_implementations/

breezy/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

breezy/plugins/weave_fmt/test_store.py => bzrlib/tests/test_store.py

breezy/tests/per_transport.py => bzrlib/tests/test_transport_implementations.py

breezy/tests/per_versionedfile.py => bzrlib/tests/test_versionedfile.py

breezy/tests/per_tree/ => bzrlib/tests/tree_implementations/

breezy/tests/per_workingtree/ => bzrlib/tests/workingtree_implementations/

breezy/bzr/textinv.py => bzrlib/textinv.py

breezy/util/_bencode_py.py => bzrlib/util/bencode.py

breezy/bzr/versionedfile.py => bzrlib/versionedfile.py

breezy/bzr/weave.py => bzrlib/weave.py

breezy/bzr/weavefile.py => bzrlib/weavefile.py

breezy/bzr/workingtree_4.py => bzrlib/workingtree_4.py

breezy/plugins/weave_fmt/xml4.py => bzrlib/xml4.py

breezy/bzr/xml8.py => bzrlib/xml5.py

breezy/bzr/xml7.py => bzrlib/xml7.py

breezy/bzr/xml_serializer.py => bzrlib/xml_serializer.py

contrib/emacs/brz-mode.el => contrib/emacs/bzr-mode.el

doc/en/tutorials/centralized_workflow.txt => doc/centralized_workflow.txt

breezy/help_topics/en/configuration.txt => doc/configuration.txt

doc/en/user-guide/http_smart_server.txt => doc/http_smart_server.txt

doc/en/user-guide/plugins.txt => doc/plugins.txt

doc/en/user-guide/server.txt => doc/server.txt

doc/en/user-guide/setting_up_email.txt => doc/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt => doc/specifying_revisions.txt

doc/en/tutorials/tutorial.txt => doc/tutorial.txt

doc/en/user-guide/using_aliases.txt => doc/using_aliases.txt

doc/en/user-guide/version_info.txt => doc/version_info.txt

tools/generate_docs.py => generate_docs.py

breezy/doc_generate/ => tools/doc_generate/

tools/win32/breezy.url => tools/win32/bazaar.url

tools/win32/brz-win32-bdist-postinstall.py => tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/brz.iss.cog => tools/win32/bzr.iss.cog

tools/win32/brz_postinstall.py => tools/win32/bzr_postinstall.py

tools/win32/start_brz.bat => tools/win32/start_bzr.bat

files modified:
.bzrignore

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/__init__.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/win32utils.py

bzrlib/workingtree.py

doc/default.css

profile_imports.py

setup.py *

tools/capture_tree.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32/info.txt

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.a

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

"""

from __future__ import absolute_import

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

from ..lazy_import import lazy_import

lazy_import(globals(), """

import gzip

from breezy import (

debug,

diff,

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

static_tuple,

trace,

tsort,

tuned_gzip,

progress,

ui,

)

from breezy.bzr import (

index as _mod_index,

pack,

)

from breezy.bzr import pack_repo

from breezy.i18n import gettext

""")

from .. import (

annotate,

errors,

osutils,

)

from ..errors import (

InternalBzrError,

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

NoSuchFile,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from ..osutils import (

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import (

contains_whitespace,

sha_string,

contains_linebreaks,

sha_strings,

split_lines,

)

from ..sixish import (

BytesIO,

100

range,

101

viewitems,

102

viewvalues,

103

)

104

from ..bzr.versionedfile import (

105

_KeyRefs,

106

AbsentContentFactory,

107

adapter_registry,

108

ConstantMapper,

109

ContentFactory,

110

sort_groupcompress,

111

VersionedFilesWithFallbacks,

112

)

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

100

import bzrlib.ui

101

import bzrlib.weave

102

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

113

103

114

104

115

105

# TODO: Split out code specific to this format into an associated object.

127

117

128

118

DATA_SUFFIX = '.knit'

129

119

INDEX_SUFFIX = '.kndx'

130

_STREAM_MIN_BUFFER_SIZE = 5*1024*1024

131

132

133

class KnitError(InternalBzrError):

134

135

_fmt = "Knit error"

136

137

138

class KnitCorrupt(KnitError):

139

140

_fmt = "Knit %(filename)s corrupt: %(how)s"

141

142

def __init__(self, filename, how):

143

KnitError.__init__(self)

144

self.filename = filename

145

self.how = how

146

147

148

class SHA1KnitCorrupt(KnitCorrupt):

149

150

_fmt = ("Knit %(filename)s corrupt: sha-1 of reconstructed text does not "

151

"match expected sha-1. key %(key)s expected sha %(expected)s actual "

152

"sha %(actual)s")

153

154

def __init__(self, filename, actual, expected, key, content):

155

KnitError.__init__(self)

156

self.filename = filename

157

self.actual = actual

158

self.expected = expected

159

self.key = key

160

self.content = content

161

162

163

class KnitDataStreamIncompatible(KnitError):

164

# Not raised anymore, as we can convert data streams. In future we may

165

# need it again for more exotic cases, so we're keeping it around for now.

166

167

_fmt = "Cannot insert knit data stream of format \"%(stream_format)s\" into knit of format \"%(target_format)s\"."

168

169

def __init__(self, stream_format, target_format):

170

self.stream_format = stream_format

171

self.target_format = target_format

172

173

174

class KnitDataStreamUnknown(KnitError):

175

# Indicates a data stream we don't know how to handle.

176

177

_fmt = "Cannot parse knit data stream of format \"%(stream_format)s\"."

178

179

def __init__(self, stream_format):

180

self.stream_format = stream_format

181

182

183

class KnitHeaderError(KnitError):

184

185

_fmt = 'Knit header error: %(badline)r unexpected for file "%(filename)s".'

186

187

def __init__(self, badline, filename):

188

KnitError.__init__(self)

189

self.badline = badline

190

self.filename = filename

191

192

193

class KnitIndexUnknownMethod(KnitError):

194

"""Raised when we don't understand the storage method.

195

196

Currently only 'fulltext' and 'line-delta' are supported.

197

"""

198

199

_fmt = ("Knit index %(filename)s does not have a known method"

200

" in options: %(options)r")

201

202

def __init__(self, filename, options):

203

KnitError.__init__(self)

204

self.filename = filename

205

self.options = options

206

207

208

class KnitAdapter(object):

209

"""Base class for knit record adaption."""

210

211

def __init__(self, basis_vf):

212

"""Create an adapter which accesses full texts from basis_vf.

213

214

:param basis_vf: A versioned file to access basis texts of deltas from.

215

May be None for adapters that do not need to access basis texts.

216

"""

217

self._data = KnitVersionedFiles(None, None)

218

self._annotate_factory = KnitAnnotateFactory()

219

self._plain_factory = KnitPlainFactory()

220

self._basis_vf = basis_vf

221

222

223

class FTAnnotatedToUnannotated(KnitAdapter):

224

"""An adapter from FT annotated knits to unannotated ones."""

225

226

def get_bytes(self, factory):

227

annotated_compressed_bytes = factory._raw_record

228

rec, contents = \

229

self._data._parse_record_unchecked(annotated_compressed_bytes)

230

content = self._annotate_factory.parse_fulltext(contents, rec[1])

231

size, bytes = self._data._record_to_data((rec[1],), rec[3], content.text())

232

return bytes

233

234

235

class DeltaAnnotatedToUnannotated(KnitAdapter):

236

"""An adapter for deltas from annotated to unannotated."""

237

238

def get_bytes(self, factory):

239

annotated_compressed_bytes = factory._raw_record

240

rec, contents = \

241

self._data._parse_record_unchecked(annotated_compressed_bytes)

242

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

243

plain=True)

244

contents = self._plain_factory.lower_line_delta(delta)

245

size, bytes = self._data._record_to_data((rec[1],), rec[3], contents)

246

return bytes

247

248

249

class FTAnnotatedToFullText(KnitAdapter):

250

"""An adapter from FT annotated knits to unannotated ones."""

251

252

def get_bytes(self, factory):

253

annotated_compressed_bytes = factory._raw_record

254

rec, contents = \

255

self._data._parse_record_unchecked(annotated_compressed_bytes)

256

content, delta = self._annotate_factory.parse_record(factory.key[-1],

257

contents, factory._build_details, None)

258

return b''.join(content.text())

259

260

261

class DeltaAnnotatedToFullText(KnitAdapter):

262

"""An adapter for deltas from annotated to unannotated."""

263

264

def get_bytes(self, factory):

265

annotated_compressed_bytes = factory._raw_record

266

rec, contents = \

267

self._data._parse_record_unchecked(annotated_compressed_bytes)

268

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

269

plain=True)

270

compression_parent = factory.parents[0]

271

basis_entry = next(self._basis_vf.get_record_stream(

272

[compression_parent], 'unordered', True))

273

if basis_entry.storage_kind == 'absent':

274

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

275

basis_chunks = basis_entry.get_bytes_as('chunked')

276

basis_lines = osutils.chunks_to_lines(basis_chunks)

277

# Manually apply the delta because we have one annotated content and

278

# one plain.

279

basis_content = PlainKnitContent(basis_lines, compression_parent)

280

basis_content.apply_delta(delta, rec[1])

281

basis_content._should_strip_eol = factory._build_details[1]

282

return b''.join(basis_content.text())

283

284

285

class FTPlainToFullText(KnitAdapter):
    """Adapter turning a plain (unannotated) fulltext knit record into bytes."""

    def get_bytes(self, factory):
        """Return the full text held in factory's raw record as one string.

        :param factory: A content factory exposing ``_raw_record``, ``key``
            and ``_build_details``.
        :return: The text chunks of the parsed content joined together.
        """
        raw_record = factory._raw_record
        # The record header is not needed here, only the payload lines.
        _header, payload = self._data._parse_record_unchecked(raw_record)
        parsed, _delta = self._plain_factory.parse_record(
            factory.key[-1], payload, factory._build_details, None)
        return b''.join(parsed.text())

295

296

297

class DeltaPlainToFullText(KnitAdapter):
    """An adapter from plain (unannotated) line-delta knits to fulltexts."""

    def get_bytes(self, factory):
        """Rebuild the full text for factory by applying its delta to the basis.

        :param factory: A content factory exposing ``_raw_record``,
            ``parents`` and ``_build_details``.
        :return: The reconstructed text as a single bytestring.
        :raises errors.RevisionNotPresent: If the basis versioned file reports
            the compression parent as absent.
        """
        compressed_bytes = factory._raw_record
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = next(self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True))
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_chunks = basis_entry.get_bytes_as('chunked')
        basis_lines = osutils.chunks_to_lines(basis_chunks)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # parse_record is handed the raw delta lines together with the basis
        # content, so the delta is not parsed separately beforehand (the
        # result of such a parse was previously computed and discarded).
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return b''.join(content.text())

319

320

321

class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
                 annotated, knit=None, network_bytes=None):
        """Create a KnitContentFactory for key.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional knit consulted by get_bytes_as() for
            'chunked'/'fulltext' requests that no direct adapter handles.
        :param network_bytes: None to calculate the network bytes on demand,
            not-none if they are already known.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        # The storage kind name encodes both the compression method and
        # whether the record carries annotations.
        kind = 'delta' if build_details[0] == 'line-delta' else 'ft'
        annotated_kind = 'annotated-' if annotated else ''
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._network_bytes = network_bytes
        self._build_details = build_details
        self._knit = knit

    def _create_network_bytes(self):
        """Create a fully serialised network version for transmission."""
        # Layout: storage_kind, key, parents, Noeol flag, raw_record
        key_bytes = b'\x00'.join(self.key)
        if self.parents is None:
            parent_bytes = b'None:'
        else:
            parent_bytes = b'\t'.join(
                b'\x00'.join(parent) for parent in self.parents)
        noeol = b'N' if self._build_details[1] else b' '
        self._network_bytes = b"%s\n%s\n%s\n%s%s" % (
            self.storage_kind.encode('ascii'), key_bytes,
            parent_bytes, noeol, self._raw_record)

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested storage_kind.

        :param storage_kind: Either this factory's own storage kind (the
            serialised network form is returned), 'chunked' or 'fulltext'.
        :raises errors.UnavailableRepresentation: If the requested kind
            cannot be produced.
        """
        if storage_kind == self.storage_kind:
            # Serialise lazily and reuse the result afterwards.
            if self._network_bytes is None:
                self._create_network_bytes()
            return self._network_bytes
        if ('-ft-' in self.storage_kind
                and storage_kind in ('chunked', 'fulltext')):
            # Fulltext records convert directly through a registered adapter.
            make_adapter = adapter_registry.get(
                (self.storage_kind, 'fulltext'))
            text = make_adapter(None).get_bytes(self)
            if storage_kind == 'chunked':
                return [text]
            return text
        if self._knit is not None:
            # Not redundant with direct conversion above - that only handles
            # fulltext cases.
            if storage_kind == 'chunked':
                return self._knit.get_lines(self.key[0])
            if storage_kind == 'fulltext':
                return self._knit.get_text(self.key[0])
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)

400

401

402

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory which can either generate full text or a wire form.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for this
            key.
        :param first: Is this the first content object returned from generator?
            if it is, its storage kind is knit-delta-closure, otherwise it is
            knit-delta-closure-ref
        """
        # Run the base initialiser, as KnitContentFactory does, so that any
        # attribute the base class sets up exists on lazy factories too.
        ContentFactory.__init__(self)
        self.key = key
        self.parents = parents
        self.sha1 = None
        self._generator = generator
        self.storage_kind = "knit-delta-closure"
        if not first:
            self.storage_kind = self.storage_kind + "-ref"
        self._first = first

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested storage_kind.

        :param storage_kind: This factory's own storage kind (wire form),
            'chunked' or 'fulltext'.
        :raises errors.UnavailableRepresentation: If the requested kind
            cannot be produced.
        """
        if storage_kind == self.storage_kind:
            if self._first:
                return self._generator._wire_bytes()
            else:
                # all the keys etc are contained in the bytes returned in the
                # first record.
                return b''
        if storage_kind in ('chunked', 'fulltext'):
            chunks = self._generator._get_one_work(self.key).text()
            if storage_kind == 'chunked':
                return chunks
            else:
                return b''.join(chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)

444

445

446

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to an iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'. (Not inspected here.)
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset handed to _NetworkContentMapGenerator together
        with bytes.
    :return: The result of the generator's get_record_stream().
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

455

456

457

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: Offset in bytes at which the key line starts.
    :return: A one-element list holding the KnitContentFactory for the record.
    """
    # Key line: elements separated by NUL.
    key_start = line_end
    key_end = bytes.find(b'\n', key_start)
    key = tuple(bytes[key_start:key_end].split(b'\x00'))
    # Parent line: either the literal 'None:' or tab separated keys.
    parents_start = key_end + 1
    parents_end = bytes.find(b'\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line == b'None:':
        parents = None
    else:
        parent_keys = []
        for segment in parent_line.split(b'\t'):
            if segment:
                parent_keys.append(tuple(segment.split(b'\x00')))
        parents = tuple(parent_keys)
    # A single flag byte follows: 'N' means the text has no final newline.
    flag_at = parents_end + 1
    noeol = bytes[flag_at:flag_at + 1] == b'N'
    method = 'fulltext' if 'ft' in storage_kind else 'line-delta'
    # Everything after the flag byte is the raw knit record.
    raw_record = bytes[flag_at + 1:]
    annotated = 'annotated' in storage_kind
    record = KnitContentFactory(key, parents, (method, noeol), None,
                                raw_record, annotated, network_bytes=bytes)
    return [record]

487

120

488

121

489

122

class KnitContent(object):

490

"""Content of a knit version to which deltas can be applied.

491

492

This is always stored in memory as a list of lines with \\n at the end,

493

plus a flag saying if the final ending is really there or not, because that

494

corresponds to the on-disk knit representation.

495

"""

496

497

def __init__(self):

498

self._should_strip_eol = False

499

500

def apply_delta(self, delta, new_version_id):

501

"""Apply delta to this object to become new_version_id."""

502

raise NotImplementedError(self.apply_delta)

123

"""Content of a knit version to which deltas can be applied."""

124

125

def __init__(self, lines):

126

self._lines = lines

127

128

def annotate_iter(self):

129

"""Yield tuples of (origin, text) for each content line."""

130

return iter(self._lines)

131

132

def annotate(self):

133

"""Return a list of (origin, text) tuples."""

134

return list(self.annotate_iter())

503

135

504

136

def line_delta_iter(self, new_lines):

505

137

"""Generate line-based delta from this content to new_lines."""

506

138

new_texts = new_lines.text()

507

139

old_texts = self.text()

508

s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)

140

s = KnitSequenceMatcher(None, old_texts, new_texts)

509

141

for tag, i1, i2, j1, j2 in s.get_opcodes():

510

142

if tag == 'equal':

511

143

continue

515

147

def line_delta(self, new_lines):

516

148

return list(self.line_delta_iter(new_lines))

517

149

518

@staticmethod

519

def get_line_delta_blocks(knit_delta, source, target):

520

"""Extract SequenceMatcher.get_matching_blocks() from a knit delta"""

521

target_len = len(target)

522

s_pos = 0

523

t_pos = 0

524

for s_begin, s_end, t_len, new_text in knit_delta:

525

true_n = s_begin - s_pos

526

n = true_n

527

if n > 0:

528

# knit deltas do not provide reliable info about whether the

529

# last line of a file matches, due to eol handling.

530

if source[s_pos + n -1] != target[t_pos + n -1]:

531

n-=1

532

if n > 0:

533

yield s_pos, t_pos, n

534

t_pos += t_len + true_n

535

s_pos = s_end

536

n = target_len - t_pos

537

if n > 0:

538

if source[s_pos + n -1] != target[t_pos + n -1]:

539

n-=1

540

if n > 0:

541

yield s_pos, t_pos, n

542

yield s_pos + (target_len - t_pos), target_len, 0

543

544

545

class AnnotatedKnitContent(KnitContent):
    """Knit content whose lines are stored as (origin, text) pairs."""

    def __init__(self, lines):
        """Store a private list copy of the (origin, text) pairs in lines."""
        KnitContent.__init__(self)
        self._lines = list(lines)

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        annotated = self._lines[:]
        if self._should_strip_eol:
            # Only the copy is altered; the stored last line keeps its
            # newline.
            origin, last_text = annotated[-1]
            annotated[-1] = (origin, last_text.rstrip(b'\n'))
        return annotated

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        stored = self._lines
        # shift tracks how far earlier hunks have moved later positions.
        shift = 0
        for start, end, count, replacement in delta:
            stored[shift + start:shift + end] = replacement
            shift += (start - end) + count

    def text(self):
        """Return the text lines with their annotations dropped.

        :raises KnitCorrupt: If a stored line cannot be unpacked into an
            (origin, text) pair.
        """
        texts = []
        try:
            for _origin, line_text in self._lines:
                texts.append(line_text)
        except ValueError as e:
            # most commonly (only?) caused by the internal form of the knit
            # missing annotation information because of a bug - see thread
            # around 20071015
            raise KnitCorrupt(self,
                "line in annotated knit missing annotation information: %s"
                % (e,))
        if self._should_strip_eol:
            texts[-1] = texts[-1].rstrip(b'\n')
        return texts

    def copy(self):
        """Return a new AnnotatedKnitContent built from the same lines."""
        return AnnotatedKnitContent(self._lines)

584

585

586

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is reported
    for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
    objects.
    """

    def __init__(self, lines, version_id):
        """Hold lines (not copied) together with the version they belong to."""
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, text) for text in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        stored = self._lines
        # shift tracks how far earlier hunks have moved later positions.
        shift = 0
        for start, end, count, replacement in delta:
            stored[shift + start:shift + end] = replacement
            shift += (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a sliced copy of the lines."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the lines, stripping the final newline when flagged.

        The stored list itself is returned when no stripping is needed;
        otherwise a copy with the last line altered is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip(b'\n')
        return stripped

150

def text(self):

151

return [text for origin, text in self._lines]

152

153

def copy(self):

154

return KnitContent(self._lines[:])

621

155

622

156

623

157

class _KnitFactory(object):

624

"""Base class for common Factory functions."""

625

626

def parse_record(self, version_id, record, record_details,
                 base_content, copy_base_content=True):
    """Parse a record into a full content object.

    :param version_id: The official version id for this content
    :param record: The data returned by read_records_iter()
    :param record_details: Details about the record returned by
        get_build_details
    :param base_content: If get_build_details returns a compression_parent,
        you must return a base_content here, else use None
    :param copy_base_content: When building from the base_content, decide
        you can either copy it and return a new object, or modify it in
        place.
    :return: (content, delta) A Content object and possibly a line-delta,
        delta may be None
    """
    method, noeol = record_details
    if method != 'line-delta':
        # Anything that is not a line-delta is parsed as a fulltext.
        content = self.parse_fulltext(record, version_id)
        delta = None
    else:
        content = base_content.copy() if copy_base_content else base_content
        delta = self.parse_line_delta(record, version_id)
        content.apply_delta(delta, version_id)
    content._should_strip_eol = noeol
    return (content, delta)

158

"""Base factory for creating content objects."""

159

160

def make(self, lines, version_id):

161

num_lines = len(lines)

162

return KnitContent(zip([version_id] * num_lines, lines))

655

163

656

164

657

165

class KnitAnnotateFactory(_KnitFactory):

659

167

660

168

annotated = True

661

169

662

def make(self, lines, version_id):
    """Return AnnotatedKnitContent with every line attributed to version_id."""
    origins = [version_id] * len(lines)
    return AnnotatedKnitContent(zip(origins, lines))

665

666

170

def parse_fulltext(self, content, version_id):

667

171

"""Convert fulltext to internal representation

668

172

675

179

# but the code itself doesn't really depend on that.

676

180

# Figure out a way to not require the overhead of turning the

677

181

# list back into tuples.

678

lines = (tuple(line.split(b' ', 1)) for line in content)

679

return AnnotatedKnitContent(lines)

182

lines = [tuple(line.split(' ', 1)) for line in content]

183

return KnitContent(lines)

680

184

681

185

def parse_line_delta_iter(self, lines):

682

186

return iter(self.parse_line_delta(lines))

683

187

684

def parse_line_delta(self, lines, version_id, plain=False):

188

def parse_line_delta(self, lines, version_id):

685

189

"""Convert a line based delta into internal representation.

686

190

687

191

line delta is in the form of:

690

194

revid(utf8) newline\n

691

195

internal representation is

692

196

(start, end, count, [1..count tuples (revid, newline)])

693

694

:param plain: If True, the lines are returned as a plain

695

list without annotations, not as a list of (origin, content) tuples, i.e.

696

(start, end, count, [1..count newline])

697

197

"""

698

198

result = []

699

199

lines = iter(lines)

200

next = lines.next

700

201

701

202

cache = {}

702

203

def cache_and_return(line):

703

origin, text = line.split(b' ', 1)

204

origin, text = line.split(' ', 1)

704

205

return cache.setdefault(origin, origin), text

705

206

706

207

# walk through the lines parsing.

707

# Note that the plain test is explicitly pulled out of the

708

# loop to minimise any performance impact

709

if plain:

710

for header in lines:

711

start, end, count = [int(n) for n in header.split(b',')]

712

contents = [next(lines).split(b' ', 1)[1] for _ in range(count)]

713

result.append((start, end, count, contents))

714

else:

715

for header in lines:

716

start, end, count = [int(n) for n in header.split(b',')]

717

contents = [tuple(next(lines).split(b' ', 1))

718

for _ in range(count)]

719

result.append((start, end, count, contents))

208

for header in lines:

209

start, end, count = [int(n) for n in header.split(',')]

210

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

211

result.append((start, end, count, contents))

720

212

return result

721

213

722

214

def get_fulltext_content(self, lines):

723

215

"""Extract just the content lines from a fulltext."""

724

return (line.split(b' ', 1)[1] for line in lines)

216

return (line.split(' ', 1)[1] for line in lines)

725

217

726

218

def get_linedelta_content(self, lines):

727

219

"""Extract just the content from a line delta.

730

222

Only the actual content lines.

731

223

"""

732

224

lines = iter(lines)

225

next = lines.next

733

226

for header in lines:

734

header = header.split(b',')

227

header = header.split(',')

735

228

count = int(header[2])

736

for _ in range(count):

737

origin, text = next(lines).split(b' ', 1)

229

for i in xrange(count):

230

origin, text = next().split(' ', 1)

738

231

yield text

739

232

740

233

def lower_fulltext(self, content):

742

235

743

236

see parse_fulltext which this inverts.

744

237

"""

745

return [b'%s %s' % (o, t) for o, t in content._lines]

238

# TODO: jam 20070209 We only do the caching thing to make sure that

239

# the origin is a valid utf-8 line, eventually we could remove it

240

return ['%s %s' % (o, t) for o, t in content._lines]

746

241

747

242

def lower_line_delta(self, delta):

748

243

"""convert a delta into a serializable form.

753

248

# the origin is a valid utf-8 line, eventually we could remove it

754

249

out = []

755

250

for start, end, c, lines in delta:

756

out.append(b'%d,%d,%d\n' % (start, end, c))

757

out.extend(origin + b' ' + text

251

out.append('%d,%d,%d\n' % (start, end, c))

252

out.extend(origin + ' ' + text

758

253

for origin, text in lines)

759

254

return out

760

255

761

def annotate(self, knit, key):
    """Return (origin, line) pairs for key using knit's stored annotations.

    :param knit: Object providing ``_get_content(key)``.
    :param key: The key to annotate.
    :return: A list of (origin, line) pairs; for tuple keys each origin is
        expanded to a full key sharing key's prefix.
    """
    content = knit._get_content(key)
    if not isinstance(key, tuple):
        # XXX: This smells a bit.  Why would key ever be a non-tuple here?
        # Aren't keys defined to be tuples?  -- spiv 20080618
        return content.annotate()
    # adjust for the fact that serialised annotations are only key suffixes
    # for this factory.
    prefix = key[:-1]
    return [(prefix + (origin,), line)
            for origin, line in content.annotate()]

776

777

256

778

257

class KnitPlainFactory(_KnitFactory):

779

258

"""Factory for creating plain Content objects."""

780

259

781

260

annotated = False

782

261

783

def make(self, lines, version_id):
    """Return a PlainKnitContent holding lines for version_id."""
    return PlainKnitContent(lines, version_id)

785

786

262

def parse_fulltext(self, content, version_id):

787

263

"""This parses an unannotated fulltext.

788

264

797

273

while cur < num_lines:

798

274

header = lines[cur]

799

275

cur += 1

800

start, end, c = [int(n) for n in header.split(b',')]

801

yield start, end, c, lines[cur:cur+c]

276

start, end, c = [int(n) for n in header.split(',')]

277

yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

802

278

cur += c

803

279

804

280

def parse_line_delta(self, lines, version_id):

815

291

Only the actual content lines.

816

292

"""

817

293

lines = iter(lines)

294

next = lines.next

818

295

for header in lines:

819

header = header.split(b',')

296

header = header.split(',')

820

297

count = int(header[2])

821

for _ in range(count):

822

yield next(lines)

298

for i in xrange(count):

299

yield next()

823

300

824

301

def lower_fulltext(self, content):

825

302

return content.text()

827

304

def lower_line_delta(self, delta):

828

305

out = []

829

306

for start, end, c, lines in delta:

830

out.append(b'%d,%d,%d\n' % (start, end, c))

831

out.extend(lines)

307

out.append('%d,%d,%d\n' % (start, end, c))

308

out.extend([text for origin, text in lines])

832

309

return out

833

310

834

def annotate(self, knit, key):
    """Annotate key by delegating to a _KnitAnnotator built over knit.

    :return: Whatever the annotator's annotate_flat(key) returns.
    """
    return _KnitAnnotator(knit).annotate_flat(key)

837

838

839

840

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles.
    """
    def factory(transport):
        kndx_index = _KndxIndex(
            transport, mapper, lambda: None, lambda: True, lambda: True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access, annotated=annotated)
    return factory

854

855

856

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles with extra ``stream`` and ``writer`` attributes.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for the graph, one more for compression parents.
        ref_length = 0
        if graph:
            ref_length += 1
        if delta:
            ref_length += 1
        max_delta_chain = 200 if delta else 0
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        knit_index = _KnitGraphIndex(
            graph_index, lambda: True, parents=parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        pack_access = pack_repo._DirectPackAccess({})
        pack_access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(
            knit_index, pack_access, max_delta_chain=max_delta_chain)
        # Kept on the result so cleanup_pack_knit() can reach them later.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

891

892

893

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object carrying ``stream`` and ``writer``
        attributes, such as the result of a make_pack_factory() factory.
    """
    pack_stream = versioned_files.stream
    pack_stream.close()
    container_writer = versioned_files.writer
    container_writer.end()

896

897

898

def _get_total_build_size(self, keys, positions):
    """Determine the total bytes to build these keys.

    (helper function because _KnitGraphIndex and _KndxIndex work the same, but
    don't inherit from a common base.)

    :param keys: Keys that we want to build
    :param positions: dict of {key, (info, index_memo, comp_parent)} (such
        as returned by _get_components_positions)
    :return: Number of bytes to build those keys
    """
    memos_by_key = {}
    pending = keys
    # Walk the compression chains one generation at a time.
    while pending:
        following = set()
        for key in pending:
            # Keys missing from positions are skipped. This is mostly for
            # the 'stacked' case, where we will be getting the data from a
            # fallback.
            if key in positions:
                _, index_memo, compression_parent = positions[key]
                memos_by_key[key] = index_memo
                if compression_parent not in memos_by_key:
                    following.add(compression_parent)
        pending = following
    # Element 2 of each index memo is summed as that record's size.
    return sum(memo[2] for memo in viewvalues(memos_by_key))

925

926

927

class KnitVersionedFiles(VersionedFilesWithFallbacks):

928

"""Storage for many versioned files using knit compression.

929

930

Backend storage is managed by indices and data objects.

931

932

:ivar _index: A _KnitGraphIndex or similar that can describe the

933

parents, graph, compression and data location of entries in this

934

KnitVersionedFiles. Note that this is only the index for

935

*this* vfs; if there are fallbacks they must be queried separately.

936

"""

937

938

def __init__(self, index, data_access, max_delta_chain=200,

939

annotated=False, reload_func=None):

940

"""Create a KnitVersionedFiles with index and data_access.

941

942

:param index: The index for the knit data.

943

:param data_access: The access object to store and retrieve knit

944

records.

945

:param max_delta_chain: The maximum number of deltas to permit during

946

insertion. Set to 0 to prohibit the use of deltas.

947

:param annotated: Set to True to cause annotations to be calculated and

948

stored during insertion.

949

:param reload_func: An function that can be called if we think we need

950

to reload the pack listing and try again. See

951

'breezy.bzr.pack_repo.AggregateIndex' for the signature.

311

312

def make_empty_knit(transport, relpath):

313

"""Construct a empty knit at the specified location."""

314

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

315

k._data._open_file()

316

317

318

class KnitVersionedFile(VersionedFile):

319

"""Weave-like structure with faster random access.

320

321

A knit stores a number of texts and a summary of the relationships

322

between them. Texts are identified by a string version-id. Texts

323

are normally stored and retrieved as a series of lines, but can

324

also be passed as single strings.

325

326

Lines are stored with the trailing newline (if any) included, to

327

avoid special cases for files with no final newline. Lines are

328

composed of 8-bit characters, not unicode. The combination of

329

these approaches should mean any 'binary' file can be safely

330

stored and retrieved.

331

"""

332

333

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

334

factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,

335

create=False, create_parent_dir=False, delay_create=False,

336

dir_mode=None):

337

"""Construct a knit at location specified by relpath.

338

339

:param create: If not True, only open an existing knit.

340

:param create_parent_dir: If True, create the parent directory if

341

creating the file fails. (This is used for stores with

342

hash-prefixes that may not exist yet)

343

:param delay_create: The calling code is aware that the knit won't

344

actually be created until the first data is stored.

952

345

"""

953

self._index = index

954

self._access = data_access

955

self._max_delta_chain = max_delta_chain

956

if annotated:

957

self._factory = KnitAnnotateFactory()

958

else:

959

self._factory = KnitPlainFactory()

960

self._immediate_fallback_vfs = []

961

self._reload_func = reload_func

346

if deprecated_passed(basis_knit):

347

warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"

348

" deprecated as of bzr 0.9.",

349

DeprecationWarning, stacklevel=2)

350

if access_mode is None:

351

access_mode = 'w'

352

super(KnitVersionedFile, self).__init__(access_mode)

353

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

354

self.transport = transport

355

self.filename = relpath

356

self.factory = factory or KnitAnnotateFactory()

357

self.writable = (access_mode == 'w')

358

self.delta = delta

359

360

self._max_delta_chain = 200

361

362

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

363

access_mode, create=create, file_mode=file_mode,

364

create_parent_dir=create_parent_dir, delay_create=delay_create,

365

dir_mode=dir_mode)

366

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

367

access_mode, create=create and not len(self), file_mode=file_mode,

368

create_parent_dir=create_parent_dir, delay_create=delay_create,

369

dir_mode=dir_mode)

962

370

963

371

def __repr__(self):
    """Show the class name with the reprs of the index and access objects."""
    class_name = self.__class__.__name__
    return "%s(%r, %r)" % (class_name, self._index, self._access)

968

969

def without_fallbacks(self):
    """Return a clone of this object without any fallbacks configured."""
    return KnitVersionedFiles(
        self._index,
        self._access,
        max_delta_chain=self._max_delta_chain,
        annotated=self._factory.annotated,
        reload_func=self._reload_func)

974

975

def add_fallback_versioned_files(self, a_versioned_files):
    """Add a source of texts for texts not present in this knit.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._immediate_fallback_vfs
    fallbacks.append(a_versioned_files)

981

982

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines()."""
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so we give them
    # an empty tuple instead.
    graph_parents = () if parents is None else parents
    joined_text = b''.join(lines)
    return self._add(
        key, lines, graph_parents, parent_texts, left_matching_blocks,
        nostore_sha, random_id, line_bytes=joined_text)

997

998

def _add(self, key, lines, parents, parent_texts,
    left_matching_blocks, nostore_sha, random_id,
    line_bytes):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :param key: The key to store under. If its last element is None it is
        replaced by b'sha1:' plus the digest of line_bytes.
    :param lines: A list of strings where each one is a single line (has a
        single newline at the end of the string). This is now optional
        (callers can pass None). It is left in its location for backwards
        compatibility. When given, ''.join(lines) must == line_bytes.
    :param parents: The parent keys of the new text.
    :param parent_texts: Optional mapping handed to _merge_annotations;
        None is treated as an empty dict.
    :param left_matching_blocks: Passed through to _merge_annotations.
    :param nostore_sha: If equal to the digest of line_bytes, nothing is
        stored and errors.ExistingContent is raised.
    :param random_id: Passed through to the index's add_records.
    :param line_bytes: A single string containing the content

    We pass both lines and line_bytes because different routes bring the
    values to this function. And for memory efficiency, we don't want to
    have to split/join on-demand.

    :return: (digest, text_length, content)
    :raises errors.ExistingContent: If nostore_sha matches the digest.
    :raises TypeError: If key contains an element that is not bytes (other
        than a None last element).
    """
    # first thing, if the content is something we don't need to store, find
    # that out.
    digest = sha_string(line_bytes)
    if nostore_sha == digest:
        raise errors.ExistingContent

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence; we only compress
    # against parents in the same kvf.
    present_parent_map = self._index.get_parent_map(parents)
    for parent in parents:
        if parent in present_parent_map:
            present_parents.append(parent)

    # Currently we can only compress against the left most present parent.
    if (len(present_parents) == 0 or
        present_parents[0] != parents[0]):
        delta = False
    else:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spent applying deltas.
        delta = self._check_should_delta(present_parents[0])

    text_length = len(line_bytes)
    options = []
    no_eol = False
    # Note: line_bytes is not modified to add a newline, that is tracked
    #       via the no_eol flag. 'lines' *is* modified, because that is the
    #       general values needed by the Content code.
    if line_bytes and not line_bytes.endswith(b'\n'):
        options.append(b'no-eol')
        no_eol = True
        # Copy the existing list, or create a new one
        if lines is None:
            lines = osutils.split_lines(line_bytes)
        else:
            lines = lines[:]
        # Replace the last line with one that ends in a final newline
        lines[-1] = lines[-1] + b'\n'
    if lines is None:
        lines = osutils.split_lines(line_bytes)

    # Every key element must be a bytestring; only the last may be None,
    # in which case it is derived from the content digest.
    for element in key[:-1]:
        if not isinstance(element, bytes):
            raise TypeError("key contains non-bytestrings: %r" % (key,))
    if key[-1] is None:
        key = key[:-1] + (b'sha1:' + digest,)
    elif not isinstance(key[-1], bytes):
        raise TypeError("key contains non-bytestrings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if no_eol:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self._factory.annotated,
            left_matching_blocks)

    if delta:
        # delta_hunks is always bound here: the branch above runs whenever
        # delta is true.
        options.append(b'line-delta')
        store_lines = self._factory.lower_line_delta(delta_hunks)
        size, data = self._record_to_data(key, digest,
            store_lines)
    else:
        options.append(b'fulltext')
        # isinstance is slower and we have no hierarchy.
        if self._factory.__class__ is KnitPlainFactory:
            # Use the already joined bytes saving iteration time in
            # _record_to_data.
            dense_lines = [line_bytes]
            if no_eol:
                dense_lines.append(b'\n')
            size, data = self._record_to_data(key, digest,
                lines, dense_lines)
        else:
            # get mixed annotation + content and feed it into the
            # serialiser.
            store_lines = self._factory.lower_fulltext(content)
            size, data = self._record_to_data(key, digest,
                store_lines)

    # Write the record data first, then register it in the index.
    access_memo = self._access.add_raw_records([(key, size)], data)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

1108

1109

def annotate(self, key):
    """Annotate ``key`` by delegating to this object's content factory.

    See VersionedFiles.annotate.

    :param key: The key to annotate.
    :return: Whatever ``self._factory.annotate(self, key)`` returns.
    """
    factory = self._factory
    return factory.annotate(self, key)

1112

1113

def get_annotator(self):
    """Return a new _KnitAnnotator constructed around this object."""
    annotator = _KnitAnnotator(self)
    return annotator

1115

1116

def check(self, progress_bar=None, keys=None):
    """Check this object; see VersionedFiles.check().

    :param progress_bar: Accepted for interface compatibility; this
        implementation does not use it.
    :param keys: When None, the logical check is run. Otherwise a record
        stream for these keys is returned.
    :return: The result of ``_logical_check()`` when keys is None, else an
        'unordered' record stream over keys with the delta closure included.
    """
    if keys is not None:
        # At the moment, check does no extra work beyond get_record_stream.
        return self.get_record_stream(keys, 'unordered', True)
    return self._logical_check()

1123

1124

def _logical_check(self):

1125

# This doesn't actually test extraction of everything, but that will

1126

# impact 'bzr check' substantially, and needs to be integrated with

1127

# care. However, it does check for the obvious problem of a delta with

1128

# no basis.

1129

keys = self._index.keys()

1130

parent_map = self.get_parent_map(keys)

1131

for key in keys:

1132

if self._index.get_method(key) != 'fulltext':

1133

compression_parent = parent_map[key][0]

1134

if compression_parent not in parent_map:

1135

raise KnitCorrupt(self,

1136

"Missing basis parent %s for %s" % (

1137

compression_parent, key))

1138

for fallback_vfs in self._immediate_fallback_vfs:

1139

fallback_vfs.check()

1140

1141

def _check_add(self, key, lines, random_id, check_content):

1142

"""check that version_id and lines are safe to add."""

1143

if not all(isinstance(x, bytes) or x is None for x in key):

1144

raise TypeError(key)

1145

version_id = key[-1]

1146

if version_id is not None:

1147

if contains_whitespace(version_id):

1148

raise InvalidRevisionId(version_id, self)

1149

self.check_not_reserved_id(version_id)

1150

# TODO: If random_id==False and the key is already present, we should

1151

# probably check that the existing content is identical to what is

1152

# being inserted, and otherwise raise an exception. This would make

1153

# the bundle code simpler.

1154

if check_content:

1155

self._check_lines_not_unicode(lines)

1156

self._check_lines_are_lines(lines)

1157

1158

def _check_header(self, key, line):

1159

rec = self._split_header(line)

1160

self._check_header_version(rec, key[-1])

1161

return rec

1162

1163

def _check_header_version(self, rec, version_id):

1164

"""Checks the header version on original format knit records.

1165

1166

These have the last component of the key embedded in the record.

1167

"""

1168

if rec[1] != version_id:

1169

raise KnitCorrupt(self,

1170

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1171

1172

def _check_should_delta(self, parent):

372

return '%s(%s)' % (self.__class__.__name__,

373

self.transport.abspath(self.filename))

374

375

def _check_should_delta(self, first_parents):

1173

376

"""Iterate back through the parent listing, looking for a fulltext.

1174

377

1175

378

This is used when we want to decide whether to add a delta or a new

1182

385

"""

1183

386

delta_size = 0

1184

387

fulltext_size = None

1185

for count in range(self._max_delta_chain):

1186

try:

1187

# Note that this only looks in the index of this particular

1188

# KnitVersionedFiles, not in the fallbacks. This ensures that

1189

# we won't store a delta spanning physical repository

1190

# boundaries.

1191

build_details = self._index.get_build_details([parent])

1192

parent_details = build_details[parent]

1193

except (RevisionNotPresent, KeyError) as e:

1194

# Some basis is not locally present: always fulltext

1195

return False

1196

index_memo, compression_parent, _, _ = parent_details

1197

_, _, size = index_memo

1198

if compression_parent is None:

388

delta_parents = first_parents

389

for count in xrange(self._max_delta_chain):

390

parent = delta_parents[0]

391

method = self._index.get_method(parent)

392

pos, size = self._index.get_position(parent)

393

if method == 'fulltext':

1199

394

fulltext_size = size

1200

395

break

1201

396

delta_size += size

1202

# We don't explicitly check for presence because this is in an

1203

# inner loop, and if it's missing it'll fail anyhow.

1204

parent = compression_parent

397

delta_parents = self._index.get_parents(parent)

1205

398

else:

1206

399

# We couldn't find a fulltext, so we must create a new one

1207

400

return False

1208

# Simple heuristic - if the total I/O wold be greater as a delta than

1209

# the originally installed fulltext, we create a new fulltext.

401

1210

402

return fulltext_size > delta_size

1211

403

1212

def _build_details_to_components(self, build_details):

1213

"""Convert a build_details tuple to a position tuple."""

1214

# record_details, access_memo, compression_parent

1215

return build_details[3], build_details[0], build_details[1]

1216

1217

def _get_components_positions(self, keys, allow_missing=False):

1218

"""Produce a map of position data for the components of keys.

1219

1220

This data is intended to be used for retrieving the knit records.

1221

1222

A dict of key to (record_details, index_memo, next, parents) is

1223

returned.

1224

1225

* method is the way referenced data should be applied.

1226

* index_memo is the handle to pass to the data access to actually get

1227

the data

1228

* next is the build-parent of the version, or None for fulltexts.

1229

* parents is the version_ids of the parents of this version

1230

1231

:param allow_missing: If True do not raise an error on a missing

1232

component, just ignore it.

1233

"""

1234

component_data = {}

1235

pending_components = keys

1236

while pending_components:

1237

build_details = self._index.get_build_details(pending_components)

1238

current_components = set(pending_components)

1239

pending_components = set()

1240

for key, details in viewitems(build_details):

1241

(index_memo, compression_parent, parents,

1242

record_details) = details

1243

method = record_details[0]

1244

if compression_parent is not None:

1245

pending_components.add(compression_parent)

1246

component_data[key] = self._build_details_to_components(details)

1247

missing = current_components.difference(build_details)

1248

if missing and not allow_missing:

1249

raise errors.RevisionNotPresent(missing.pop(), self)

1250

return component_data

1251

1252

def _get_content(self, key, parent_texts={}):

1253

"""Returns a content object that makes up the specified

1254

version."""

1255

cached_version = parent_texts.get(key, None)

1256

if cached_version is not None:

1257

# Ensure the cache dict is valid.

1258

if not self.get_parent_map([key]):

1259

raise RevisionNotPresent(key, self)

1260

return cached_version

1261

generator = _VFContentMapGenerator(self, [key])

1262

return generator._get_content(key)

1263

1264

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    map_and_sources = self._get_parent_map_with_sources(keys)
    # Only the combined mapping is wanted; the per-source list is dropped.
    return map_and_sources[0]

1272

1273

def _get_parent_map_with_sources(self, keys):

1274

"""Get a map of the parents of keys.

1275

1276

:param keys: The keys to look up parents for.

1277

:return: A tuple. The first element is a mapping from keys to parents.

1278

Absent keys are absent from the mapping. The second element is a

1279

list with the locations each key was found in. The first element

1280

is the in-this-knit parents, the second the first fallback source,

1281

and so on.

1282

"""

1283

result = {}

1284

sources = [self._index] + self._immediate_fallback_vfs

1285

source_results = []

1286

missing = set(keys)

1287

for source in sources:

1288

if not missing:

1289

break

1290

new_result = source.get_parent_map(missing)

1291

source_results.append(new_result)

1292

result.update(new_result)

1293

missing.difference_update(set(new_result))

1294

return result, source_results

1295

1296

def _get_record_map(self, keys, allow_missing=False):

1297

"""Produce a dictionary of knit records.

1298

1299

:return: {key:(record, record_details, digest, next)}

1300

1301

* record: data returned from read_records (a KnitContentobject)

1302

* record_details: opaque information to pass to parse_record

1303

* digest: SHA1 digest of the full text after all steps are done

1304

* next: build-parent of the version, i.e. the leftmost ancestor.

1305

Will be None if the record is not a delta.

1306

1307

:param keys: The keys to build a map for

1308

:param allow_missing: If some records are missing, rather than

1309

error, just return the data that could be generated.

1310

"""

1311

raw_map = self._get_record_map_unparsed(keys,

1312

allow_missing=allow_missing)

1313

return self._raw_map_to_record_map(raw_map)

1314

1315

def _raw_map_to_record_map(self, raw_map):

1316

"""Parse the contents of _get_record_map_unparsed.

1317

1318

:return: see _get_record_map.

1319

"""

1320

result = {}

1321

for key in raw_map:

1322

data, record_details, next = raw_map[key]

1323

content, digest = self._parse_record(key[-1], data)

1324

result[key] = content, record_details, digest, next

1325

return result

1326

1327

def _get_record_map_unparsed(self, keys, allow_missing=False):

1328

"""Get the raw data for reconstructing keys without parsing it.

1329

1330

:return: A dict suitable for parsing via _raw_map_to_record_map.

1331

key-> raw_bytes, (method, noeol), compression_parent

1332

"""

1333

# This retries the whole request if anything fails. Potentially we

1334

# could be a bit more selective. We could track the keys whose records

1335

# we have successfully found, and then only request the new records

1336

# from there. However, _get_components_positions grabs the whole build

1337

# chain, which means we'll likely try to grab the same records again

1338

# anyway. Also, can the build chains change as part of a pack

1339

# operation? We wouldn't want to end up with a broken chain.

1340

while True:

1341

try:

1342

position_map = self._get_components_positions(keys,

1343

allow_missing=allow_missing)

1344

# key = component_id, r = record_details, i_m = index_memo,

1345

# n = next

1346

records = [(key, i_m) for key, (r, i_m, n)

1347

in viewitems(position_map)]

1348

# Sort by the index memo, so that we request records from the

1349

# same pack file together, and in forward-sorted order

1350

records.sort(key=operator.itemgetter(1))

1351

raw_record_map = {}

1352

for key, data in self._read_records_iter_unchecked(records):

1353

(record_details, index_memo, next) = position_map[key]

1354

raw_record_map[key] = data, record_details, next

1355

return raw_record_map

1356

except errors.RetryWithNewPacks as e:

1357

self._access.reload_or_raise(e)

1358

1359

@classmethod

1360

def _split_by_prefix(cls, keys):

1361

"""For the given keys, split them up based on their prefix.

1362

1363

To keep memory pressure somewhat under control, split the

1364

requests back into per-file-id requests, otherwise "bzr co"

1365

extracts the full tree into memory before writing it to disk.

1366

This should be revisited if _get_content_maps() can ever cross

1367

file-id boundaries.

1368

1369

The keys for a given file_id are kept in the same relative order.

1370

Ordering between file_ids is not, though prefix_order will return the

1371

order that the key was first seen.

1372

1373

:param keys: An iterable of key tuples

1374

:return: (split_map, prefix_order)

1375

split_map A dictionary mapping prefix => keys

1376

prefix_order The order that we saw the various prefixes

1377

"""

1378

split_by_prefix = {}

1379

prefix_order = []

1380

for key in keys:

1381

if len(key) == 1:

1382

prefix = b''

1383

else:

1384

prefix = key[0]

1385

1386

if prefix in split_by_prefix:

1387

split_by_prefix[prefix].append(key)

1388

else:

1389

split_by_prefix[prefix] = [key]

1390

prefix_order.append(prefix)

1391

return split_by_prefix, prefix_order

1392

1393

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    Keys are accumulated one prefix at a time; a group is closed as soon as
    its total build size exceeds ``_min_buffer_size``.

    :param keys: The keys to group.
    :param non_local_keys: Keys grouped alongside ``keys`` by prefix and
        reported as the second item of each result tuple.
    :param positions: Position data passed to
        ``self._index._get_total_build_size``.
    :param _min_buffer_size: Size threshold that closes a group.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together. Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys, positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    if cur_keys:
        # Flush whatever is left over below the threshold.
        result.append((cur_keys, cur_non_local))
    return result

1435

1436

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children. Forced to 'unordered' when the index has no graph.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # keys might be a generator
    remaining_keys = set(keys)
    if not remaining_keys:
        return
    if not self._index.has_graph:
        # Cannot sort when no graph has been stored.
        ordering = 'unordered'

    while True:
        try:
            # Each attempt only asks for keys not yet yielded, so a retry
            # does not repeat records already handed to the caller.
            attempt_keys = set(remaining_keys)
            stream = self._get_remaining_record_stream(
                attempt_keys, ordering, include_delta_closure)
            for content_factory in stream:
                remaining_keys.discard(content_factory.key)
                yield content_factory
            return
        except errors.RetryWithNewPacks as e:
            self._access.reload_or_raise(e)

1467

1468

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    Generator yielding one record per requested key: an
    AbsentContentFactory for each key missing from the combined parent
    map, then records for the present keys.

    :param keys: Set of keys to stream (set operations are applied to it).
    :param ordering: 'topological', 'groupcompress' or 'unordered'; any
        other value raises AssertionError.
    :param include_delta_closure: If true, positions are gathered for whole
        build chains and records come from _VFContentMapGenerator. If
        false, raw local records are wrapped in KnitContentFactory and
        keys found in fallbacks are delegated to that fallback's own
        get_record_stream.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
                         for key, details in viewitems(build_details))
    # NOTE(review): this value is reassigned from global_map further down
    # before it is read anywhere.
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                # The key itself has no local position.
                needed_from_fallback.add(key)
                continue
            result = True
            # Follow compression parents until a fulltext (None) is reached,
            # a previously classified key is met, or a basis is missing.
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            # Everything but the chain's last entry shares the verdict.
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering in ('topological', 'groupcompress'):
        if ordering == 'topological':
            # Global topological sort
            present_keys = tsort.topo_sort(global_map)
        else:
            present_keys = sort_groupcompress(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            # presumably every present key appears in one of parent_maps,
            # since global_map is built from them; otherwise key_source
            # would be stale or unbound -- TODO confirm.
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                # Start a new run of keys each time the source changes.
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                                 ' "unordered", "groupcompress" or "topological" not: %r'
                                 % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        # Note: the loop rebinds both `keys` and `non_local_keys`.
        for keys, non_local_keys in self._group_keys_for_io(present_keys,
                                                            non_local_keys,
                                                            positions):
            generator = _VFContentMapGenerator(self, keys, non_local_keys,
                                               global_map,
                                               ordering=ordering)
            for record in generator.get_record_stream():
                yield record
    else:
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data in self._read_records_iter_unchecked(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                                             record_details, None, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this knit's own index, so the
                # fallback list is offset by one.
                vf = self._immediate_fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                    yield record

1586

1587

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests are taken from this object's own records first; keys still
    unresolved are then asked of each immediate fallback in turn.

    :param keys: The keys whose digests are wanted.
    :return: A dict mapping each key that was found to its digest.
    """
    missing = set(keys)
    record_map = self._get_record_map(missing, allow_missing=True)
    # The record map may also hold build-chain components that were not
    # asked for; keep only requested keys. Record entry 2 is the 'digest'.
    result = {key: record_map[key][2]
              for key in record_map if key in missing}
    missing.difference_update(result)
    for source in self._immediate_fallback_vfs:
        if not missing:
            break
        from_source = source.get_sha1s(missing)
        result.update(from_source)
        missing.difference_update(from_source)
    return result

1605

1606

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Knit records whose kind can be stored directly are written as raw
    bytes; 'chunked' records and everything else go through add_lines().
    Index entries for deltas whose compression parent is not yet in this
    index are buffered until that parent is added.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: If the stream contains an 'absent' record.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created on first use and cached in `adapters`.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # Storage kinds treated as deltas (their first parent is the basis).
    delta_types = set()
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = {"knit-annotated-ft-gz"}
        if self._max_delta_chain:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
        delta_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.

    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.

    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        kind = record.storage_kind
        if kind.startswith('knit-') and kind.endswith('-gz'):
            # Check that the ID in the header of the raw knit bytes matches
            # the record metadata.
            raw_data = record._raw_record
            df, rec = self._parse_record_header(record.key, raw_data)
            df.close()
        buffered = False
        parents = record.parents
        if record.storage_kind in delta_types:
            # TODO: eventually the record itself should track
            #       compression_parent
            compression_parent = parents[0]
        else:
            compression_parent = None
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._immediate_fallback_vfs
                   or compression_parent in self._index
                   or compression_parent not in self)):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.

            # TODO: self.__contains__ is somewhat redundant with
            # self._index.__contains__; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            # NOTE(review): the local name `bytes` below shadows the builtin
            # for the rest of this function.
            if record.storage_kind not in native_types:
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            else:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                bytes = record._raw_record
            options = [record._build_details[0].encode('ascii')]
            if record._build_details[1]:
                options.append(b'no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if b'fulltext' not in options:
                # Not a fulltext, so we need to make sure the compression
                # parent will also be present.
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.

                # They're required to be physically in this
                # KnitVersionedFiles, not in a fallback.
                if compression_parent not in self._index:
                    pending = buffered_index_entries.setdefault(
                        compression_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                           osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            self._access.flush()
            try:
                # Try getting a fulltext directly from the record.
                bytes = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(record)
            lines = split_lines(bytes)
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Deliberately ignored: the key is already stored.
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            # Releasing a buffered entry may in turn unblock entries that
            # were waiting on it, so keep going until nothing new is added.
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_records(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # So these need to be added (if the index supports holding such
        # entries for later insertion)
        all_entries = []
        for key in buffered_index_entries:
            index_entries = buffered_index_entries[key]
            all_entries.extend(index_entries)
        self._index.add_records(
            all_entries, missing_compression_parents=True)

1775

1776

def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Check this after calling insert_record_stream to find out if there are
    any missing compression parents. If there are, the records that
    depend on them are not able to be inserted safely. For atomic
    KnitVersionedFiles built on packs, the transaction should be aborted or
    suspended - commit will fail at this point. Nonatomic knits will error
    earlier because they have no staging area to put pending entries into.

    :return: Whatever the index reports from
        ``get_missing_compression_parents()``.
    """
    index = self._index
    return index.get_missing_compression_parents()

1787

1788

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: The keys to walk.
    :param pb: Progress bar supplied by caller.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: If keys remain unfound locally and there
        are no fallbacks to consult.
    """
    if pb is None:
        pb = ui.ui_factory.nested_progress_bar()
    keys = set(keys)
    total = len(keys)
    while True:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            build_details = self._index.get_build_details(keys)
            key_records = [(key, details[0])
                           for key, details in viewitems(build_details)
                           if key in keys]
            numbered_records = enumerate(self._read_records_iter(key_records))
            for (key_idx, (key, data, sha_value)) in numbered_records:
                pb.update(gettext('Walking content'), key_idx, total)
                if build_details[key][1] is None:
                    # No compression parent: fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
            break
        except errors.RetryWithNewPacks as e:
            self._access.reload_or_raise(e)
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._immediate_fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for source in self._immediate_fallback_vfs:
        if not keys:
            break
        found_in_source = set()
        for line, key in source.iter_lines_added_or_present_in_keys(keys):
            found_in_source.add(key)
            yield line, key
        keys.difference_update(found_in_source)
    pb.update(gettext('Walking content'), total, total)

1866

1867

def _make_line_delta(self, delta_seq, new_content):
    """Build the list of line-delta hunks described by delta_seq.

    :param delta_seq: A sequence matcher exposing get_opcodes(); each
        opcode is read as (tag, old_start, old_end, new_start, new_end).
    :param new_content: The content object whose _lines supply the
        replacement lines.
    :return: A list of (old_start, old_end, new_length, new_lines) tuples,
        one per opcode whose tag is not 'equal'.
    """
    return [
        (old_start, old_end, new_end - new_start,
         new_content._lines[new_start:new_end])
        for tag, old_start, old_end, new_start, new_end
        in delta_seq.get_opcodes()
        if tag != 'equal']

404

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    Stores ``delta`` as a line-delta record when ``delta_parent`` is given
    and _check_should_delta() approves; in every other case the work is
    handed to the base class implementation.

    :param version_id: Id of the version being added.
    :param parents: Parents of the new version; checked for presence.
    :param delta_parent: The version the delta applies to, or None when the
        delta really carries a full text.
    :param sha1: Digest stored with the new record.
    :param noeol: True if the text has no trailing newline.
    :param delta: The line-delta hunks.
    """
    self._check_add(version_id, [])  # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        # Explicit raises (not assert statements) so the shape checks are
        # still performed under ``python -O``.
        if len(delta) not in (0, 1):
            raise AssertionError()
        if len(delta):
            if delta[0][0] != 0:
                raise AssertionError()
            if delta[0][1] != 0:
                raise AssertionError(delta[0][1])
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # determine the current delta chain length.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spent applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    where, size = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, where, size, parents)

458

459

def _add_raw_records(self, records, data):
    """Store pre-joined record data and index every record inside it.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The joined data of all the records.  Each record is
        indexed at the position returned for the write plus the sum of
        the sizes of the records preceding it.
    """
    # write all the data in one go
    base = self._data.add_raw_record(data)
    entries = []
    consumed = 0
    for version_id, options, parents, size in records:
        start, end = consumed, consumed + size
        entries.append((version_id, options, base + start, size, parents))
        if self._data._do_cache:
            self._data._cache[version_id] = data[start:end]
        consumed = end
    self._index.add_versions(entries)

478

479

def enable_cache(self):
    """Turn on caching of record data for this knit.

    Simply delegates to the data object.
    """
    data = self._data
    data.enable_cache()

482

483

def clear_cache(self):
    """Drop the cached record data; nothing else is touched here.

    Simply delegates to the data object.
    """
    data = self._data
    data.clear_cache()

486

487

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first written under a temporary name, then the data file
    is copied, and only then is the index moved to its final name.

    :param name: Base name of the copy; the index and data suffixes are
        appended to it.
    :param transport: The transport to write the copy to.
    """
    temp_index_name = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    transport.put_file_non_atomic(temp_index_name, index_file)
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(temp_index_name, name + INDEX_SUFFIX)

501

502

def create_empty(self, name, transport, mode=None):
    """Create a new, empty knit using this knit's factory and delta flag.

    :param name: Passed through to KnitVersionedFile.
    :param transport: Passed through to KnitVersionedFile.
    :param mode: Accepted but not used by this implementation.
    :return: The newly created KnitVersionedFile.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

505

506

def _fix_parents(self, version_id, new_parents):
    """Fix the parents list for version.

    A new entry for version_id is appended to the index; it repeats the
    cached entry's fields 1 to 3 and carries new_parents as its parents.
    new_parents must be a superset of the parents currently cached for
    the version.
    """
    cached = self._index._cache[version_id]
    old_parents = set(cached[4])
    assert old_parents.difference(set(new_parents)) == set()
    options, position, size = cached[1], cached[2], cached[3]
    self._index.add_version(version_id, options, position, size,
                            new_parents)

521

522

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :param version_id: The version to describe.
    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id, or None when it has no parents.
    :raises RevisionNotPresent: If version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = parents[0] if len(parents) else None
    data_pos, data_size = self._index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)

    if 'fulltext' != self._index.get_method(version_id):
        # Not recorded as a full text: parse the record as a line delta
        # and hand that back.
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    # Recorded as a full text: diff it against the first parent, or
    # against an empty text when there is no parent.
    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

550

551

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the items yielded by the index's
        get_graph().
    """
    return dict(self._index.get_graph())

555

556

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    :return: The digest held in the record map entry for version_id.
    """
    version_id = osutils.safe_revision_id(version_id)
    entry = self._get_record_map([version_id])[version_id]
    _method, _content, digest, _build_parent = entry
    return digest

562

563

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list holding the data suffix followed by the index
        suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

567

568

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is present is never a ghost.  Otherwise the graph is
    searched for a parent reference equal to version_id that is missing
    from the index cache.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

582

583

def versions(self):
    """See VersionedFile.versions.

    :return: Whatever the index's get_versions() returns.
    """
    index = self._index
    return index.get_versions()

586

587

def has_version(self, version_id):
    """See VersionedFile.has_version.

    The id is passed through osutils.safe_revision_id() before the index
    is consulted.
    """
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

# ``version_id in knit`` is answered by has_version.
__contains__ = has_version

1875

593

1876

594

def _merge_annotations(self, content, parents, parent_texts={},

1877

delta=None, annotated=None,

1878

left_matching_blocks=None):

1879

"""Merge annotations for content and generate deltas.

1880

1881

This is done by comparing the annotations based on changes to the text

1882

and generating a delta on the resulting full texts. If annotations are

1883

not being created then a simple delta is created.

595

delta=None, annotated=None):

596

"""Merge annotations for content. This is done by comparing

597

the annotations based on changed to the text.

1884

598

"""

1885

if left_matching_blocks is not None:

1886

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1887

else:

599

if annotated:

1888

600

delta_seq = None

1889

if annotated:

1890

for parent_key in parents:

1891

merge_content = self._get_content(parent_key, parent_texts)

1892

if (parent_key == parents[0] and delta_seq is not None):

1893

seq = delta_seq

1894

else:

1895

seq = patiencediff.PatienceSequenceMatcher(

1896

None, merge_content.text(), content.text())

601

for parent_id in parents:

602

merge_content = self._get_content(parent_id, parent_texts)

603

seq = patiencediff.PatienceSequenceMatcher(

604

None, merge_content.text(), content.text())

605

if delta_seq is None:

606

# setup a delta seq to reuse.

607

delta_seq = seq

1897

608

for i, j, n in seq.get_matching_blocks():

1898

609

if n == 0:

1899

610

continue

1900

# this copies (origin, text) pairs across to the new

1901

# content for any line that matches the last-checked

1902

# parent.

611

# this appears to copy (origin, text) pairs across to the new

612

# content for any line that matches the last-checked parent.

613

# FIXME: save the sequence control data for delta compression

614

# against the most relevant parent rather than rediffing.

1903

615

content._lines[j:j+n] = merge_content._lines[i:i+n]

1904

# XXX: Robert says the following block is a workaround for a

1905

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1906

if content._lines and not content._lines[-1][1].endswith(b'\n'):

1907

# The copied annotation was from a line without a trailing EOL,

1908

# reinstate one for the content object, to ensure correct

1909

# serialization.

1910

line = content._lines[-1][1] + b'\n'

1911

content._lines[-1] = (content._lines[-1][0], line)

1912

616

if delta:

1913

if delta_seq is None:

617

if not annotated:

1914

618

reference_content = self._get_content(parents[0], parent_texts)

1915

619

new_texts = content.text()

1916

620

old_texts = reference_content.text()

1918

622

None, old_texts, new_texts)

1919

623

return self._make_line_delta(delta_seq, content)

1920

624

1921

def _parse_record(self, version_id, data):
    """Parse an original format knit record.

    These have the last element of the key only present in the stored data.

    :param version_id: The version the header is checked against.
    :param data: The raw record, as accepted by _parse_record_unchecked().
    :return: (record_contents, digest) where digest is field 3 of the
        parsed header.
    """
    header, contents = self._parse_record_unchecked(data)
    self._check_header_version(header, version_id)
    digest = header[3]
    return contents, digest

1929

1930

def _parse_record_header(self, key, raw_data):
    """Parse a record header for consistency.

    :param key: The key the record is expected to belong to; it is handed
        to _check_header() together with the first decompressed line.
    :param raw_data: The gzip-compressed bytes of the record.
    :return: (stream, header_record): the decompressor stream, positioned
        just after the header line, and the checked header.
    :raises KnitCorrupt: If reading or checking the header fails for any
        reason.
    """
    df = gzip.GzipFile(mode='rb', fileobj=BytesIO(raw_data))
    try:
        # Current serialise
        rec = self._check_header(key, df.readline())
    except Exception as e:
        # On failure the caller never receives the stream, so nobody else
        # can close it: do it here before reporting the corruption.
        df.close()
        raise KnitCorrupt(self,
                          "While reading {%s} got %s(%s)"
                          % (key, e.__class__.__name__, str(e)))
    return df, rec

1945

1946

def _parse_record_unchecked(self, data):
    """Decompress a knit record and split it into header and contents.

    The record's own consistency is verified (line count and end marker)
    but the header is not compared against any expected key.

    :param data: The gzip-compressed bytes of one record.
    :return: (rec, record_contents) where rec is the split header and
        record_contents the list of content lines.
    :raises KnitCorrupt: If the data cannot be decompressed, is too short
        to hold a header and an end line, declares a line count that does
        not match, or ends with an unexpected end line.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    with gzip.GzipFile(mode='rb', fileobj=BytesIO(data)) as df:
        try:
            record_contents = df.readlines()
        except Exception as e:
            raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %
                              (data, e.__class__.__name__, str(e)))
    if len(record_contents) < 2:
        # Without this guard the pop() calls below would escape as a bare
        # IndexError instead of reporting the record as corrupt.
        raise KnitCorrupt(self,
                          'truncated record, expected a header and an end'
                          ' line but got %r' % (record_contents,))
    header = record_contents.pop(0)
    rec = self._split_header(header)
    last_line = record_contents.pop()
    declared_lines = int(rec[2])
    if len(record_contents) != declared_lines:
        raise KnitCorrupt(self,
                          'incorrect number of lines %s != %s'
                          ' for version {%s} %s'
                          % (len(record_contents), declared_lines,
                             rec[1], record_contents))
    if last_line != b'end %s\n' % rec[1]:
        raise KnitCorrupt(self,
                          'unexpected version end line %r, wanted %r'
                          % (last_line, rec[1]))
    return rec, record_contents

1971

1972

def _read_records_iter(self, records):
    """Read text records from data file and yield result.

    Records are fetched sorted by their access memo, not in the order
    requested, and a record requested several times is yielded once only.

    :param records: A list of (key, access_memo) entries
    :return: Yields (key, contents, digest) in the order
        read, not the order requested
    """
    if not records:
        return

    # XXX: This smells wrong, IO may not be getting ordered right.
    needed_records = list(set(records))
    needed_records.sort(key=operator.itemgetter(1))
    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [memo for _key, memo in needed_records]
    raw_data = self._access.get_raw_records(memos)

    for (key, _memo), data in zip(needed_records, raw_data):
        content, digest = self._parse_record(key[-1], data)
        yield key, content, digest

1999

2000

def _read_records_iter_raw(self, records):
    """Read text records from data file and yield raw data.

    Only the header of each record is parsed, which is enough to validate
    that the id is as expected, but that's all.

    Each item the iterator yields is (key, bytes,
    expected_sha1_of_full_text).
    """
    for key, data in self._read_records_iter_unchecked(records):
        # validate the header (note that we can only use the suffix in
        # current knit records).
        stream, header = self._parse_record_header(key, data)
        stream.close()
        yield key, data, header[3]

2015

2016

def _read_records_iter_unchecked(self, records):
    """Read text records from data file and yield raw data.

    No validation is done.

    :param records: A sized sequence of (key, index_memo) entries.
    :return: Yields tuples of (key, data), in the order of records.
    """
    if not len(records):
        # nothing requested, nothing to fetch.
        return
    # setup an iterator of the external records:
    # uses readv so nice and fast we hope.
    # grab the disk data needed.
    memos = [index_memo for _key, index_memo in records]
    raw_records = self._access.get_raw_records(memos)

    for key, _index_memo in records:
        yield key, next(raw_records)

2034

2035

def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Convert key, digest, lines into a raw data block.

    :param key: The key of the record. Currently keys are always serialised
        using just the trailing component.
    :param digest: The digest written into the record header.
    :param lines: The content lines; the last one must end in a newline.
    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in
        \\n, dense_lines may be a list with one line in it, containing all
        the 1000's lines and their \\n's. Using dense_lines if it is
        already known is a win because the string join to create bytes in
        this function spends less time resizing the final string.
    :return: (len, bytes): the compressed record and its length.
    :raises AssertionError: If any chunk is not a bytes object.
    :raises ValueError: If the last of lines lacks a trailing newline.
    """
    suffix = key[-1]
    chunks = [b"version %s %d %s\n" % (suffix, len(lines), digest)]
    chunks.extend(dense_lines or lines)
    chunks.append(b"end " + suffix + b"\n")
    for chunk in chunks:
        if isinstance(chunk, bytes):
            continue
        raise AssertionError(
            'data must be plain bytes was %s' % type(chunk))
    if lines and not lines[-1].endswith(b'\n'):
        raise ValueError('corrupt lines value %r' % lines)
    compressed_bytes = b''.join(tuned_gzip.chunks_to_gzip(chunks))
    return len(compressed_bytes), compressed_bytes

2059

2060

def _split_header(self, line):
    """Split a record header line into its four whitespace-separated fields.

    :param line: The header line of a record.
    :return: The list of four fields.
    :raises KnitCorrupt: If the line does not have exactly four fields.
    """
    fields = line.split()
    if len(fields) == 4:
        return fields
    raise KnitCorrupt(self,
                      'unexpected number of elements in record header')

2066

2067

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of the index and of every immediate
        fallback versioned file.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    found = set()
    for source in [self._index] + self._immediate_fallback_vfs:
        found |= set(source.keys())
    return found

2076

2077

2078

class _ContentMapGenerator(object):

2079

"""Generate texts or expose raw deltas for a set of texts."""

2080

2081

def __init__(self, ordering='unordered'):
    """Remember the ordering to use.

    :param ordering: Stored as self._ordering; 'unordered' by default.
    """
    self._ordering = ordering

2083

2084

def _get_content(self, key):

2085

"""Get the content object for key."""

2086

# Note that _get_content is only called when the _ContentMapGenerator

2087

# has been constructed with just one key requested for reconstruction.

2088

if key in self.nonlocal_keys:

2089

record = next(self.get_record_stream())

2090

# Create a content object on the fly

2091

lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

2092

return PlainKnitContent(lines, record.key)

625

def _make_line_delta(self, delta_seq, new_content):
    """Generate a line delta from delta_seq and new_content.

    :param delta_seq: A sequence matcher exposing get_opcodes().
    :param new_content: The content object whose _lines supply the new
        lines of each hunk.
    :return: A list of (old_start, old_end, new_length, new_lines) tuples,
        one per opcode that is not 'equal'.
    """
    hunks = []
    changes = (op for op in delta_seq.get_opcodes() if op[0] != 'equal')
    for op in changes:
        new_start, new_end = op[3], op[4]
        hunks.append((op[1], op[2], new_end - new_start,
                      new_content._lines[new_start:new_end]))
    return hunks

633

634

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    Each requested version is followed back through its build-parents
    until a fulltext, or a version that was already mapped, is reached.

    :return: A dict of version_id to (method, data_pos, data_size, next):
        method is the way referenced data should be applied.
        data_pos is the position of the data in the knit.
        data_size is the size of the data in the knit.
        next is the build-parent of the version, or None for fulltexts.
    """
    positions = {}
    for version_id in version_ids:
        cursor = version_id
        while not (cursor is None or cursor in positions):
            method = self._index.get_method(cursor)
            if method == 'fulltext':
                build_parent = None
            else:
                build_parent = self.get_parents(cursor)[0]
            data_pos, data_size = self._index.get_position(cursor)
            positions[cursor] = (method, data_pos, data_size, build_parent)
            cursor = build_parent
    return positions

660

661

def _get_content(self, version_id, parent_texts=None):
    """Returns a content object that makes up the specified version.

    :param version_id: The version wanted.
    :param parent_texts: Optional dict of version_id to already-built
        content objects; a hit is returned as is.  None (the default)
        means no such cache is available.
    :return: The cached content if parent_texts has one for version_id,
        otherwise the content built by _get_content_maps().
    :raises RevisionNotPresent: If version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    # A None default replaces the former shared mutable ``{}`` default.
    if parent_texts is not None:
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

673

674

def _check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    The check itself is delegated to the index.
    """
    index = self._index
    index.check_versions_present(version_ids)

677

678

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines_with_ghosts().

    Parents are not checked for presence here.  _add() is handed a copy
    of lines rather than the caller's own list.
    """
    self._check_add(version_id, lines)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

682

683

def _add_lines(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines.

    The parents are checked for presence first.  _add() is handed a copy
    of lines rather than the caller's own list.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    lines_copy = lines[:]
    return self._add(version_id, lines_copy, parents, self.delta,
                     parent_texts)

688

689

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :param version_id: The id about to be added.
    :param lines: The lines about to be added.
    :raises AssertionError: If the knit is not writable.
    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If version_id is already in the knit.
    """
    if not self.writable:
        # An explicit raise rather than an assert statement, so that the
        # check is still made when running under ``python -O``.
        raise AssertionError("knit is not opened for write")
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    if self.has_version(version_id):
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

700

701

def _add(self, version_id, lines, parents, delta, parent_texts):

702

"""Add a set of lines on top of version specified by parents.

703

704

If delta is true, compress the text as a line-delta against

705

the first parent.

706

707

Any versions not present will be converted into ghosts.

708

"""

709

# 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)

710

# +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)

711

# +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)

712

# +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)

713

# +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)

714

# +461 0 36.3420 15.4540 +bzrlib.knit:146(make)

715

# +1383 0 8.0370 8.0370 +<len>

716

# +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)

717

# +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)

718

# +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)

719

# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)

720

721

present_parents = []

722

ghosts = []

723

if parent_texts is None:

724

parent_texts = {}

725

for parent in parents:

726

if not self.has_version(parent):

727

ghosts.append(parent)

728

else:

729

present_parents.append(parent)

730

731

if delta and not len(present_parents):

732

delta = False

733

734

digest = sha_strings(lines)

735

options = []

736

if lines:

737

if lines[-1][-1] != '\n':

738

options.append('no-eol')

739

lines[-1] = lines[-1] + '\n'

740

741

if len(present_parents) and delta:

742

# To speed the extract of texts the delta chain is limited

743

# to a fixed number of deltas. This should minimize both

744

# I/O and the time spent applying deltas.

745

delta = self._check_should_delta(present_parents)

746

747

assert isinstance(version_id, str)

748

lines = self.factory.make(lines, version_id)

749

if delta or (self.factory.annotated and len(present_parents) > 0):

750

# Merge annotations from parent texts if so is needed.

751

delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,

752

delta, self.factory.annotated)

753

754

if delta:

755

options.append('line-delta')

756

store_lines = self.factory.lower_line_delta(delta_hunks)

2093

757

else:

2094

# local keys we can ask for directly

2095

return self._get_one_work(key)

2096

2097

def get_record_stream(self):
    """Get a record stream for the keys requested during __init__.

    :return: Yields, lazily, each record produced by _work().
    """
    stream = self._work()
    for item in stream:
        yield item

2101

2102

def _work(self):

2103

"""Produce maps of text and KnitContents as dicts.

2104

758

options.append('fulltext')

759

store_lines = self.factory.lower_fulltext(lines)

760

761

where, size = self._data.add_record(version_id, digest, store_lines)

762

self._index.add_version(version_id, options, where, size, parents)

763

return lines

764

765

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks.
    """

767

768

def _clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text().

    The lines of old_version_id are read back and added again as
    new_version_id with the given parents.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)

773

774

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    :return: The first (and only) entry of get_line_list([version_id]).
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

777

778

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is a KnitContent object.
    digest is the SHA1 digest of this version id after all steps are done
    next is the build-parent of the version, i.e. the leftmost ancestor.
    If the method is fulltext, next will be None.
    """
    position_map = self._get_components_positions(version_ids)
    # c = component_id, m = method, p = position, s = size, n = next
    # items() instead of the Python 2 only iteritems(): the map is walked
    # exactly once either way.
    records = [(c, p, s) for c, (m, p, s, n) in position_map.items()]
    record_map = {}
    for component_id, content, digest in \
            self._data.read_records_iter(records):
        method, position, size, build_parent = position_map[component_id]
        record_map[component_id] = method, content, digest, build_parent

    return record_map

799

800

def get_text(self, version_id):
    """See VersionedFile.get_text

    :return: The first (and only) entry of get_texts([version_id]).
    """
    texts = self.get_texts([version_id])
    return texts[0]

803

804

def get_texts(self, version_ids):
    """Return the texts of the listed versions, each joined into one string.

    :param version_ids: The versions wanted, passed on to get_line_list().
    :return: A list with one joined text per entry of get_line_list().
    """
    texts = []
    for lines in self.get_line_list(version_ids):
        texts.append(''.join(lines))
    return texts

806

807

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Every id is passed through osutils.safe_revision_id() and checked
    against the reserved ids before anything is read.

    :return: One text_map entry per requested version, in request order.
    """
    safe_ids = []
    for raw_id in version_ids:
        safe_ids.append(osutils.safe_revision_id(raw_id))
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, _content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

814

815

def _get_content_maps(self, version_ids):

816

"""Produce maps of text and KnitContents

817

2105

818

:return: (text_map, content_map) where text_map contains the texts for

2106

the requested versions and content_map contains the KnitContents.

819

the requested versions and content_map contains the KnitContents.

820

Both dicts take version_ids as their keys.

2107

821

"""

2108

# NB: By definition we never need to read remote sources unless texts

2109

# are requested from them: we don't delta across stores - and we

2110

# explicitly do not want to, in order to prevent data loss situations.

2111

if self.global_map is None:

2112

self.global_map = self.vf.get_parent_map(self.keys)

2113

nonlocal_keys = self.nonlocal_keys

2114

2115

missing_keys = set(nonlocal_keys)

2116

# Read from remote versioned file instances and provide to our caller.

2117

for source in self.vf._immediate_fallback_vfs:

2118

if not missing_keys:

2119

break

2120

# Loop over fallback repositories asking them for texts - ignore

2121

# any missing from a particular fallback.

2122

for record in source.get_record_stream(missing_keys,

2123

self._ordering, True):

2124

if record.storage_kind == 'absent':

2125

# Not in this particular stream, may be in one of the

2126

# other fallback vfs objects.

2127

continue

2128

missing_keys.remove(record.key)

2129

yield record

2130

2131

if self._raw_record_map is None:

2132

raise AssertionError('_raw_record_map should have been filled')

2133

first = True

2134

for key in self.keys:

2135

if key in self.nonlocal_keys:

2136

continue

2137

yield LazyKnitContentFactory(key, self.global_map[key], self, first)

2138

first = False

2139

2140

def _get_one_work(self, requested_key):

2141

# Now, if we have calculated everything already, just return the

2142

# desired text.

2143

if requested_key in self._contents_map:

2144

return self._contents_map[requested_key]

2145

# To simplify things, parse everything at once - code that wants one text

2146

# probably wants them all.

2147

# FUTURE: This function could be improved for the 'extract many' case

2148

# by tracking each component and only doing the copy when the number of

2149

# children that need to apply deltas to it is > 1 or it is part of the

2150

# final output.

2151

multiple_versions = len(self.keys) != 1

2152

if self._record_map is None:

2153

self._record_map = self.vf._raw_map_to_record_map(

2154

self._raw_record_map)

2155

record_map = self._record_map

2156

# raw_record_map is key:

2157

# Have read and parsed records at this point.

2158

for key in self.keys:

2159

if key in self.nonlocal_keys:

2160

# already handled

2161

continue

822

for version_id in version_ids:

823

if not self.has_version(version_id):

824

raise RevisionNotPresent(version_id, self.filename)

825

record_map = self._get_record_map(version_ids)

826

827

text_map = {}

828

content_map = {}

829

final_content = {}

830

for version_id in version_ids:

2162

831

components = []

2163

cursor = key

832

cursor = version_id

2164

833

while cursor is not None:

2165

try:

2166

record, record_details, digest, next = record_map[cursor]

2167

except KeyError:

2168

raise RevisionNotPresent(cursor, self)

2169

components.append((cursor, record, record_details, digest))

2170

cursor = next

2171

if cursor in self._contents_map:

2172

# no need to plan further back

2173

components.append((cursor, None, None, None))

834

method, data, digest, next = record_map[cursor]

835

components.append((cursor, method, data, digest))

836

if cursor in content_map:

2174

837

break

838

cursor = next

2175

839

2176

840

content = None

2177

for (component_id, record, record_details,

2178

digest) in reversed(components):

2179

if component_id in self._contents_map:

2180

content = self._contents_map[component_id]

841

for component_id, method, data, digest in reversed(components):

842

if component_id in content_map:

843

content = content_map[component_id]

2181

844

else:

2182

content, delta = self._factory.parse_record(key[-1],

2183

record, record_details, content,

2184

copy_base_content=multiple_versions)

2185

if multiple_versions:

2186

self._contents_map[component_id] = content

845

if method == 'fulltext':

846

assert content is None

847

content = self.factory.parse_fulltext(data, version_id)

848

elif method == 'line-delta':

849

delta = self.factory.parse_line_delta(data, version_id)

850

content = content.copy()

851

content._lines = self._apply_delta(content._lines,

852

delta)

853

content_map[component_id] = content

854

855

if 'no-eol' in self._index.get_options(version_id):

856

content = content.copy()

857

line = content._lines[-1][1].rstrip('\n')

858

content._lines[-1] = (content._lines[-1][0], line)

859

final_content[version_id] = content

2187

860

2188

861

# digest here is the digest from the last applied component.

2189

862

text = content.text()

2190

actual_sha = sha_strings(text)

2191

if actual_sha != digest:

2192

raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)

2193

if multiple_versions:

2194

return self._contents_map[requested_key]

2195

else:

2196

return content

2197

2198

def _wire_bytes(self):
    """Get the bytes to put on the wire for this generator's keys.

    The result is a newline-joined sequence of:

    * the kind marker b'knit-delta-closure', for dispatch on the far side;
    * b'annotated', or an empty line when the factory is not annotated;
    * the keys that are not nonlocal, tab separated, with the elements of
      each key joined by NUL;
    * the serialised _raw_record_map.

    :return: Bytes to put on the wire.
    """
    def flatten(key):
        return b'\x00'.join(key)

    sections = [b'knit-delta-closure']
    if self.vf._factory.annotated:
        sections.append(b'annotated')
    else:
        sections.append(b'')
    local_keys = (flatten(key) for key in self.keys
                  if key not in self.nonlocal_keys)
    sections.append(b'\t'.join(local_keys))

    # the _raw_record_map in serialised form; for each item in the map:
    # 1 line with key
    # 1 line with parents if the key is to be yielded (None: for None, '' for ())
    # one line with method
    # one line with noeol
    # one line with next ('' for None)
    # one line with byte count of the record bytes
    # the record bytes
    serialised = []
    for key, (record_bytes, (method, noeol), next_key) in viewitems(
            self._raw_record_map):
        key_bytes = flatten(key)
        parents = self.global_map.get(key, None)
        if parents is None:
            parent_bytes = b'None:'
        else:
            parent_bytes = b'\t'.join(flatten(parent) for parent in parents)
        method_bytes = method.encode('ascii')
        noeol_bytes = b"T" if noeol else b"F"
        next_bytes = flatten(next_key) if next_key else b''
        fields = [key_bytes, parent_bytes, method_bytes, noeol_bytes,
                  next_bytes, b'%d' % len(record_bytes), record_bytes]
        serialised.append(b'\n'.join(fields))
    sections.append(b''.join(serialised))
    return b'\n'.join(sections)

2255

2256

2257

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
                 global_map=None, raw_record_map=None, ordering='unordered'):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for.
        :param nonlocal_keys: An iterable of keys (possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: An unparsed raw record map to use for
            answering contents. When None, one is fetched from
            versioned_files.
        :param ordering: Passed straight through to the base class.
        """
        _ContentMapGenerator.__init__(self, ordering=ordering)
        source = versioned_files
        # Where the data comes from, and which keys were asked for.
        self.vf = source
        self.keys = list(keys)
        # Keys known to live in fallback vfs objects.
        self.nonlocal_keys = (
            set() if nonlocal_keys is None else frozenset(nonlocal_keys))
        # Parent data for the keys handed out by get_record_stream.
        self.global_map = global_map
        # Chunked texts for self.keys, and the KnitContent cache used while
        # extracting them.
        self._text_map = {}
        self._contents_map = {}
        # Parsed knit records needed to rebuild the requested full texts.
        self._record_map = None
        if raw_record_map is not None:
            self._raw_record_map = raw_record_map
        else:
            # The caller's ``keys`` object (not self.keys) is passed on.
            self._raw_record_map = source._get_record_map_unparsed(
                keys, allow_missing=True)
        # Factory used for parsing records.
        self._factory = source._factory

2301

2302

2303

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised stream to parse.
        :param line_end: Offset in bytes at which parsing starts; the first
            line read from there is the annotated-or-not marker.
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)

        def take_line(offset):
            # Return (line without its newline, offset just past the newline).
            stop = bytes.find(b'\n', offset)
            return bytes[offset:stop], stop + 1

        def split_keys(text):
            # Tab separated keys, each key's elements separated by NUL.
            return [tuple(chunk.split(b'\x00'))
                    for chunk in text.split(b'\t') if chunk]

        # Annotated or not
        flavour, cursor = take_line(line_end)
        if flavour == b'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        key_list_line, cursor = take_line(cursor)
        self.keys = split_keys(key_list_line)
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives,
        # but there is a decent sized gap stopping that at the moment.
        total = len(bytes)
        while cursor < total:
            # 1 line with key
            key_line, cursor = take_line(cursor)
            key = tuple(key_line.split(b'\x00'))
            # 1 line with parents (None: for None, '' for ())
            parent_line, cursor = take_line(cursor)
            if parent_line == b'None:':
                parents = None
            else:
                parents = tuple(split_keys(parent_line))
            self.global_map[key] = parents
            # one line with method
            method, cursor = take_line(cursor)
            # one line with noeol
            noeol_flag, cursor = take_line(cursor)
            noeol = noeol_flag == b"T"
            # one line with next (b'' for None)
            next_line, cursor = take_line(cursor)
            if next_line:
                compression_next = tuple(next_line.split(b'\x00'))
            else:
                compression_next = None
            # one line with byte count of the record bytes
            count_line, cursor = take_line(cursor)
            count = int(count_line)
            # the record bytes
            record_bytes = bytes[cursor:cursor + count]
            cursor = cursor + count
            # put it in the map
            self._raw_record_map[key] = (
                record_bytes, (method, noeol), compression_next)

    def get_record_stream(self):
        """Get a record stream for the keys requested by the bytestream."""
        # Only the very first factory is flagged as first.
        for position, key in enumerate(self.keys):
            yield LazyKnitContentFactory(
                key, self.global_map[key], self, position == 0)

    def _wire_bytes(self):
        """Return the bytes this generator was constructed from."""
        return self._bytes

2390

2391

2392

class _KndxIndex(object):

2393

"""Manages knit index files

2394

2395

The index is kept in memory and read on startup, to enable

863

if sha_strings(text) != digest:

864

raise KnitCorrupt(self.filename,

865

'sha-1 does not match %s' % version_id)

866

867

text_map[version_id] = text

868

return text_map, final_content

869

870

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: Versions to walk; None means everything returned by
        self.versions().
    :param pb: Progress bar to update; a DummyProgress is used when None.
    :return: A generator over the content lines of each visited record.
    :raises RevisionNotPresent: if a requested version is not in this knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    wanted = set(version_ids)
    # Refuse the request if any asked-for version is unavailable.
    for candidate in wanted:
        if not self.has_version(candidate):
            raise RevisionNotPresent(candidate, self.filename)
    # We don't care about inclusions, the caller does. Build the list of
    # (version_id, position, length) to visit, in self.versions() order.
    to_read = []
    for candidate in self.versions():
        if candidate not in wanted:
            continue
        data_pos, length = self._index.get_position(candidate)
        to_read.append((candidate, data_pos, length))

    total = len(to_read)
    record_iter = self._data.read_records_iter(to_read)
    for position, (version_id, data, sha_value) in enumerate(record_iter):
        pb.update('Walking content.', position, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            content_lines = self.factory.get_fulltext_content(data)
        else:
            content_lines = self.factory.get_linedelta_content(data)
        for line in content_lines:
            yield line

    pb.update('Walking content.', total, total)

909

910

def num_versions(self):
    """See VersionedFile.num_versions().

    The count is whatever the index reports.
    """
    count = self._index.num_versions()
    return count


# len(knit) is the same thing as knit.num_versions().
__len__ = num_versions

915

916

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields (origin, text) pairs taken from the content object of the
    requested version.
    """
    content = self._get_content(osutils.safe_revision_id(version_id))
    for source_revision, line_text in content.annotate_iter():
        yield source_revision, line_text

922

923

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index has no entry for version_id.
    """
    version_id = osutils.safe_revision_id(version_id)
    # perf note from the original author: "optimism counts!" - ask the
    # index directly and translate the failure instead of checking first
    # (52554 calls in 1264 872 internal down from 3674).
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

933

934

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index has no entry for version_id.
    """
    version_id = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

941

942

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    A single version id given as a string is treated as a one-element
    list; an empty request returns an empty list without touching the
    index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if not versions:
        return []
    safe_ids = [osutils.safe_revision_id(v) for v in versions]
    return self._index.get_ancestry(safe_ids)

950

951

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    A single version id given as a string is treated as a one-element
    list; an empty request returns an empty list without touching the
    index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if not versions:
        return []
    safe_ids = [osutils.safe_revision_id(v) for v in versions]
    return self._index.get_ancestry_with_ghosts(safe_ids)

959

960

#@deprecated_method(zero_eight)

961

def walk(self, version_ids):
    """See VersionedFile.walk."""
    # Short path: copy every relevant text into a scratch weave and let
    # the weave do the walking. Far from optimal, but much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    scratch = Weave(self.filename)
    ancestry = self.get_ancestry(version_ids)
    topo_order = topo_sort(self._index.get_graph())
    # Keep only the versions in the requested ancestry, in graph order.
    to_add = [candidate for candidate in topo_order if candidate in ancestry]

    for version_id in to_add:
        text_lines = self.get_lines(version_id)
        scratch.add_lines(
            version_id, self.get_parents(version_id), text_lines)

    for lineno, insert_id, dset, line in scratch.walk(version_ids):
        yield lineno, insert_id, dset, line

980

981

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Yields (state, text) pairs where state is one of 'killed-b', 'new-a',
    'killed-a', 'new-b' or "unchanged".
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    ancestors_b = set(self.get_ancestry(ver_b))
    ancestors_a = set(self.get_ancestry(ver_a))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [text for (_, text) in annotated_a]
    plain_b = [text for (_, text) in annotated_b]
    matcher = KnitSequenceMatcher(None, plain_a, plain_b)
    a_cur = 0
    b_cur = 0
    for a_start, b_start, length in matcher.get_matching_blocks():
        # First the mismatched stretch before this block (the final
        # mismatched stretch is covered because the matcher always ends
        # with a 0-length block).
        for revision, text in annotated_a[a_cur:a_start]:
            # A line only in a: b either dropped it or never had it.
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:b_start]:
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # and now the matched section
        a_cur = a_start + length
        b_cur = b_start + length
        matched = zip(plain_a[a_start:a_cur], plain_b[b_start:b_cur])
        for text_a, text_b in matched:
            assert text_a == text_b
            yield "unchanged", text_a

1021

1022

1023

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Remember where the file lives and how it should be created.

        Nothing is read or written here; the arguments are only stored.
        """
        self._transport, self._filename = transport, filename
        self._mode = mode
        self._file_mode, self._dir_mode = file_mode, dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        base = self._transport.base
        return base + self._filename

    def check_header(self, fp):
        """Read one line from fp and check it is this file type's HEADER.

        :raises errors.NoSuchFile: if the first read returns ''.
        :raises KnitHeaderError: if the line differs from self.HEADER.
        """
        first_line = fp.readline()
        if first_line == '':
            # An empty file can actually be treated as though the file
            # doesn't exist yet.
            raise errors.NoSuchFile(self._full_path())
        elif first_line != self.HEADER:
            raise KnitHeaderError(
                badline=first_line,
                filename=self._transport.abspath(self._filename))

    def commit(self):
        """Commit is a nop."""
        return None

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

1055

1056

1057

class _KnitIndex(_KnitComponentFile):

1058

"""Manages knit index file.

1059

1060

The index is already kept in memory and read on startup, to enable

2396

1061

fast lookups of revision information. The cursor of the index

2397

1062

file is always pointing to the end, making it easy to append

2398

1063

entries.

2408

1073

2409

1074

Duplicate entries may be written to the index for a single version id

2410

1075

if this is done then the latter one completely replaces the former:

2411

this allows updates to correct version and parent information.

1076

this allows updates to correct version and parent information.

2412

1077

Note that the two entries may share the delta, and that successive

2413

1078

annotations and references MUST point to the first entry.

2414

1079

2415

1080

The index file on disc contains a header, followed by one line per knit

2416

1081

record. The same revision can be present in an index file more than once.

2417

The first occurrence gets assigned a sequence number starting from 0.

2418

1082

The first occurrence gets assigned a sequence number starting from 0.

1083

2419

1084

The format of a single line is

2420

1085

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

2421

1086

REVISION_ID is a utf8-encoded revision id

2422

FLAGS is a comma separated list of flags about the record. Values include

1087

FLAGS is a comma separated list of flags about the record. Values include

2423

1088

no-eol, line-delta, fulltext.

2424

1089

BYTE_OFFSET is the ascii representation of the byte offset in the data file

2425

that the compressed data starts at.

1090

that the the compressed data starts at.

2426

1091

LENGTH is the ascii representation of the length of the data file.

2427

1092

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

2428

1093

REVISION_ID.

2429

1094

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

2430

1095

revision id already in the knit that is a parent of REVISION_ID.

2431

1096

The ' :' marker is the end of record marker.

2432

1097

2433

1098

partial writes:

2434

1099

when a write is interrupted to the index file, it will result in a line

2435

1100

that does not end in ' :'. If the ' :' is not present at the end of a line,

2440

1105

to ensure that records always start on new lines even if the last write was

2441

1106

interrupted. As a result it's normal for the last line in the index to be

2442

1107

missing a trailing newline. One can be added with no harmful effects.

2443

2444

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2445

where prefix is e.g. the (fileid,) for .texts instances or () for

2446

constant-mapped things like .revisions, and the old state is

2447

tuple(history_vector, cache_dict). This is used to prevent having an

2448

ABI change with the C extension that reads .kndx files.

2449

1108

"""

2450

1109

2451

HEADER = b"# bzr knit index 8\n"

2452

2453

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    get_scope, allow_writes and is_locked are stored as callables and are
    invoked later by the cache and lock checks.
    """
    self._transport, self._mapper = transport, mapper
    self._get_scope, self._allow_writes, self._is_locked = (
        get_scope, allow_writes, is_locked)
    # Needs the callables above to be in place already.
    self._reset_cache()
    self.has_graph = True

2462

2463

def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    :param records: a list of tuples:
        (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed. (Not consulted by
        this implementation.)
    :param missing_compression_parents: If True, RevisionNotPresent is
        raised for the sorted record keys before anything is written.
    :raises TypeError: if any option of a record is not bytes.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # Group the records by the .kndx file they belong to.
    by_path = {}
    for record in records:
        key = record[0]
        kndx_path = self._mapper.map(key) + '.kndx'
        by_path.setdefault(kndx_path, (key[:-1], []))[1].append(record)
    for path, (prefix, batch) in sorted(by_path.items()):
        self._load_prefixes([prefix])
        # Snapshot the in-memory state so a failure can be rolled back.
        cached_entries, cached_history = self._kndx_cache[prefix]
        orig_history = cached_history[:]
        orig_cache = cached_entries.copy()
        lines = []

        try:
            for key, options, (_, pos, size), parents in batch:
                if any(not isinstance(option, bytes) for option in options):
                    raise TypeError(options)
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                fields = [
                    b'\n' + key[-1], b','.join(options), b'%d' % pos,
                    b'%d' % size, self._dictionary_compress(parents), b':']
                line = b' '.join(fields)
                if not isinstance(line, bytes):
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if orig_history:
                # The index already had entries: append to it.
                self._transport.append_bytes(path, b''.join(lines))
            else:
                # Nothing was recorded before: write header plus lines.
                self._init_index(path, lines)
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._kndx_cache[prefix] = (orig_cache, orig_history)
            raise

2517

2518

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    Always raises NotImplementedError: kndx files do not support atomic
    insertion via separate index files, so there is nothing to scan.
    """
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)

2523

2524

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    Always raises NotImplementedError: kndx files do not support atomic
    insertion via separate index files, so this is not tracked.
    """
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)

2529

2530

def _cache_key(self, key, options, pos, size, parent_keys):

1110

HEADER = "# bzr knit index 8\n"

1111

1112

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1113

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1114

1115

def _cache_version(self, version_id, options, pos, size, parents):

2531

1116

"""Cache a version record in the history array and index cache.

2532

1117

2533

1118

This is inlined into _load_data for performance. KEEP IN SYNC.

2534

1119

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

2535

1120

indexes).

2536

1121

"""

2537

prefix = key[:-1]

2538

version_id = key[-1]

2539

# last-element only for compatibility with the C load_data.

2540

parents = tuple(parent[-1] for parent in parent_keys)

2541

for parent in parent_keys:

2542

if parent[:-1] != prefix:

2543

raise ValueError("mismatched prefixes for %r, %r" % (

2544

key, parent_keys))

2545

cache, history = self._kndx_cache[prefix]

2546

1122

# only want the _history index to reference the 1st index entry

2547

1123

# for version_id

2548

if version_id not in cache:

2549

index = len(history)

2550

history.append(version_id)

1124

if version_id not in self._cache:

1125

index = len(self._history)

1126

self._history.append(version_id)

2551

1127

else:

2552

index = cache[version_id][5]

2553

cache[version_id] = (version_id,

1128

index = self._cache[version_id][5]

1129

self._cache[version_id] = (version_id,

2554

1130

options,

2555

1131

pos,

2556

1132

size,

2557

1133

parents,

2558

1134

index)

2559

1135

2560

def check_header(self, fp):
    """Read one line from fp and check it is the kndx HEADER.

    :raises errors.NoSuchFile: if the first read returns b''.
    :raises KnitHeaderError: if the line differs from self.HEADER.
    """
    first_line = fp.readline()
    if first_line == b'':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    elif first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)

2568

2569

def _check_read(self):
    """Check the index may be read, dropping the cache on a scope change.

    :raises errors.ObjectNotLocked: if self._is_locked() is false.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        self._reset_cache()

2574

2575

def _check_write_ok(self):
    """Raise unless writes are permitted.

    :raises errors.ObjectNotLocked: if self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: if the mode, after any
        cache reset caused by a scope change, is not 'w'.
    """
    if not self._is_locked():
        raise errors.ObjectNotLocked(self)
    scope_changed = self._get_scope() != self._scope
    if scope_changed:
        # Resetting the cache also recomputes self._mode.
        self._reset_cache()
    if self._mode != 'w':
        raise errors.ReadOnlyObjectDirtiedError(self)

2583

2584

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys. It is materialised once, so one-shot
        iterators (generators) are handled correctly.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    :raises TypeError: if get_method() returns something that is not a str.
    """
    # keys is walked twice (parent lookup, then the loop below); without a
    # concrete list a generator would be exhausted after the first walk.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue  # Ghost
        method = self.get_method(key)
        if not isinstance(method, str):
            raise TypeError(method)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Deltas are built against the first parent.
            compression_parent = parents[0]
        noeol = b'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

2621

2622

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both flags
        are present in the options.
    :raises KnitIndexUnknownMethod: if neither flag is in the options.
    """
    options = self.get_options(key)
    known_methods = ((b'fulltext', 'fulltext'), (b'line-delta', 'line-delta'))
    for flag, method_name in known_methods:
        if flag in options:
            return method_name
    raise KnitIndexUnknownMethod(self, options)

2631

2632

def get_options(self, key):

2633

"""Return a list representing options.

2634

2635

e.g. ['foo', 'bar']

2636

"""

2637

prefix, suffix = self._split_key(key)

2638

self._load_prefixes([prefix])

1136

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1137

create_parent_dir=False, delay_create=False, dir_mode=None):

1138

_KnitComponentFile.__init__(self, transport, filename, mode,

1139

file_mode=file_mode,

1140

create_parent_dir=create_parent_dir,

1141

dir_mode=dir_mode)

1142

self._cache = {}

1143

# position in _history is the 'official' index for a revision

1144

# but the values may have come from a newer entry.

1145

# so - wc -l of a knit index is != the number of unique names

1146

# in the knit.

1147

self._history = []

2639

1148

try:

2640

return self._kndx_cache[prefix][0][suffix][1]

2641

except KeyError:

2642

raise RevisionNotPresent(key, self)

2643

2644

def find_ancestry(self, keys):

2645

"""See CombinedGraphIndex.find_ancestry()"""

2646

prefixes = set(key[:-1] for key in keys)

2647

self._load_prefixes(prefixes)

2648

result = {}

2649

parent_map = {}

2650

missing_keys = set()

2651

pending_keys = list(keys)

2652

# This assumes that keys will not reference parents in a different

2653

# prefix, which is accurate so far.

2654

while pending_keys:

2655

key = pending_keys.pop()

2656

if key in parent_map:

1149

fp = self._transport.get(self._filename)

1150

try:

1151

# _load_data may raise NoSuchFile if the target knit is

1152

# completely empty.

1153

self._load_data(fp)

1154

finally:

1155

fp.close()

1156

except NoSuchFile:

1157

if mode != 'w' or not create:

1158

raise

1159

elif delay_create:

1160

self._need_to_create = True

1161

else:

1162

self._transport.put_bytes_non_atomic(

1163

self._filename, self.HEADER, mode=self._file_mode)

1164

1165

def _load_data(self, fp):

1166

cache = self._cache

1167

history = self._history

1168

1169

self.check_header(fp)

1170

# readlines reads the whole file at once:

1171

# bad for transports like http, good for local disk

1172

# we save 60 ms doing this one change (

1173

# from calling readline each time to calling

1174

# readlines once.

1175

# probably what we want for nice behaviour on

1176

# http is a incremental readlines that yields, or

1177

# a check for local vs non local indexes,

1178

history_top = len(history) - 1

1179

for line in fp.readlines():

1180

rec = line.split()

1181

if len(rec) < 5 or rec[-1] != ':':

1182

# corrupt line.

1183

# FIXME: in the future we should determine if its a

1184

# short write - and ignore it

1185

# or a different failure, and raise. RBC 20060407

2657

1186

continue

2658

prefix = key[:-1]

2659

try:

2660

suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]

2661

except KeyError:

2662

missing_keys.add(key)

2663

else:

2664

parent_keys = tuple([prefix + (suffix,)

2665

for suffix in suffix_parents])

2666

parent_map[key] = parent_keys

2667

pending_keys.extend([p for p in parent_keys

2668

if p not in parent_map])

2669

return parent_map, missing_keys

2670

2671

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for. Any iterable is
        accepted; it is materialised once so that one-shot iterators
        (generators) are handled correctly.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice (prefix collection, then lookup); a generator
    # would be exhausted after the first walk and give an empty result.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            # Slot 4 of a cache entry holds the parents' last elements.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Not in this index: leave it out of the mapping.
            pass
        else:
            result[key] = tuple(prefix + (suffix,) for
                                suffix in suffix_parents)
    return result

2694

2695

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    cached_entries = self._kndx_cache[prefix][0]
    entry = cached_entries[suffix]
    # Slots 2 and 3 of a cache entry are the position and the size.
    data_pos, size = entry[2], entry[3]
    return key, data_pos, size

2705

2706

__contains__ = _mod_index._has_key_from_parent_map

2707

2708

def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes self.HEADER followed by extra_lines to path in one
    non-atomic put, creating the parent directory if needed.

    :param path: Transport-relative path of the index file to write.
    :param extra_lines: Iterable of bytes lines to write after the header.
        The default is an immutable empty tuple rather than a shared
        mutable list.
    """
    sio = BytesIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    # NOTE: only create_parent_dir is forwarded; the file/dir mode
    # arguments were left commented out in the original call.
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)

2719

2720

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if isinstance(self._mapper, ConstantMapper):
        prefixes = [()]
    else:
        # Strip the extension so each stem is unmapped only once.
        stems = set()
        for quoted_relpath in self._transport.iter_files_recursive():
            stems.add(os.path.splitext(quoted_relpath)[0])
        prefixes = [self._mapper.unmap(stem) for stem in stems]
    self._load_prefixes(prefixes)
    found = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        found.update(prefix + (suffix,) for suffix in history)
    return found

2741

2742

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already in self._kndx_cache are left alone. A prefix whose
    .kndx file is missing is cached as empty; for a ConstantMapper the
    missing file is also created.
    """
    def drop_scratch_state():
        # Remove the temporary attributes that _load_data fills in.
        del self._cache
        del self._filename
        del self._history

    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # the load_data interface writes to these variables.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            path = self._mapper.map(prefix) + '.kndx'
            with self._transport.get(path) as fp:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            self._kndx_cache[prefix] = (self._cache, self._history)
            drop_scratch_state()
        except NoSuchFile:
            self._kndx_cache[prefix] = ({}, [])
            if isinstance(self._mapper, ConstantMapper):
                # preserve behaviour for revisions.kndx etc.
                self._init_index(path)
            drop_scratch_state()

2769

2770

missing_keys = _mod_index._missing_keys_from_parent_map

2771

2772

def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    The prefix is everything but the last element of a key; suffixes keep
    the order in which their keys were given.
    """
    partitioned = {}
    for key in keys:
        partitioned.setdefault(key[:-1], []).append(key[-1])
    return partitioned

2779

2780

def _dictionary_compress(self, keys):

2781

"""Dictionary compress keys.

2782

2783

:param keys: The keys to generate references to.

2784

:return: A string representation of keys. keys which are present are

2785

dictionary compressed, and others are emitted as fulltext with a

2786

'.' prefix.

2787

"""

2788

if not keys:

2789

return b''

1187

1188

parents = []

1189

for value in rec[4:-1]:

1190

if value[0] == '.':

1191

# uncompressed reference

1192

parent_id = value[1:]

1193

else:

1194

parent_id = history[int(value)]

1195

parents.append(parent_id)

1196

1197

version_id, options, pos, size = rec[:4]

1198

version_id = version_id

1199

1200

# See self._cache_version

1201

# only want the _history index to reference the 1st

1202

# index entry for version_id

1203

if version_id not in cache:

1204

history_top += 1

1205

index = history_top

1206

history.append(version_id)

1207

else:

1208

index = cache[version_id][5]

1209

cache[version_id] = (version_id,

1210

options.split(','),

1211

int(pos),

1212

int(size),

1213

parents,

1214

index)

1215

# end self._cache_version

1216

1217

def get_graph(self):
    """Return a list of (version_id, parents) pairs for every cached entry."""
    graph = []
    for version_id, entry in self._cache.iteritems():
        # Slot 4 of a cache entry is the parent list.
        graph.append((version_id, entry[4]))
    return graph

1219

1220

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    :raises RevisionNotPresent: if a visited version is not in the cache.
    """
    known = self._cache
    # Build a graph of all the mentioned versions.
    graph = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            # trim ghosts: keep only parents that are themselves cached.
            present_parents = [p for p in known[current][4] if p in known]
        except KeyError:
            raise RevisionNotPresent(current, self._filename)
        # Queue every parent that has not been completed yet.
        todo.update([p for p in present_parents if p not in graph])
        graph[current] = present_parents
    return topo_sort(graph.items())

1237

1238

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts."""
    self.check_versions_present(versions)
    known = self._cache
    # Build a graph of all the mentioned versions.
    graph = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            parents = known[current][4]
        except KeyError:
            # ghost, fake it
            graph[current] = []
        else:
            # Queue every parent that has not been completed yet.
            todo.update([p for p in parents if p not in graph])
            graph[current] = parents
    return topo_sort(graph.items())

1257

1258

def num_versions(self):
    """Return how many entries the history list holds."""
    history = self._history
    return len(history)

# len(index) reports the same count.
__len__ = num_versions

1262

1263

def get_versions(self):
    """Return the history list itself (not a copy)."""
    history = self._history
    return history

1265

1266

def idx_to_name(self, idx):
    """Return the version id stored at position ``idx`` of the history."""
    history = self._history
    return history[idx]

1268

1269

def lookup(self, version_id):
    """Return the history index (slot 5 of the cache entry) of version_id."""
    cache = self._cache
    assert version_id in cache
    return cache[version_id][5]

1272

1273

def _version_list_to_index(self, versions):

2790

1274

result_list = []

2791

prefix = keys[0][:-1]

2792

cache = self._kndx_cache[prefix][0]

2793

for key in keys:

2794

if key[:-1] != prefix:

2795

# kndx indices cannot refer across partitioned storage.

2796

raise ValueError("mismatched prefixes for %r" % keys)

2797

if key[-1] in cache:

1275

cache = self._cache

1276

for version in versions:

1277

if version in cache:

2798

1278

# -- inlined lookup() --

2799

result_list.append(b'%d' % cache[key[-1]][5])

1279

result_list.append(str(cache[version][5]))

2800

1280

# -- end lookup () --

2801

1281

else:

2802

result_list.append(b'.' + key[-1])

2803

return b' '.join(result_list)

2804

2805

def _reset_cache(self):
    """Discard all parsed kndx data and refresh the scope and access mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    # 'w' when writes are allowed, otherwise 'r'.
    if self._allow_writes():
        mode = 'w'
    else:
        mode = 'r'
    self._mode = mode

2815

2816

def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def io_order(key):
        memo = positions[key][1]
        # Group by prefix and position. memo[0] is the key, so it is
        # (file_id, revision_id) and we don't want to sort on revision_id;
        # memo[1] is the position. memo[2] (the size) doesn't matter for
        # the sort.
        prefix = memo[0][:-1]
        return prefix, memo[1]
    keys.sort(key=io_order)

2835

2836

_get_total_build_size = _get_total_build_size

2837

2838

def _split_key(self, key):
    """Split key into a prefix and suffix."""
    # GZ 2018-07-03: This is intentionally either a sequence or bytes?
    prefix = key[:-1]
    if isinstance(key, bytes):
        # Slice rather than index so the suffix is a bytes object too.
        suffix = key[-1:]
    else:
        suffix = key[-1]
    return prefix, suffix

2844

2845

2846

class _KnitGraphIndex(object):

2847

"""A KnitVersionedFiles index layered on GraphIndex."""

2848

2849

def __init__(self, graph_index, is_locked, deltas=False, parents=True,
             add_callback=None, track_external_parent_refs=False):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of breezy.index.GraphIndex.
    :param is_locked: A callback, returns True if the index is locked and
        thus usable (i.e. whether the object should answer queries).
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :param track_external_parent_refs: If True, record all external parent
        references parents from added records.  These can be retrieved
        later by calling get_missing_parents().
    :raises KnitCorrupt: if deltas is true while parents is false.
    """
    self._add_callback = add_callback
    self._graph_index = graph_index
    self._deltas = deltas
    self._parents = parents
    if deltas and not parents:
        # XXX: TODO: Delta tree and parent graph should be conceptually
        # separate.
        raise KnitCorrupt(self, "Cannot do delta compression without "
                          "parent tracking.")
    self.has_graph = parents
    self._is_locked = is_locked
    self._missing_compression_parents = set()
    # Only pay for reference tracking when the caller asked for it.
    self._key_dependencies = (
        _KeyRefs() if track_external_parent_refs else None)

2884

2885

def __repr__(self):
    """Show the class name followed by the repr of the wrapped graph index."""
    class_name = self.__class__.__name__
    return "%s(%r)" % (class_name, self._graph_index)

2887

2888

def add_records(self, records, random_id=False,

2889

missing_compression_parents=False):

2890

"""Add multiple records to the index.

2891

2892

This function does not insert data into the Immutable GraphIndex

2893

backing the KnitGraphIndex, instead it prepares data for insertion by

2894

the caller and checks that it is safe to insert then calls

2895

self._add_callback with the prepared GraphIndex nodes.

2896

2897

:param records: a list of tuples:

2898

(key, options, access_memo, parents).

2899

:param random_id: If True the ids being added were randomly generated

2900

and no check for existence will be performed.

2901

:param missing_compression_parents: If True the records being added are

2902

only compressed against texts already in the index (or inside

2903

records). If False the records all refer to unavailable texts (or

2904

texts inside records) as compression parents.

2905

"""

2906

if not self._add_callback:

2907

raise errors.ReadOnlyError(self)

2908

# we hope there are no repositories with inconsistent parentage

2909

# anymore.

2910

2911

keys = {}

2912

compression_parents = set()

2913

key_dependencies = self._key_dependencies

2914

for (key, options, access_memo, parents) in records:

2915

if self._parents:

2916

parents = tuple(parents)

2917

if key_dependencies is not None:

2918

key_dependencies.add_references(key, parents)

2919

index, pos, size = access_memo

2920

if b'no-eol' in options:

2921

value = b'N'

2922

else:

2923

value = b' '

2924

value += b"%d %d" % (pos, size)

2925

if not self._deltas:

2926

if b'line-delta' in options:

2927

raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")

2928

if self._parents:

2929

if self._deltas:

2930

if b'line-delta' in options:

2931

node_refs = (parents, (parents[0],))

2932

if missing_compression_parents:

2933

compression_parents.add(parents[0])

2934

else:

2935

node_refs = (parents, ())

1282

result_list.append('.' + version)

1283

return ' '.join(result_list)

1284

1285

def add_version(self, version_id, options, pos, size, parents):
    """Add a version record to the index."""
    # Delegate to the batch API with a one-element batch.
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1288

1289

def add_versions(self, versions):
    """Add multiple versions to the index.

    The new index lines are written to the transport in a single call;
    if anything fails the in-memory history and cache are restored to
    their state on entry and the error is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
    """
    pending = []
    # Snapshots used to roll back on failure.
    saved_history = self._history[:]
    saved_cache = self._cache.copy()

    try:
        for version_id, options, pos, size, parents in versions:
            option_text = ','.join(options)
            parent_refs = self._version_list_to_index(parents)
            entry = "\n%s %s %s %s %s :" % (version_id,
                                            option_text,
                                            pos,
                                            size,
                                            parent_refs)
            assert isinstance(entry, str), \
                'content must be utf-8 encoded: %r' % (entry,)
            pending.append(entry)
            self._cache_version(version_id, options, pos, size, parents)
        if self._need_to_create:
            # First write: emit the header followed by the new lines.
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(pending)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(pending))
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = saved_history
        self._cache = saved_cache
        raise

1327

1328

def has_version(self, version_id):
    """True if the version is in the index."""
    known = self._cache
    return version_id in known

1331

1332

def get_position(self, version_id):
    """Return data position and size of specified version."""
    # Cache entries are (version_id, options, pos, size, parents, index).
    pos, size = self._cache[version_id][2:4]
    return pos, size

1336

1337

def get_method(self, version_id):
    """Return compression method of specified version.

    :raises errors.KnitIndexUnknownMethod: if the options name neither
        'fulltext' nor 'line-delta'.
    """
    options = self._cache[version_id][1]
    # 'fulltext' wins when both are present.
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1346

1347

def get_options(self, version_id):
    """Return the options stored in slot 1 of the version's cache entry."""
    entry = self._cache[version_id]
    return entry[1]

1349

1350

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts."""
    known = self._cache
    present = []
    for parent in known[version_id][4]:
        # A parent missing from the index is a ghost; leave it out.
        if parent in known:
            present.append(parent)
    return present

1354

1355

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts."""
    entry = self._cache[version_id]
    return entry[4]

1358

1359

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: for the first version id that is not in
        the index.
    """
    known = self._cache
    for version_id in version_ids:
        if version_id in known:
            continue
        raise RevisionNotPresent(version_id, self._filename)

1365

1366

1367

class _KnitData(_KnitComponentFile):

1368

"""Contents of the knit data file"""

1369

1370

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Set up the data file wrapper, optionally creating the file.

    With ``create`` true the file is written empty straight away, unless
    ``delay_create`` is also true, in which case creation is only flagged
    via ``_need_to_create``.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._checked = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}
    self._do_cache = False
    if not create:
        return
    if delay_create:
        self._need_to_create = create
    else:
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

1390

1391

def enable_cache(self):
    """Enable caching of reads."""
    # Only flips the flag; existing cache contents are left untouched.
    setattr(self, '_do_cache', True)

1394

1395

def clear_cache(self):
    """Clear the record cache."""
    # Drop the cached records and stop caching further reads.
    self._cache = {}
    self._do_cache = False

1399

1400

def _open_file(self):
    """Return what the transport's get() gives for the data file.

    :return: the result of ``transport.get``, or None when the transport
        raises NoSuchFile.
    """
    try:
        handle = self._transport.get(self._filename)
    except NoSuchFile:
        handle = None
    return handle

1406

1407

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream holding a
    ``version <id> <line-count> <digest>`` header line, the content lines
    and an ``end <id>`` trailer line.

    :param version_id: Byte string naming the version.
    :param digest: Byte string written into the header.
    :param lines: Sequence of byte-string content lines.
    :return: (len, a file-like instance with the raw data ready to read.)
    """
    # Local import: an in-memory *binary* buffer is required because gzip
    # writes bytes.  io.BytesIO is available on Python 2.6+ and Python 3.
    from io import BytesIO

    sio = BytesIO()
    data_file = GzipFile(None, mode='wb', fileobj=sio)

    # The record is binary data, so everything is formatted as bytes.  On
    # Python 2 ``bytes`` is ``str``, so existing callers are unaffected.
    assert isinstance(version_id, bytes)
    data_file.writelines(chain(
        [b"version %s %d %s\n" % (version_id,
                                  len(lines),
                                  digest)],
        lines,
        [b"end %s\n" % version_id]))
    data_file.close()
    # close() flushed the gzip trailer, so tell() is the full record size.
    length = sio.tell()

    sio.seek(0)
    return length, sio

1427

1428

def add_raw_record(self, raw_data):
    """Append a prepared record to the data file.

    :param raw_data: The record as a byte string.
    :return: the offset in the data file raw_data was written.
    :raises AssertionError: if raw_data is not a byte string.
    """
    # Check against ``bytes`` (an alias of ``str`` on Python 2) and raise
    # explicitly so the check also runs under ``python -O``.
    if not isinstance(raw_data, bytes):
        raise AssertionError('data must be plain bytes')
    if not self._need_to_create:
        return self._transport.append_bytes(self._filename, raw_data)
    # First write: create the file with this record as its whole content,
    # so the record necessarily starts at offset 0.
    self._transport.put_bytes_non_atomic(self._filename, raw_data,
                           create_parent_dir=self._create_parent_dir,
                           mode=self._file_mode,
                           dir_mode=self._dir_mode)
    self._need_to_create = False
    return 0

1443

1444

def add_record(self, version_id, digest, lines):
    """Write new text record to disk.

    :return: (start_pos, size): the position in the file where the record
        was written and the size reported by _record_to_data.
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    # write to disk
    if self._need_to_create:
        self._transport.put_file_non_atomic(self._filename, sio,
                           create_parent_dir=self._create_parent_dir,
                           mode=self._file_mode,
                           dir_mode=self._dir_mode)
        self._need_to_create = False
        # A freshly created file starts with this record.
        start_pos = 0
    else:
        start_pos = self._transport.append_file(self._filename, sio)
    if self._do_cache:
        self._cache[version_id] = sio.getvalue()
    return start_pos, size

1461

1462

def _parse_record_header(self, version_id, raw_data):
    """Parse a record header for consistency.

    :param version_id: The version the record is expected to hold.
    :param raw_data: The gzip-compressed record.
    :return: the header and the decompressor stream.
             as (stream, header_record)
    :raises KnitCorrupt: if reading or validating the header line fails
        for any reason.
    """
    df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
    try:
        rec = self._check_header(version_id, df.readline())
    except Exception as e:
        # ``except X as e`` works on Python 2.6+ and Python 3; the old
        # ``except X, e`` spelling is a syntax error on Python 3.
        raise KnitCorrupt(self._filename,
                          "While reading {%s} got %s(%s)"
                          % (version_id, e.__class__.__name__, str(e)))
    return df, rec

1476

1477

def _check_header(self, version_id, line):
    """Split a record header line and validate it.

    :param version_id: The version id expected in the second field.
    :param line: The header line of a record.
    :return: the list of four whitespace-separated header fields.
    :raises KnitCorrupt: if there are not exactly four fields or the
        second field differs from version_id.
    """
    fields = line.split()
    if len(fields) != 4:
        raise KnitCorrupt(self._filename,
                          'unexpected number of elements in record header')
    found = fields[1]
    if found != version_id:
        raise KnitCorrupt(self._filename,
                          'unexpected version, wanted %r, got %r'
                          % (version_id, found))
    return fields

1487

1488

def _parse_record(self, version_id, data):
    """Decompress a record and validate its header, length and trailer.

    :param version_id: The version the record is expected to hold.
    :param data: The gzip-compressed record (bytes).
    :return: (content_lines, digest) where digest is the fourth field of
        the header line.
    :raises KnitCorrupt: if decompression fails, the header is invalid,
        the line count does not match the header, or the trailer line is
        not ``end <version>``.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df = GzipFile(mode='rb', fileobj=StringIO(data))
    try:
        try:
            record_contents = df.readlines()
        except Exception as e:
            # ``as`` form: valid on Python 2.6+ and Python 3.
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
    finally:
        # Everything needed has been read (or reading failed); release the
        # decompressor on every path instead of only on success.
        df.close()
    header = record_contents.pop(0)
    rec = self._check_header(version_id, header)

    last_line = record_contents.pop()
    if len(record_contents) != int(rec[2]):
        raise KnitCorrupt(self._filename,
                          'incorrect number of lines %s != %s'
                          ' for version {%s}'
                          % (len(record_contents), int(rec[2]),
                             version_id))
    # The decompressed lines are bytes, so the expected trailer must be
    # built as bytes too (identical to the old text form on Python 2).
    if last_line != b'end %s\n' % rec[1]:
        raise KnitCorrupt(self._filename,
                          'unexpected version end line %r, wanted %r'
                          % (last_line, version_id))
    return record_contents, rec[3]

1517

1518

def read_records_iter_raw(self, records):
    """Read text records from data file and yield raw data.

    This unpacks enough of the text record to validate the id is
    as expected but that's all.

    :param records: A sized sequence of (version_id, pos, size) entries.
    :return: Yields (version_id, raw_data) in the order requested.
    """
    # setup an iterator of the external records:
    # uses readv so nice and fast we hope.
    if len(records):
        # grab the disk data needed.
        if self._cache:
            # Don't check _cache if it is empty
            needed_offsets = [(pos, size) for version_id, pos, size
                                          in records
                                          if version_id not in self._cache]
        else:
            needed_offsets = [(pos, size) for version_id, pos, size
                                          in records]

        raw_records = self._transport.readv(self._filename, needed_offsets)

    for version_id, pos, size in records:
        if version_id in self._cache:
            # This data has already been validated
            data = self._cache[version_id]
        else:
            # The next() builtin works on Python 2.6+ and 3; the
            # iterator's .next() method exists on Python 2 only.
            pos, data = next(raw_records)
            if self._do_cache:
                self._cache[version_id] = data

        # validate the header
        df, rec = self._parse_record_header(version_id, data)
        df.close()
        yield version_id, data

1552

1553

def read_records_iter(self, records):

1554

"""Read text records from data file and yield result.

1555

1556

The result will be returned in whatever is the fastest to read.

1557

Not by the order requested. Also, multiple requests for the same

1558

record will only yield 1 response.

1559

:param records: A list of (version_id, pos, len) entries

1560

:return: Yields (version_id, contents, digest) in the order

1561

read, not the order requested

1562

"""

1563

if not records:

1564

return

1565

1566

if self._cache:

1567

# Skip records we have alread seen

1568

yielded_records = set()

1569

needed_records = set()

1570

for record in records:

1571

if record[0] in self._cache:

1572

if record[0] in yielded_records:

1573

continue

1574

yielded_records.add(record[0])

1575

data = self._cache[record[0]]

1576

content, digest = self._parse_record(record[0], data)

1577

yield (record[0], content, digest)

2936

1578

else:

2937

node_refs = (parents, )

2938

else:

2939

if parents:

2940

raise KnitCorrupt(self, "attempt to add node with parents "

2941

"in parentless index.")

2942

node_refs = ()

2943

keys[key] = (value, node_refs)

2944

# check for dups

2945

if not random_id:

2946

present_nodes = self._get_entries(keys)

2947

for (index, key, value, node_refs) in present_nodes:

2948

parents = node_refs[:1]

2949

# Sometimes these are passed as a list rather than a tuple

2950

passed = static_tuple.as_tuples(keys[key])

2951

passed_parents = passed[1][:1]

2952

if (value[0:1] != keys[key][0][0:1] or

2953

parents != passed_parents):

2954

node_refs = static_tuple.as_tuples(node_refs)

2955

raise KnitCorrupt(self, "inconsistent details in add_records"

2956

": %s %s" % ((value, node_refs), passed))

2957

del keys[key]

2958

result = []

2959

if self._parents:

2960

for key, (value, node_refs) in viewitems(keys):

2961

result.append((key, value, node_refs))

2962

else:

2963

for key, (value, node_refs) in viewitems(keys):

2964

result.append((key, value))

2965

self._add_callback(result)

2966

if missing_compression_parents:

2967

# This may appear to be incorrect (it does not check for

2968

# compression parents that are in the existing graph index),

2969

# but such records won't have been buffered, so this is

2970

# actually correct: every entry when

2971

# missing_compression_parents==True either has a missing parent, or

2972

# a parent that is one of the keys in records.

2973

compression_parents.difference_update(keys)

2974

self._missing_compression_parents.update(compression_parents)

2975

# Adding records may have satisfied missing compression parents.

2976

self._missing_compression_parents.difference_update(keys)

2977

2978

def scan_unvalidated_index(self, graph_index):
    """Inform this _KnitGraphIndex that there is an unvalidated index.

    This allows this _KnitGraphIndex to keep track of any missing
    compression parents we may want to have filled in to make those
    indices valid.

    :param graph_index: A GraphIndex
    """
    if self._deltas:
        # Reference list 1 holds the compression parents.
        candidates = graph_index.external_references(ref_list_num=1)
        # Anything this index can already resolve is not missing.
        resolvable = self.get_parent_map(candidates)
        candidates.difference_update(resolvable)
        self._missing_compression_parents.update(candidates)
    tracker = self._key_dependencies
    if tracker is not None:
        # Add parent refs from graph_index (and discard parent refs that
        # the graph_index has).
        for node in graph_index.iter_all_entries():
            tracker.add_references(node[1], node[3][0])

2996

2997

def get_missing_compression_parents(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: an immutable snapshot (frozenset) of the tracked keys.
    """
    missing = self._missing_compression_parents
    return frozenset(missing)

3004

3005

def get_missing_parents(self):
    """Return the keys of missing parents."""
    # If updating this, you should also update
    # groupcompress._GCGraphIndex.get_missing_parents
    tracker = self._key_dependencies
    satisfy = tracker.satisfy_refs_for_keys
    # We may have false positives, so filter those out: any unsatisfied
    # ref that the index can actually resolve is marked as satisfied.
    resolvable = self.get_parent_map(tracker.get_unsatisfied_refs())
    satisfy(resolvable)
    return frozenset(tracker.get_unsatisfied_refs())

3013

3014

def _check_read(self):
    """raise if reads are not permitted."""
    if self._is_locked():
        return
    raise errors.ObjectNotLocked(self)

3018

3019

def _check_write_ok(self):
    """Raise ObjectNotLocked if writes are not permitted."""
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)

3023

3024

def _compression_parent(self, an_entry):
    """Return the key that an_entry is compressed against, or None.

    :param an_entry: An index entry whose fourth element is the tuple of
        reference lists; the second reference list holds the compression
        parents.
    :raises AssertionError: if more than one compression parent is listed.
    """
    # Grab the second parent list (as deltas implies parents currently)
    compression_parents = an_entry[3][1]
    if not compression_parents:
        return None
    if len(compression_parents) != 1:
        # Wrap in a 1-tuple: formatting directly with a multi-element
        # tuple as the right operand raises TypeError ("not all arguments
        # converted") instead of producing the intended message.
        raise AssertionError(
            "Too many compression parents: %r" % (compression_parents,))
    return compression_parents[0]

3034

3035

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for version_ids.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    self._check_read()
    details = {}
    for entry in self._get_entries(keys, False):
        # A parentless index has no reference lists to read.
        parents = entry[3][0] if self._parents else ()
        if self._deltas:
            compression_parent_key = self._compression_parent(entry)
        else:
            compression_parent_key = None
        # The first byte of the value is b'N' for no-eol records.
        noeol = (entry[2][0:1] == b'N')
        method = 'line-delta' if compression_parent_key else 'fulltext'
        details[entry[1]] = (self._node_to_position(entry),
                             compression_parent_key, parents,
                             (method, noeol))
    return details

3076

3077

def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    :param keys: An iterable of index key tuples.
    :param check_present: If True, raise RevisionNotPresent once all
        entries have been yielded if any requested key was not found.
    :return: A generator of 4-tuples (index, key, value, references).
    """
    wanted = set(keys)
    seen = set()
    has_references = self._parents
    for node in self._graph_index.iter_entries(wanted):
        if has_references:
            yield node
        else:
            # adapt parentless index to the rest of the code.
            yield node[0], node[1], node[2], ()
        seen.add(node[1])
    if check_present:
        absent = wanted.difference(seen)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

3097

3098

def get_method(self, key):
    """Return compression method of specified key."""
    node = self._get_node(key)
    return self._get_method(node)

3101

3102

def _get_method(self, node):
    """Return 'line-delta' for a node with a compression parent.

    Everything else, including every node of an index that does not allow
    deltas, is reported as 'fulltext'.
    """
    if self._deltas and self._compression_parent(node):
        return 'line-delta'
    return 'fulltext'

3109

3110

def _get_node(self, key):
    """Return the first index entry found for key.

    :raises RevisionNotPresent: if looking the key up produced IndexError
        (no entry was yielded for it).
    """
    try:
        matches = list(self._get_entries([key]))
        return matches[0]
    except IndexError:
        raise RevisionNotPresent(key, self)

3115

3116

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    The list holds the compression method as ASCII bytes, followed by
    b'no-eol' when the node's value starts with b'N'.
    """
    node = self._get_node(key)
    method = self._get_method(node).encode('ascii')
    if node[2][0:1] == b'N':
        return [method, b'no-eol']
    return [method]

3126

3127

def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()"""
    # Reference list 0 is passed through as the list to follow.
    graph_index = self._graph_index
    return graph_index.find_ancestry(keys, 0)

3130

3131

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping. In a parentless index every present key maps to None.
    """
    self._check_read()
    nodes = self._get_entries(keys)
    if self._parents:
        return {node[1]: node[3][0] for node in nodes}
    return {node[1]: None for node in nodes}

3148

3149

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))

3157

3158

__contains__ = _mod_index._has_key_from_parent_map

3159

3160

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.
    """
    self._check_read()
    found = []
    for node in self._graph_index.iter_all_entries():
        # The key is the second element of each index node.
        found.append(node[1])
    return found

3167

3168

missing_keys = _mod_index._missing_keys_from_parent_map

3169

3170

def _node_to_position(self, node):
    """Convert an index value to position details.

    :return: (node[0], position, size) with position and size as ints.
    """
    # Skip the first byte of the value, then split the rest on spaces.
    fields = node[2][1:].split(b' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

3174

3175

def _sort_keys_by_io(self, keys, positions):
    """Figure out an optimal order to read the records for the given keys.

    Sort keys, grouped by index and sorted by position.

    :param keys: A list of keys whose records we want to read. This will be
        sorted 'in-place'.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    # index_memo is at offset [1]. It is made up of (GraphIndex,
    # position, size). GI is an object, which will be unique for each
    # pack file. This causes us to group by pack file, then sort by
    # position. Size doesn't matter, but it isn't worth breaking up the
    # tuple.
    keys.sort(key=lambda key: positions[key][1])

3194

3195

_get_total_build_size = _get_total_build_size

3196

3197

3198

class _KnitKeyAccess(object):
    """Access to records in .knit files."""

    def __init__(self, transport, mapper):
        """Create a _KnitKeyAccess with transport and mapper.

        :param transport: The transport the access object is rooted at.
        :param mapper: The mapper used to map keys to .knit files.
        """
        self._mapper = mapper
        self._transport = transport

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        Each raw data item is appended to the .knit file its key maps to.

        :param key_sizes: An iterable of tuples containing the key and size
            of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is
            an opaque index memo. For _KnitKeyAccess the memo is (key, pos,
            length), where the key is the record key.
        """
        if not isinstance(raw_data, bytes):
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        memos = []
        start = 0
        # TODO: This can be tuned for writing to sftp and other servers where
        # append() is relatively expensive by grouping the writes to each key
        # prefix.
        for key, size in key_sizes:
            path = self._mapper.map(key)
            knit_path = path + '.knit'
            segment = raw_data[start:start+size]
            try:
                base = self._transport.append_bytes(knit_path, segment)
            except errors.NoSuchFile:
                # The containing directory is missing: create it and retry.
                self._transport.mkdir(osutils.dirname(path))
                base = self._transport.append_bytes(knit_path, segment)
            # if base == 0:
            # chmod.
            start += size
            memos.append((key, base, size))
        return memos

    def flush(self):
        """Flush pending writes on this access object.

        For .knit files this is a no-op.
        """
        return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for a records.

        :param memos_for_retrieval: An iterable containing the access memo
            for retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group consecutive memos sharing a key prefix into one
        # request each, to minimise the readv's issued.
        grouped = []
        for (key, offset, length) in memos_for_retrieval:
            prefix = key[:-1]
            if grouped and grouped[-1][0] == prefix:
                grouped[-1][1].append((offset, length))
            else:
                grouped.append((prefix, [(offset, length)]))
        # second pass, one readv per group.
        for prefix, read_vector in grouped:
            path = self._mapper.map(prefix) + '.knit'
            for pos, data in self._transport.readv(path, read_vector):
                yield data

3278

3279

3280

def annotate_knit(knit, revision_id):

3281

"""Annotate a knit with no cached annotations.

3282

3283

This implementation is for knits with no cached annotations.

3284

It will work for knits with cached annotations, but this is not

3285

recommended.

1579

needed_records.add(record)

1580

needed_records = sorted(needed_records, key=operator.itemgetter(1))

1581

else:

1582

needed_records = sorted(set(records), key=operator.itemgetter(1))

1583

1584

if not needed_records:

1585

return

1586

1587

# The transport optimizes the fetching as well

1588

# (ie, reads continuous ranges.)

1589

readv_response = self._transport.readv(self._filename,

1590

[(pos, size) for version_id, pos, size in needed_records])

1591

1592

for (version_id, pos, size), (pos, data) in \

1593

izip(iter(needed_records), readv_response):

1594

content, digest = self._parse_record(version_id, data)

1595

if self._do_cache:

1596

self._cache[version_id] = data

1597

yield version_id, content, digest

1598

1599

def read_records(self, records):
    """Read records into a dictionary.

    :return: a dict mapping each record id yielded by read_records_iter
        to its (content, digest) pair.
    """
    return dict(
        (record_id, (content, digest))
        for record_id, content, digest in self.read_records_iter(records))

1606

1607

1608

class InterKnit(InterVersionedFile):

1609

"""Optimised code paths for knit to knit operations."""

1610

1611

_matching_file_from_factory = KnitVersionedFile

1612

_matching_file_to_factory = KnitVersionedFile

1613

1614

@staticmethod
def is_compatible(source, target):
    """Be compatible with knits.

    :return: True only when both source and target are KnitVersionedFile
        instances; an AttributeError raised by the checks counts as False.
    """
    try:
        if not isinstance(source, KnitVersionedFile):
            return False
        return isinstance(target, KnitVersionedFile)
    except AttributeError:
        return False

1622

1623

def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):

1624

"""See InterVersionedFile.join."""

1625

assert isinstance(self.source, KnitVersionedFile)

1626

assert isinstance(self.target, KnitVersionedFile)

1627

1628

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

1629

1630

if not version_ids:

1631

return 0

1632

1633

pb = ui.ui_factory.nested_progress_bar()

1634

try:

1635

version_ids = list(version_ids)

1636

if None in version_ids:

1637

version_ids.remove(None)

1638

1639

self.source_ancestry = set(self.source.get_ancestry(version_ids))

1640

this_versions = set(self.target._index.get_versions())

1641

needed_versions = self.source_ancestry - this_versions

1642

cross_check_versions = self.source_ancestry.intersection(this_versions)

1643

mismatched_versions = set()

1644

for version in cross_check_versions:

1645

# scan to include needed parents.

1646

n1 = set(self.target.get_parents_with_ghosts(version))

1647

n2 = set(self.source.get_parents_with_ghosts(version))

1648

if n1 != n2:

1649

# FIXME TEST this check for cycles being introduced works

1650

# the logic is we have a cycle if in our graph we are an

1651

# ancestor of any of the n2 revisions.

1652

for parent in n2:

1653

if parent in n1:

1654

# safe

1655

continue

1656

else:

1657

parent_ancestors = self.source.get_ancestry(parent)

1658

if version in parent_ancestors:

1659

raise errors.GraphCycleError([parent, version])

1660

# ensure this parent will be available later.

1661

new_parents = n2.difference(n1)

1662

needed_versions.update(new_parents.difference(this_versions))

1663

mismatched_versions.add(version)

1664

1665

if not needed_versions and not mismatched_versions:

1666

return 0

1667

full_list = topo_sort(self.source.get_graph())

1668

1669

version_list = [i for i in full_list if (not self.target.has_version(i)

1670

and i in needed_versions)]

1671

1672

# plan the join:

1673

copy_queue = []

1674

copy_queue_records = []

1675

copy_set = set()

1676

for version_id in version_list:

1677

options = self.source._index.get_options(version_id)

1678

parents = self.source._index.get_parents_with_ghosts(version_id)

1679

# check that its will be a consistent copy:

1680

for parent in parents:

1681

# if source has the parent, we must :

1682

# * already have it or

1683

# * have it scheduled already

1684

# otherwise we don't care

1685

assert (self.target.has_version(parent) or

1686

parent in copy_set or

1687

not self.source.has_version(parent))

1688

data_pos, data_size = self.source._index.get_position(version_id)

1689

copy_queue_records.append((version_id, data_pos, data_size))

1690

copy_queue.append((version_id, options, parents))

1691

copy_set.add(version_id)

1692

1693

# data suck the join:

1694

count = 0

1695

total = len(version_list)

1696

raw_datum = []

1697

raw_records = []

1698

for (version_id, raw_data), \

1699

(version_id2, options, parents) in \

1700

izip(self.source._data.read_records_iter_raw(copy_queue_records),

1701

copy_queue):

1702

assert version_id == version_id2, 'logic error, inconsistent results'

1703

count = count + 1

1704

pb.update("Joining knit", count, total)

1705

raw_records.append((version_id, options, parents, len(raw_data)))

1706

raw_datum.append(raw_data)

1707

self.target._add_raw_records(raw_records, ''.join(raw_datum))

1708

1709

for version in mismatched_versions:

1710

# FIXME RBC 20060309 is this needed?

1711

n1 = set(self.target.get_parents_with_ghosts(version))

1712

n2 = set(self.source.get_parents_with_ghosts(version))

1713

# write a combined record to our history preserving the current

1714

# parents as first in the list

1715

new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))

1716

self.target.fix_parents(version, new_parents)

1717

return count

1718

finally:

1719

pb.finished()

1720

1721

1722

# Make InterKnit available as an optimiser registered on InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

1723

1724

1725

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    # Declared source type is WeaveFile, declared target type is a knit.
    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :return: True only when source is a bzrlib.weave.Weave and target
            is a KnitVersionedFile; False if the check raises
            AttributeError.
        """
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds to the target knit, via add_lines, every version that is in
        the source ancestry of version_ids but not in the target, then
        extends the target's parent list for versions present on both sides
        where the source has parents the target does not list.

        :param pb: Not used as passed in: the name is rebound to a new
            nested progress bar before it is first used.
        :param msg: Not referenced in this method.
        :param version_ids: Given, with ignore_missing, to
            _get_source_version_ids to select what to join.
        :param ignore_missing: Passed through to _get_source_version_ids.
        :return: The number of versions added; 0 when there is nothing to do.
        :raises errors.GraphCycleError: If a source parent that the target
            does not list for a version has that version in its ancestry.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # From here on 'pb' is always this freshly created progress bar; it
        # is finished in the 'finally' clause below.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # Versions only the source has must be added; versions both
            # sides have get their parent lists compared.
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order of the source graph, restricted to
            # the versions that are needed and that the target lacks.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1813

1814

1815

# Make WeaveToKnit available as an optimiser registered on InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

1816

1817

1818

class KnitSequenceMatcher(difflib.SequenceMatcher):

1819

"""Knit tuned sequence matcher.

1820

1821

This is based on profiling of difflib which indicated some improvements

1822

for our usage pattern.

3286

1823

"""

3287

annotator = _KnitAnnotator(knit)

3288

return iter(annotator.annotate_flat(revision_id))

3289

3290

3291

class _KnitAnnotator(annotate.Annotator):

3292

"""Build up the annotations for a text."""

3293

3294

def __init__(self, vf):
    """Initialise the base Annotator and this class's empty work queues.

    :param vf: Handed straight to annotate.Annotator.__init__.
    """
    annotate.Annotator.__init__(self, vf)

    # TODO: handle Nodes which cannot be extracted
    # self._ghosts = set()

    # Build details accumulated so far, keyed by record key.
    self._all_build_details = dict()

    # (key, parent_key) => matching_blocks; entries are meant to be
    # consumed once.
    self._matching_blocks = dict()

    # KnitContent objects that are kept around, and for each such fulltext
    # content object how many children still depend on it.
    self._content_objects = dict()
    self._num_compression_children = dict()

    # Delta records that cannot be expanded until their compression parent
    # is available.
    self._pending_deltas = dict()

    # Fulltext records held back until the fulltexts of their parents are
    # available, at which point they can be yielded for annotation.
    self._pending_annotation = dict()

3315

3316

def _get_build_graph(self, key):

3317

"""Get the graphs for building texts and annotations.

3318

3319

The data you need for creating a full text may be different than the

3320

data you need to annotate that text. (At a minimum, you need both

3321

parents to create an annotation, but only need 1 parent to generate the

3322

fulltext.)

3323

3324

:return: A list of (key, index_memo) records, suitable for

3325

passing to read_records_iter to start reading in the raw data from

3326

the pack file.

1824

1825

def find_longest_match(self, alo, ahi, blo, bhi):

1826

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

1827

1828

If isjunk is not defined:

1829

1830

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

1831

alo <= i <= i+k <= ahi

1832

blo <= j <= j+k <= bhi

1833

and for all (i',j',k') meeting those conditions,

1834

k >= k'

1835

i <= i'

1836

and if i == i', j <= j'

1837

1838

In other words, of all maximal matching blocks, return one that

1839

starts earliest in a, and of all those maximal matching blocks that

1840

start earliest in a, return the one that starts earliest in b.

1841

1842

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

1843

>>> s.find_longest_match(0, 5, 0, 9)

1844

(0, 4, 5)

1845

1846

If isjunk is defined, first the longest matching block is

1847

determined as above, but with the additional restriction that no

1848

junk element appears in the block. Then that block is extended as

1849

far as possible by matching (only) junk elements on both sides. So

1850

the resulting block never matches on junk except as identical junk

1851

happens to be adjacent to an "interesting" match.

1852

1853

Here's the same example as before, but considering blanks to be

1854

junk. That prevents " abcd" from matching the " abcd" at the tail

1855

end of the second sequence directly. Instead only the "abcd" can

1856

match, and matches the leftmost "abcd" in the second sequence:

1857

1858

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

1859

>>> s.find_longest_match(0, 5, 0, 9)

1860

(1, 0, 4)

1861

1862

If no blocks match, return (alo, blo, 0).

1863

1864

>>> s = SequenceMatcher(None, "ab", "c")

1865

>>> s.find_longest_match(0, 2, 0, 1)

1866

(0, 0, 0)

3327

1867

"""

3328

pending = {key}

3329

records = []

3330

ann_keys = set()

3331

self._num_needed_children[key] = 1

3332

while pending:

3333

# get all pending nodes

3334

this_iteration = pending

3335

build_details = self._vf._index.get_build_details(this_iteration)

3336

self._all_build_details.update(build_details)

3337

# new_nodes = self._vf._index._get_entries(this_iteration)

3338

pending = set()

3339

for key, details in viewitems(build_details):

3340

(index_memo, compression_parent, parent_keys,

3341

record_details) = details

3342

self._parent_map[key] = parent_keys

3343

self._heads_provider = None

3344

records.append((key, index_memo))

3345

# Do we actually need to check _annotated_lines?

3346

pending.update([p for p in parent_keys

3347

if p not in self._all_build_details])

3348

if parent_keys:

3349

for parent_key in parent_keys:

3350

if parent_key in self._num_needed_children:

3351

self._num_needed_children[parent_key] += 1

3352

else:

3353

self._num_needed_children[parent_key] = 1

3354

if compression_parent:

3355

if compression_parent in self._num_compression_children:

3356

self._num_compression_children[compression_parent] += 1

3357

else:

3358

self._num_compression_children[compression_parent] = 1

3359

3360

missing_versions = this_iteration.difference(build_details)

3361

if missing_versions:

3362

for key in missing_versions:

3363

if key in self._parent_map and key in self._text_cache:

3364

# We already have this text ready, we just need to

3365

# yield it later so we get it annotated

3366

ann_keys.add(key)

3367

parent_keys = self._parent_map[key]

3368

for parent_key in parent_keys:

3369

if parent_key in self._num_needed_children:

3370

self._num_needed_children[parent_key] += 1

3371

else:

3372

self._num_needed_children[parent_key] = 1

3373

pending.update([p for p in parent_keys

3374

if p not in self._all_build_details])

3375

else:

3376

raise errors.RevisionNotPresent(key, self._vf)

3377

# Generally we will want to read the records in reverse order, because

3378

# we find the parent nodes after the children

3379

records.reverse()

3380

return records, ann_keys

3381

3382

def _get_needed_texts(self, key, pb=None):

3383

# if True or len(self._vf._immediate_fallback_vfs) > 0:

3384

if len(self._vf._immediate_fallback_vfs) > 0:

3385

# If we have fallbacks, go to the generic path

3386

for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):

3387

yield v

3388

return

3389

while True:

1868

1869

# CAUTION: stripping common prefix or suffix would be incorrect.

1870

# E.g.,

1871

# ab

1872

# acab

1873

# Longest matching block is "ab", but if common prefix is

1874

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

1875

# strip, so ends up claiming that ab is changed to acab by

1876

# inserting "ca" in the middle. That's minimal but unintuitive:

1877

# "it's obvious" that someone inserted "ac" at the front.

1878

# Windiff ends up at the same place as diff, but by pairing up

1879

# the unique 'b's and then matching the first two 'a's.

1880

1881

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

1882

besti, bestj, bestsize = alo, blo, 0

1883

# find longest junk-free match

1884

# during an iteration of the loop, j2len[j] = length of longest

1885

# junk-free match ending with a[i-1] and b[j]

1886

j2len = {}

1887

# nothing = []

1888

b2jget = b2j.get

1889

for i in xrange(alo, ahi):

1890

# look at all instances of a[i] in b; note that because

1891

# b2j has no junk keys, the loop is skipped if a[i] is junk

1892

j2lenget = j2len.get

1893

newj2len = {}

1894

1895

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1896

# following improvement

1897

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1898

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1899

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

1900

# to

1901

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

1902

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

1903

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

1904

3390

1905

try:

3391

records, ann_keys = self._get_build_graph(key)

3392

for idx, (sub_key, text, num_lines) in enumerate(

3393

self._extract_texts(records)):

3394

if pb is not None:

3395

pb.update(gettext('annotating'), idx, len(records))

3396

yield sub_key, text, num_lines

3397

for sub_key in ann_keys:

3398

text = self._text_cache[sub_key]

3399

num_lines = len(text) # bad assumption

3400

yield sub_key, text, num_lines

3401

return

3402

except errors.RetryWithNewPacks as e:

3403

self._vf._access.reload_or_raise(e)

3404

# The cached build_details are no longer valid

3405

self._all_build_details.clear()

3406

3407

def _cache_delta_blocks(self, key, compression_parent, delta, lines):
    """Store the matching blocks between a text and its compression parent.

    The blocks come from KnitContent.get_line_delta_blocks, called with
    the delta, the cached lines of the compression parent and ``lines``.
    They are saved in self._matching_blocks under
    (key, compression_parent).
    """
    basis_lines = self._text_cache[compression_parent]
    block_iter = KnitContent.get_line_delta_blocks(delta, basis_lines, lines)
    self._matching_blocks[(key, compression_parent)] = list(block_iter)

3411

3412

def _expand_record(self, key, parent_keys, compression_parent, record,

3413

record_details):

3414

delta = None

3415

if compression_parent:

3416

if compression_parent not in self._content_objects:

3417

# Waiting for the parent

3418

self._pending_deltas.setdefault(compression_parent, []).append(

3419

(key, parent_keys, record, record_details))

3420

return None

3421

# We have the basis parent, so expand the delta

3422

num = self._num_compression_children[compression_parent]

3423

num -= 1

3424

if num == 0:

3425

base_content = self._content_objects.pop(compression_parent)

3426

self._num_compression_children.pop(compression_parent)

1906

js = b2j[a[i]]

1907

except KeyError:

1908

pass

3427

1909

else:

3428

self._num_compression_children[compression_parent] = num

3429

base_content = self._content_objects[compression_parent]

3430

# It is tempting to want to copy_base_content=False for the last

3431

# child object. However, whenever noeol=False,

3432

# self._text_cache[parent_key] is content._lines. So mutating it

3433

# gives very bad results.

3434

# The alternative is to copy the lines into text cache, but then we

3435

# are copying anyway, so just do it here.

3436

content, delta = self._vf._factory.parse_record(

3437

key, record, record_details, base_content,

3438

copy_base_content=True)

3439

else:

3440

# Fulltext record

3441

content, _ = self._vf._factory.parse_record(

3442

key, record, record_details, None)

3443

if self._num_compression_children.get(key, 0) > 0:

3444

self._content_objects[key] = content

3445

lines = content.text()

3446

self._text_cache[key] = lines

3447

if delta is not None:

3448

self._cache_delta_blocks(key, compression_parent, delta, lines)

3449

return lines

3450

3451

def _get_parent_annotations_and_matches(self, key, text, parent_key):

3452

"""Get the list of annotations for the parent, and the matching lines.

3453

3454

:param text: The opaque value given by _get_needed_texts

3455

:param parent_key: The key for the parent text

3456

:return: (parent_annotations, matching_blocks)

3457

parent_annotations is a list as long as the number of lines in

3458

parent

3459

matching_blocks is a list of (parent_idx, text_idx, len) tuples

3460

indicating which lines match between the two texts

3461

"""

3462

block_key = (key, parent_key)

3463

if block_key in self._matching_blocks:

3464

blocks = self._matching_blocks.pop(block_key)

3465

parent_annotations = self._annotations_cache[parent_key]

3466

return parent_annotations, blocks

3467

return annotate.Annotator._get_parent_annotations_and_matches(self,

3468

key, text, parent_key)

3469

3470

def _process_pending(self, key):

3471

"""The content for 'key' was just processed.

3472

3473

Determine if there is any more pending work to be processed.

3474

"""

3475

to_return = []

3476

if key in self._pending_deltas:

3477

compression_parent = key

3478

children = self._pending_deltas.pop(key)

3479

for child_key, parent_keys, record, record_details in children:

3480

lines = self._expand_record(child_key, parent_keys,

3481

compression_parent,

3482

record, record_details)

3483

if self._check_ready_for_annotations(child_key, parent_keys):

3484

to_return.append(child_key)

3485

# Also check any children that are waiting for this parent to be

3486

# annotation ready

3487

if key in self._pending_annotation:

3488

children = self._pending_annotation.pop(key)

3489

to_return.extend([c for c, p_keys in children

3490

if self._check_ready_for_annotations(c, p_keys)])

3491

return to_return

3492

3493

def _check_ready_for_annotations(self, key, parent_keys):

3494

"""return true if this text is ready to be yielded.

3495

3496

Otherwise, this will return False, and queue the text into

3497

self._pending_annotation

3498

"""

3499

for parent_key in parent_keys:

3500

if parent_key not in self._annotations_cache:

3501

# still waiting on at least one parent text, so queue it up

3502

# Note that if there are multiple parents, we need to wait

3503

# for all of them.

3504

self._pending_annotation.setdefault(parent_key,

3505

[]).append((key, parent_keys))

3506

return False

3507

return True

3508

3509

def _extract_texts(self, records):

3510

"""Extract the various texts needed based on records"""

3511

# We iterate in the order read, rather than a strict order requested

3512

# However, process what we can, and put off to the side things that

3513

# still need parents, cleaning them up when those parents are

3514

# processed.

3515

# Basic data flow:

3516

# 1) As 'records' are read, see if we can expand these records into

3517

# Content objects (and thus lines)

3518

# 2) If a given line-delta is waiting on its compression parent, it

3519

# gets queued up into self._pending_deltas, otherwise we expand

3520

# it, and put it into self._text_cache and self._content_objects

3521

# 3) If we expanded the text, we will then check to see if all

3522

# parents have also been processed. If so, this text gets yielded,

3523

# else this record gets set aside into pending_annotation

3524

# 4) Further, if we expanded the text in (2), we will then check to

3525

# see if there are any children in self._pending_deltas waiting to

3526

# also be processed. If so, we go back to (2) for those

3527

# 5) Further again, if we yielded the text, we can then check if that

3528

# 'unlocks' any of the texts in pending_annotations, which should

3529

# then get yielded as well

3530

# Note that both steps 4 and 5 are 'recursive' in that unlocking one

3531

# compression child could unlock yet another, and yielding a fulltext

3532

# will also 'unlock' the children that are waiting on that annotation.

3533

# (Though also, unlocking 1 parent's fulltext, does not unlock a child

3534

# if other parents are also waiting.)

3535

# We want to yield content before expanding child content objects, so

3536

# that we know when we can re-use the content lines, and the annotation

3537

# code can know when it can stop caching fulltexts, as well.

3538

3539

# Children that are missing their compression parent

3540

pending_deltas = {}

3541

for (key, record, digest) in self._vf._read_records_iter(records):

3542

# ghosts?

3543

details = self._all_build_details[key]

3544

(_, compression_parent, parent_keys, record_details) = details

3545

lines = self._expand_record(key, parent_keys, compression_parent,

3546

record, record_details)

3547

if lines is None:

3548

# Pending delta should be queued up

3549

continue

3550

# At this point, we may be able to yield this content, if all

3551

# parents are also finished

3552

yield_this_text = self._check_ready_for_annotations(key,

3553

parent_keys)

3554

if yield_this_text:

3555

# All parents present

3556

yield key, lines, len(lines)

3557

to_process = self._process_pending(key)

3558

while to_process:

3559

this_process = to_process

3560

to_process = []

3561

for key in this_process:

3562

lines = self._text_cache[key]

3563

yield key, lines, len(lines)

3564

to_process.extend(self._process_pending(key))

3565

3566

# Bind _load_data to _load_data_c from the ._knit_load_data_pyx module when
# that module can be imported. Otherwise report the import failure through
# osutils.failed_to_load_extension and bind it to _load_data_py from
# ._knit_load_data_py instead.
try:
    from ._knit_load_data_pyx import _load_data_c as _load_data
except ImportError as e:
    osutils.failed_to_load_extension(e)
    from ._knit_load_data_py import _load_data_py as _load_data

1910

for j in js:

1911

# a[i] matches b[j]

1912

if j >= blo:

1913

if j >= bhi:

1914

break

1915

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

1916

if k > bestsize:

1917

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

1918

j2len = newj2len

1919

1920

# Extend the best by non-junk elements on each end. In particular,

1921

# "popular" non-junk elements aren't in b2j, which greatly speeds

1922

# the inner loop above, but also means "the best" match so far

1923

# doesn't contain any junk *or* popular non-junk elements.

1924

while besti > alo and bestj > blo and \

1925

not isbjunk(b[bestj-1]) and \

1926

a[besti-1] == b[bestj-1]:

1927

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1928

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1929

not isbjunk(b[bestj+bestsize]) and \

1930

a[besti+bestsize] == b[bestj+bestsize]:

1931

bestsize += 1

1932

1933

# Now that we have a wholly interesting match (albeit possibly

1934

# empty!), we may as well suck up the matching junk on each

1935

# side of it too. Can't think of a good reason not to, and it

1936

# saves post-processing the (possibly considerable) expense of

1937

# figuring out what to do with it. In the case of an empty

1938

# interesting match, this is clearly the right thing to do,

1939

# because no other kind of match is possible in the regions.

1940

while besti > alo and bestj > blo and \

1941

isbjunk(b[bestj-1]) and \

1942

a[besti-1] == b[bestj-1]:

1943

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1944

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1945

isbjunk(b[bestj+bestsize]) and \

1946

a[besti+bestsize] == b[bestj+bestsize]:

1947

bestsize = bestsize + 1

1948

1949

return besti, bestj, bestsize

Older »