/brz/remove-bazaar : revision 1910.2.56

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Aaron Bentley
Date: 2006-09-21 22:02:58 UTC
mto: This revision was merged to the branch mainline in revision 2048.
Revision ID: abentley@panoramicfeedback.com-20060921220258-50026dc4220c8b11

More work on bundles

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

dir.py

dulwich

dulwich/.bzrignore

dulwich/COPYING

dulwich/Makefile

dulwich/README

dulwich/bin

dulwich/bin/dul-daemon

dulwich/bin/dul-receive-pack

dulwich/bin/dul-upload-pack

dulwich/bin/dulwich

dulwich/docs

dulwich/docs/protocol.txt

dulwich/dulwich

dulwich/dulwich/__init__.py

dulwich/dulwich/client.py

dulwich/dulwich/commit.py

dulwich/dulwich/errors.py

dulwich/dulwich/objects.py

dulwich/dulwich/pack.py

dulwich/dulwich/protocol.py

dulwich/dulwich/repo.py

dulwich/dulwich/server.py

dulwich/dulwich/tests

dulwich/dulwich/tests/__init__.py

dulwich/dulwich/tests/data

dulwich/dulwich/tests/data/blobs

dulwich/dulwich/tests/data/blobs/6f670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/blobs/954a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/commits

dulwich/dulwich/tests/data/commits/0d89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/commits/5dac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/commits/60dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/packs

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx

dulwich/dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack

dulwich/dulwich/tests/data/repos

dulwich/dulwich/tests/data/repos/a

dulwich/dulwich/tests/data/repos/a/.git

dulwich/dulwich/tests/data/repos/a/.git/HEAD

dulwich/dulwich/tests/data/repos/a/.git/index

dulwich/dulwich/tests/data/repos/a/.git/objects

dulwich/dulwich/tests/data/repos/a/.git/objects/2a

dulwich/dulwich/tests/data/repos/a/.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91

dulwich/dulwich/tests/data/repos/a/.git/objects/4e

dulwich/dulwich/tests/data/repos/a/.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec

dulwich/dulwich/tests/data/repos/a/.git/objects/4f

dulwich/dulwich/tests/data/repos/a/.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9

dulwich/dulwich/tests/data/repos/a/.git/objects/7d

dulwich/dulwich/tests/data/repos/a/.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364

dulwich/dulwich/tests/data/repos/a/.git/objects/a2

dulwich/dulwich/tests/data/repos/a/.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005

dulwich/dulwich/tests/data/repos/a/.git/objects/a9

dulwich/dulwich/tests/data/repos/a/.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097

dulwich/dulwich/tests/data/repos/a/.git/objects/ff

dulwich/dulwich/tests/data/repos/a/.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f

dulwich/dulwich/tests/data/repos/a/.git/objects/info

dulwich/dulwich/tests/data/repos/a/.git/objects/pack

dulwich/dulwich/tests/data/repos/a/.git/refs

dulwich/dulwich/tests/data/repos/a/.git/refs/heads

dulwich/dulwich/tests/data/repos/a/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/a/.git/refs/tags

dulwich/dulwich/tests/data/repos/a/a

dulwich/dulwich/tests/data/repos/a/b

dulwich/dulwich/tests/data/repos/a/c

dulwich/dulwich/tests/data/repos/ooo_merge

dulwich/dulwich/tests/data/repos/ooo_merge/.git

dulwich/dulwich/tests/data/repos/ooo_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/ooo_merge/.git/index

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/ooo_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/ooo_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/ooo_merge/a

dulwich/dulwich/tests/data/repos/ooo_merge/b

dulwich/dulwich/tests/data/repos/ooo_merge/c

dulwich/dulwich/tests/data/repos/simple_merge

dulwich/dulwich/tests/data/repos/simple_merge/.git

dulwich/dulwich/tests/data/repos/simple_merge/.git/HEAD

dulwich/dulwich/tests/data/repos/simple_merge/.git/index

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/info

dulwich/dulwich/tests/data/repos/simple_merge/.git/objects/pack

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/heads/master

dulwich/dulwich/tests/data/repos/simple_merge/.git/refs/tags

dulwich/dulwich/tests/data/repos/simple_merge/a

dulwich/dulwich/tests/data/repos/simple_merge/b

dulwich/dulwich/tests/data/repos/simple_merge/d

dulwich/dulwich/tests/data/repos/simple_merge/e

dulwich/dulwich/tests/data/trees

dulwich/dulwich/tests/data/trees/70c190eb48fa8bbb50ddc692a17b44cb781af7f6

dulwich/dulwich/tests/test_objects.py

dulwich/dulwich/tests/test_pack.py

dulwich/dulwich/tests/test_repository.py

dulwich/setup.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

remote.py

repository.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_ids.py

tests/test_repository.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

100

101

# TODO: function to check whole file

102

103

# TODO: atomically append data, then measure backwards from the cursor

104

# position after writing to work out where it was located. we may need to

105

# bypass python file buffering.

106

107

DATA_SUFFIX = '.knit'

108

INDEX_SUFFIX = '.kndx'

109

110

111

class KnitContent(object):
    """The lines of one knit version, held so that deltas can be applied.

    The content is stored in ``self._lines`` as a sequence of
    (origin, text) pairs.
    """

    def __init__(self, lines):
        # lines: sequence of (origin, text) pairs; stored as given, not copied.
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the content, yielding an (origin, text) tuple per line."""
        for pair in self._lines:
            origin, text = pair
            yield origin, text

    def annotate(self):
        """Return the (origin, text) tuples of annotate_iter() as a list."""
        annotated = []
        annotated.extend(self.annotate_iter())
        return annotated

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta that turns this content into new_lines.

        Only the texts are compared; origins are ignored for matching.
        Each yielded item is (old_start, old_end, new_line_count, new_pairs)
        where new_pairs is the slice of new_lines._lines replacing the old
        range.  Unchanged ('equal') regions are not yielded.
        """
        after = [text for _origin, text in new_lines._lines]
        before = [text for _origin, text in self._lines]
        matcher = KnitSequenceMatcher(None, before, after)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                replacement = new_lines._lines[new_start:new_end]
                yield (old_start, old_end, new_end - new_start, replacement)

    def line_delta(self, new_lines):
        """Return the items of line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return the list of line texts, with the origins dropped."""
        texts = []
        for _origin, line_text in self._lines:
            texts.append(line_text)
        return texts

    def copy(self):
        """Return a new KnitContent built from a slice copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

145

146

147

class _KnitFactory(object):
    """Shared base class of the factories that build content objects."""

    def make(self, lines, version):
        """Return a KnitContent pairing every line in ``lines`` with ``version``.

        :param lines: sized sequence of line texts.
        :param version: the origin recorded against each line.
        """
        origins = [version] * len(lines)
        annotated = zip(origins, lines)
        return KnitContent(annotated)

153

154

155

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    In the serialised form handled here every text line is prefixed with
    the utf8-encoded origin revision id followed by a single space.
    """

    annotated = True

    def parse_fulltext(self, content, version):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: iterable of serialised lines.
        :param version: not used by this factory; each line carries its
            own origin.
        :return: a KnitContent holding the (revid, plaintext) pairs.
        """
        decode_utf8 = cache_utf8.decode
        lines = []
        for line in content:
            # Split on the first space only: the text itself may hold spaces.
            origin, text = line.split(' ', 1)
            lines.append((decode_utf8(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version=None):
        """Yield the hunks produced by parse_line_delta, one at a time.

        :param lines: serialised delta lines, as for parse_line_delta.
        :param version: optional, passed through to parse_line_delta (which
            does not use it in this factory).  It is accepted so the
            signature lines up with KnitPlainFactory.parse_line_delta_iter.
        """
        # This previously read ``self.parse_line_delta[lines]``, which
        # subscripts the bound method and raises TypeError on first use.
        for hunk in self.parse_line_delta(lines, version):
            yield hunk

    def parse_line_delta(self, lines, version):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: iterable of serialised delta lines.
        :param version: not used by this factory.
        :return: list of (start, end, count, contents) tuples.
        """
        decode_utf8 = cache_utf8.decode
        result = []
        lines = iter(lines)
        # Bound method of the same iterator the for-loop consumes, so the
        # content lines taken below are skipped by the outer loop.
        next_line = lines.next
        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = []
            remaining = count
            while remaining:
                origin, text = next_line().split(' ', 1)
                remaining -= 1
                contents.append((decode_utf8(origin), text))
            result.append((start, end, count, contents))
        return result

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.

        :param content: object whose ``_lines`` is a sequence of
            (origin, text) pairs.
        :return: list of 'origin(utf8) text' strings.
        """
        encode_utf8 = cache_utf8.encode
        return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.

        :param delta: iterable of (start, end, count, lines) tuples where
            lines is a sequence of (origin, text) pairs.
        :return: flat list of header lines and 'origin(utf8) text' lines.
        """
        encode_utf8 = cache_utf8.encode
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(encode_utf8(origin) + ' ' + text
                       for origin, text in lines)
        return out

225

226

227

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Yield (start, end, count, contents) hunks from a plain delta.

        Every content line is attributed to ``version``.

        :param lines: A sequence of serialized delta lines: a
            'start,end,count' header followed by that hunk's text lines.
            The sequence is only read; it is no longer consumed, so callers
            do not need to pass a copy.
        :param version: The origin paired with each content line.
        """
        cursor = 0
        total = len(lines)
        while cursor < total:
            header = lines[cursor]
            cursor += 1
            start, end, c = [int(n) for n in header.split(',')]
            hunk_lines = lines[cursor:cursor + c]
            yield start, end, c, [(version, line) for line in hunk_lines]
            # Step over this hunk's lines to reach the next header.
            cursor += c

    def parse_line_delta(self, lines, version):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        """Serialize a fulltext content: just its plain text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta: a header per hunk followed by its text lines.

        See parse_line_delta which this inverts (origins are dropped).
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out

259

260

261

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit files live on.
    :param relpath: The knit's path relative to transport.
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...). The old positional call passed transport
    # as relpath, 'w' as file_mode and the factory *class* as access_mode,
    # which trips the access_mode assertion. The factory must be an
    # instance, matching the KnitAnnotateFactory() default.
    # NOTE(review): create=True follows from the constructor's docs ("If not
    # True, only open an existing knit") - confirm against callers.
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    k._data._open_file()

265

266

267

class KnitVersionedFile(VersionedFile):

268

"""Weave-like structure with faster random access.

269

270

A knit stores a number of texts and a summary of the relationships

271

between them. Texts are identified by a string version-id. Texts

272

are normally stored and retrieved as a series of lines, but can

273

also be passed as single strings.

274

275

Lines are stored with the trailing newline (if any) included, to

276

avoid special cases for files with no final newline. Lines are

277

composed of 8-bit characters, not unicode. The combination of

278

these approaches should mean any 'binary' file can be safely

279

stored and retrieved.

280

"""

281

282

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base path of the knit; the index and data file names
        are relpath plus INDEX_SUFFIX / DATA_SUFFIX.
    :param transport: The transport used to reach both files.
    :param file_mode: Passed through to the index and data files.
    :param access_mode: 'r' or 'w'; defaults to 'w' when None.
    :param factory: The content factory; defaults to a
        KnitAnnotateFactory instance.
    :param basis_knit: Deprecated; passing it only triggers a warning.
    :param delta: Stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data files.
    """
    if deprecated_passed(basis_knit):
        # The message used to read "KnitVersionedFile.__()", which named
        # no real method.
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
             " parameter is deprecated as of bzr 0.9.",
             DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)
    # len(self) reads the index built just above, so the data file is only
    # asked to create itself when the index holds no versions.
    self._data = _KnitData(transport, relpath + DATA_SUFFIX,
        access_mode, create=create and not len(self), file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)

317

318

def __repr__(self):
    """Return 'ClassName(location)' using the transport's absolute path."""
    class_name = self.__class__.__name__
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (class_name, location)

321

322

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored directly as a line-delta record when delta_parent
    is given and a fulltext is found within the delta chain limit;
    otherwise the work is delegated to the base class implementation.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)
    # (The old loop sorting parents into present/ghost lists was dead code:
    # its results were never read, and _check_versions_present has already
    # raised if any parent is missing.)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')

    # delta_parent is known to be set from here on.
    # determine the current delta chain length.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    count = 0
    delta_parents = [delta_parent]
    while count < 25:
        parent = delta_parents[0]
        method = self._index.get_method(parent)
        if method == 'fulltext':
            break
        delta_parents = self._index.get_parents(parent)
        count = count + 1
    if method == 'line-delta':
        # did not find a fulltext in the delta limit.
        # just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    where, size = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, where, size, parents)

383

384

def _add_raw_records(self, records, data):
    """Add all the records 'records' with data pre-joined in 'data'.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The data for the records. When it is written, the records
                 are adjusted to have pos pointing into data by the sum of
                 the preceding records sizes.
    """
    data_file = self._data
    # Write everything in one go, then derive each record's position.
    base = data_file.add_raw_record(data)
    consumed = 0
    entries = []
    for version_id, options, parents, size in records:
        entries.append((version_id, options, base + consumed, size, parents))
        if data_file._do_cache:
            data_file._cache[version_id] = data[consumed:consumed + size]
        consumed += size
    self._index.add_versions(entries)

403

404

def enable_cache(self):
    """Turn on caching in this knit's data file."""
    data_file = self._data
    data_file.enable_cache()

407

408

def clear_cache(self):
    """Empty the data file's cache; the index is left untouched."""
    data_file = self._data
    data_file.clear_cache()

411

412

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: Base name of the copy; INDEX_SUFFIX and DATA_SUFFIX are
        appended for the two files.
    :param transport: The transport to write the copy to.
    """
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                                      index_file)
    finally:
        # Release the source handle, as is done for the data file below.
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

426

427

def create_empty(self, name, transport, mode=None):
    """Create a new knit called name on transport.

    The new knit reuses this knit's factory and delta setting and is
    opened with create=True. The mode argument is accepted but not used.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

430

431

def _fix_parents(self, version, new_parents):
    """Fix the parents list for version.

    This is done by appending a new version to the index
    with identical data except for the parents list.
    the parents list must be a superset of the current
    list.
    """
    index = self._index
    current_values = index._cache[version]
    # No currently recorded parent may be dropped.
    dropped = set(current_values[4]).difference(set(new_parents))
    assert dropped == set()
    options, pos, size = current_values[1:4]
    index.add_version(version, options, pos, size, new_parents)

446

447

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id, or None when it has no parents.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index = self._index
    data_pos, data_size = index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    version_idx = index.lookup(version_id)
    noeol = 'no-eol' in index.get_options(version_id)
    if index.get_method(version_id) != 'fulltext':
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_idx)
        return parent, sha1, noeol, delta
    # Stored as a fulltext: diff it against the first parent (or against
    # nothing when there is no parent).
    new_content = self.factory.parse_fulltext(data, version_idx)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

474

475

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    Returns the index's graph items collected into a dict.
    """
    return dict(self._index.get_graph())

479

480

def get_sha1s(self, version_ids):
    """Return a dict mapping each requested version to its sha1 digest."""
    record_map = self._get_record_map(version_ids)
    digests = {}
    for version_id in version_ids:
        # Index 2 of a record-map entry is the digest.
        digests[version_id] = record_map[version_id][2]
    return digests

484

485

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1()."""
    digests = self.get_sha1s([version_id])
    return digests[version_id]

488

489

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Returns the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

493

494

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id."""
    # A version that is actually present cannot be a ghost.
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    known = self._index._cache
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent == version_id and parent not in known:
                return True
    return False

507

508

def versions(self):
    """See VersionedFile.versions."""
    index = self._index
    return index.get_versions()

511

512

def has_version(self, version_id):
    """See VersionedFile.has_version."""
    index = self._index
    return index.has_version(version_id)

# ``version_id in knit`` is answered by has_version.
__contains__ = has_version

517

518

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None):
    """Merge annotations for content.  This is done by comparing
    the annotations based on changed to the text.

    :param content: The new content; its _lines are updated in place when
        annotated is true.
    :param parents: The parent version ids to merge annotations from.
    :param parent_texts: Optional dict of already-built parent contents,
        handed to _get_content. None (the default) means no cached texts.
    :param delta: If true, a line delta is computed and returned.
    :param annotated: If true, matching lines take the parents' origins.
    :return: The line delta when delta is true, otherwise None.
    """
    # A None sentinel replaces the former shared mutable ``{}`` default.
    if parent_texts is None:
        parent_texts = {}
    if annotated:
        delta_seq = None
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            seq = KnitSequenceMatcher(None, merge_content.text(), content.text())
            if delta_seq is None:
                # setup a delta seq to reuse.
                delta_seq = seq
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the new
                # content for any line that matches the last-checked parent.
                # FIXME: save the sequence control data for delta compression
                # against the most relevant parent rather than rediffing.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if not annotated:
            # No matcher was built above, so diff against the first parent.
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

546

547

def _make_line_delta(self, delta_seq, new_content):
    """Generate a line delta from delta_seq and new_content.

    Every non-'equal' opcode becomes a hunk of
    (old_start, old_end, new_line_count, new_lines).
    """
    hunks = []
    for tag, old_start, old_end, new_start, new_end in delta_seq.get_opcodes():
        if tag != 'equal':
            replacement = new_content._lines[new_start:new_end]
            hunks.append((old_start, old_end, new_end - new_start,
                          replacement))
    return hunks

555

556

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, data_pos, data_size, next) is
    returned.
    method is the way referenced data should be applied.
    data_pos is the position of the data in the knit.
    data_size is the size of the data in the knit.
    next is the build-parent of the version, or None for fulltexts.
    """
    index = self._index
    component_data = {}
    for version_id in version_ids:
        cursor = version_id
        # Follow the build chain until a fulltext or an already-recorded
        # component is reached.
        while not (cursor is None or cursor in component_data):
            method = index.get_method(cursor)
            build_parent = None
            if method != 'fulltext':
                build_parent = self.get_parents(cursor)[0]
            data_pos, data_size = index.get_position(cursor)
            component_data[cursor] = (method, data_pos, data_size,
                                      build_parent)
            cursor = build_parent
    return component_data

582

583

def _get_content(self, version_id, parent_texts=None):
    """Returns a content object that makes up the specified
    version.

    :param version_id: The version to build.
    :param parent_texts: Optional dict of version_id to already-built
        content; a non-None entry for version_id is returned as-is.
        None (the default) means there are no cached contents.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    # A None sentinel replaces the former mutable ``{}`` default and also
    # makes an explicit None from a caller safe.
    if parent_texts is not None:
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

595

596

def _check_versions_present(self, version_ids):
    """Raise RevisionNotPresent unless every given version is in the index."""
    missing = set(version_ids)
    present = [v for v in list(missing) if self._index.has_version(v)]
    missing.difference_update(present)
    if missing:
        raise RevisionNotPresent(list(missing)[0], self.filename)

604

605

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines_with_ghosts().

    Unlike _add_lines, the parents are not required to be present.
    """
    self._check_add(version_id, lines)
    # _add works on a copy so the caller's list is not modified.
    working_lines = lines[:]
    return self._add(version_id, working_lines, parents, self.delta,
                     parent_texts)

609

610

def _add_lines(self, version_id, parents, lines, parent_texts):
    """See VersionedFile.add_lines.

    Every parent must already be present in the knit.
    """
    self._check_add(version_id, lines)
    self._check_versions_present(parents)
    # _add works on a copy so the caller's list is not modified.
    working_lines = lines[:]
    return self._add(version_id, working_lines, parents, self.delta,
                     parent_texts)

615

616

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    already_there = self.has_version(version_id)
    if already_there:
        raise RevisionAlreadyPresent(version_id, self.filename)
    for line_check in (self._check_lines_not_unicode,
                       self._check_lines_are_lines):
        line_check(lines)

626

627

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: The id of the version being added.
    :param lines: The text lines; the last one is modified in place when
        it lacks a trailing newline.
    :param parents: The parent version ids, present or not.
    :param delta: Whether a line-delta should be attempted.
    :param parent_texts: Optional dict of parent contents handed to
        _merge_annotations; None is treated as empty.
    :return: The content object built by the factory for the new version.
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    # Split the parents into those in the knit and those that are not.
    present_parents = []
    ghosts = []
    if parent_texts is None:
        parent_texts = {}
    for parent in parents:
        if not self.has_version(parent):
            ghosts.append(parent)
        else:
            present_parents.append(parent)

    # A delta needs a present parent to be built against.
    if delta and not len(present_parents):
        delta = False

    # The digest is taken before the trailing newline is added below, so
    # it covers the text exactly as supplied.
    digest = sha_strings(lines)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        count = 0
        delta_parents = present_parents
        while count < 25:
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            if method == 'fulltext':
                break
            delta_parents = self._index.get_parents(parent)
            count = count + 1
        # No fulltext found within the limit: store a fulltext instead.
        if method == 'line-delta':
            delta = False

    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,
                                              delta, self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    # The index records all parents as given, including absent ones.
    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return lines

699

700

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks; progress_bar is unused.
    """

702

703

def _clone_text(self, new_version_id, old_version_id, parents):
    """See VersionedFile.clone_text().

    The old version's lines are read back and re-added under the new id.
    """
    # FIXME RBC 20060228 make fast by only inserting an index with null
    # delta.
    old_lines = self.get_lines(old_version_id)
    self.add_lines(new_version_id, parents, old_lines)

708

709

def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

712

713

def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    The keys are version_ids, the values are tuples of (method, content,
    digest, next).
    method is the way the content should be applied.
    content is the record content as yielded by the data file.
    digest is the SHA1 digest of this version id after all steps are done
    next is the build-parent of the version, i.e. the leftmost ancestor.
    If the method is fulltext, next will be None.
    """
    position_map = self._get_components_positions(version_ids)
    # Each read request is (component_id, position, size).
    records = []
    for component_id, (method, pos, size, build_parent) in \
            position_map.iteritems():
        records.append((component_id, pos, size))
    record_map = {}
    for component_id, content, digest in \
            self._data.read_records_iter(records):
        method, pos, size, build_parent = position_map[component_id]
        record_map[component_id] = (method, content, digest, build_parent)
    return record_map

734

735

def get_text(self, version_id):
    """See VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

738

739

def get_texts(self, version_ids):
    """Return each requested version's lines joined into one string."""
    texts = []
    for lines in self.get_line_list(version_ids):
        texts.append(''.join(lines))
    return texts

741

742

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings."""
    text_map = self._get_content_maps(version_ids)[0]
    line_lists = []
    for version_id in version_ids:
        line_lists.append(text_map[version_id])
    return line_lists

746

747

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :param version_ids: The versions to build; iterated more than once.
    :return: (text_map, content_map) where text_map contains the texts for
    the requested versions and content_map contains the KnitContents.
    Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a built text does not match its stored sha-1.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # content_map holds every component built so far (before any no-eol
    # adjustment); final_content holds the requested versions only.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain from the version back to a fulltext, or
        # to a component that has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, next = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = next

        # Replay the chain oldest-first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                version_idx = self._index.lookup(component_id)
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_idx)
                elif method == 'line-delta':
                    # A copy of the data is passed to the parser.
                    delta = self.factory.parse_line_delta(data[:],
                            version_idx)
                    # Copy first so the parent's content is not modified.
                    content = content.copy()
                    content._lines = self._apply_delta(content._lines,
                                                       delta)
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Strip the newline from the last line, on a copy so the
            # entry in content_map is left as built.
            content = content.copy()
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

803

804

def iter_lines_added_or_present_in_versions(self, version_ids=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: The versions to visit; None means all versions.
    :raises RevisionNotPresent: if a requested version is not in the knit
        (raised when the generator is first advanced).
    """
    if version_ids is None:
        version_ids = self.versions()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need version_id, position, length
    version_id_records = []
    requested_versions = list(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue:
    # (a set keeps the membership test cheap for large knits)
    wanted = set(requested_versions)
    for version_id in self.versions():
        if version_id in wanted:
            data_pos, length = self._index.get_position(version_id)
            version_id_records.append((version_id, data_pos, length))

    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    count = 0
    total = len(version_id_records)
    # try/except with duplicated cleanup is deliberate: Python 2.4 does
    # not allow 'yield' inside a try block that has a finally clause.
    try:
        pb.update('Walking content.', count, total)
        for version_id, data, sha_value in \
            self._data.read_records_iter(version_id_records):
            pb.update('Walking content.', count, total)
            method = self._index.get_method(version_id)
            version_idx = self._index.lookup(version_id)
            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                content = self.factory.parse_fulltext(data, version_idx)
                for line in content.text():
                    yield line
            else:
                delta = self.factory.parse_line_delta(data, version_idx)
                # The hunk size gets its own name: reusing 'count' here
                # used to overwrite the progress counter.
                for start, end, hunk_size, lines in delta:
                    for origin, line in lines:
                        yield line
            count += 1
        pb.update('Walking content.', total, total)
        pb.finished()
    except:
        pb.update('Walking content.', total, total)
        pb.finished()
        raise

852

853

def num_versions(self):
    """See VersionedFile.num_versions()."""
    index = self._index
    return index.num_versions()

# len(knit) is the number of versions.
__len__ = num_versions

858

859

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter."""
    content = self._get_content(version_id)
    for pair in content.annotate_iter():
        origin, text = pair
        yield origin, text

864

865

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index raises KeyError for
        version_id.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

874

875

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index raises KeyError for
        version_id.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

881

882

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    A single version id string is treated as a one-element list; an empty
    request yields an empty list without consulting the index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)
    return []

890

891

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    A single version id string is treated as a one-element list; an empty
    request yields an empty list without consulting the index.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry_with_ghosts(versions)
    return []

899

900

#@deprecated_method(zero_eight)

901

def walk(self, version_ids):
    """See VersionedFile.walk.

    Yields the (lineno, insert_id, dset, line) tuples produced by walking
    a temporary Weave built from the ancestry of version_ids.
    """
    # We take the short path here, and extract all relevant texts
    # and put them in a weave and let that do all the work.  Far
    # from optimal, but is much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    w = Weave(self.filename)
    # A set keeps the filter below linear; testing membership against the
    # ancestry list made it quadratic in the number of versions.
    ancestry = set(self.get_ancestry(version_ids))
    sorted_graph = topo_sort(self._index.get_graph())
    version_list = [vid for vid in sorted_graph if vid in ancestry]

    for version_id in version_list:
        lines = self.get_lines(version_id)
        w.add_lines(version_id, self.get_parents(version_id), lines)

    for lineno, insert_id, dset, line in w.walk(version_ids):
        yield lineno, insert_id, dset, line

920

921

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Yields (state, text) pairs, where state is one of 'killed-a',
    'killed-b', 'new-a', 'new-b' or 'unchanged'.
    """
    ancestors_b = set(self.get_ancestry(ver_b))
    ancestors_a = set(self.get_ancestry(ver_a))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [text for (origin, text) in annotated_a]
    plain_b = [text for (origin, text) in annotated_b]
    matcher = KnitSequenceMatcher(None, plain_a, plain_b)
    a_cur = 0
    b_cur = 0
    for ai, bi, length in matcher.get_matching_blocks():
        # process all mismatched sections
        # (last mismatched section is handled because blocks always
        # includes a 0-length last block)
        for revision, text in annotated_a[a_cur:ai]:
            # A line only in A: B killed it if B's ancestry has its origin.
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:bi]:
            # A line only in B: A killed it if A's ancestry has its origin.
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # and now the matched section
        a_cur = ai + length
        b_cur = bi + length
        for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):
            assert text_a == text_b
            yield "unchanged", text_a

959

960

961

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        """Record where the file lives and how it is to be accessed."""
        self._transport, self._filename = transport, filename
        self._mode, self._file_mode = mode, file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def check_header(self, fp):
        """Read the first line of fp and require it to equal self.HEADER.

        :raises KnitHeaderError: if the line read differs from HEADER.
        """
        first_line = fp.readline()
        if first_line == self.HEADER:
            return
        raise KnitHeaderError(badline=first_line)

    def commit(self):
        """Commit is a nop."""

    def __repr__(self):
        """Return 'ClassName(filename)'."""
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

984

985

986

class _KnitIndex(_KnitComponentFile):

987

"""Manages knit index file.

988

989

The index is already kept in memory and read on startup, to enable

990

fast lookups of revision information. The cursor of the index

991

file is always pointing to the end, making it easy to append

992

entries.

993

994

_cache is a cache for fast mapping from version id to a Index

995

object.

996

997

_history is a cache for fast mapping from indexes to version ids.

998

999

The index data format is dictionary compressed when it comes to

1000

parent references; an index entry may only have parents with a

1001

lower index number. As a result, the index is topologically sorted.

1002

1003

Duplicate entries may be written to the index for a single version id

1004

if this is done then the latter one completely replaces the former:

1005

this allows updates to correct version and parent information.

1006

Note that the two entries may share the delta, and that successive

1007

annotations and references MUST point to the first entry.

1008

1009

The index file on disc contains a header, followed by one line per knit

1010

record. The same revision can be present in an index file more than once.

1011

The first occurrence gets assigned a sequence number starting from 0.

1012

1013

The format of a single line is

1014

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1015

REVISION_ID is a utf8-encoded revision id

1016

FLAGS is a comma separated list of flags about the record. Values include

1017

no-eol, line-delta, fulltext.

1018

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1019

that the compressed data starts at.

1020

LENGTH is the ascii representation of the length of the data file.

1021

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1022

REVISION_ID.

1023

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1024

revision id already in the knit that is a parent of REVISION_ID.

1025

The ' :' marker is the end of record marker.

1026

1027

partial writes:

1028

when a write is interrupted to the index file, it will result in a line that

1029

does not end in ' :'. If the ' :' is not present at the end of a line, or at

1030

the end of the file, then the record that is missing it will be ignored by

1031

the parser.

1032

1033

When writing new records to the index file, the data is preceded by '\n'

1034

to ensure that records always start on new lines even if the last write was

1035

interrupted. As a result its normal for the last line in the index to be

1036

missing a trailing newline. One can be added with no harmful effects.

1037

"""

1038

1039

HEADER = "# bzr knit index 8\n"

1040

1041

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1042

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1043

1044

def _cache_version(self, version_id, options, pos, size, parents):

1045

"""Cache a version record in the history array and index cache.

1046

1047

This is inlined into __init__ for performance. KEEP IN SYNC.

1048

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1049

indexes).

1050

"""

1051

# only want the _history index to reference the 1st index entry

1052

# for version_id

1053

if version_id not in self._cache:

1054

index = len(self._history)

1055

self._history.append(version_id)

1056

else:

1057

index = self._cache[version_id][5]

1058

self._cache[version_id] = (version_id,

1059

options,

1060

pos,

1061

size,

1062

parents,

1063

index)

1064

1065

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Read the index at filename, optionally creating it when missing.

    Every complete line of an existing index is parsed into _cache and
    _history. A missing file is only tolerated when mode is 'w' and create
    is true: the header is then written straight away, or, when
    delay_create is set, the index is flagged so that add_versions creates
    it later. In every other case NoSuchFile propagates.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = history = []
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        count = 0
        total = 1
        try:
            pb.update('read knit index', count, total)
            fp = self._transport.get(self._filename)
            try:
                self.check_header(fp)
                # readlines reads the whole file at once:
                # bad for transports like http, good for local disk
                # we save 60 ms doing this one change (
                # from calling readline each time to calling
                # readlines once.
                # probably what we want for nice behaviour on
                # http is a incremental readlines that yields, or
                # a check for local vs non local indexes,
                for line in fp.readlines():
                    fields = line.split()
                    if not (len(fields) >= 5 and fields[-1] == ':'):
                        # corrupt line.
                        # FIXME: in the future we should determine if its a
                        # short write - and ignore it
                        # or a different failure, and raise. RBC 20060407
                        continue
                    count += 1
                    total += 1
                    # --- inlined self._parse_parents (KEEP IN SYNC)
                    parents = []
                    for value in fields[4:-1]:
                        if value[0] != '.':
                            # this is 15/4000ms faster than isinstance,
                            # (in lsprof)
                            # this function is called thousands of times a
                            # second so small variations add up.
                            assert value.__class__ is str
                            parents.append(history[int(value)])
                        else:
                            # uncompressed reference
                            parents.append(value[1:])
                    # --- inlined self._cache_version (KEEP IN SYNC)
                    # only want the _history index to reference the 1st
                    # index entry for version_id
                    version_id = fields[0]
                    if version_id in cache:
                        index = cache[version_id][5]
                    else:
                        index = len(history)
                        history.append(version_id)
                    cache[version_id] = (version_id,
                                         fields[1].split(','),
                                         int(fields[2]),
                                         int(fields[3]),
                                         parents,
                                         index)
            finally:
                fp.close()
        except NoSuchFile:
            if not (mode == 'w' and create):
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(self._filename,
                    self.HEADER, mode=self._file_mode)
    finally:
        pb.update('read knit index', total, total)
        pb.finished()

1154

1155

def _parse_parents(self, compressed_parents):

1156

"""convert a list of string parent values into version ids.

1157

1158

ints are looked up in the index.

1159

.FOO values are ghosts and converted in to FOO.

1160

1161

NOTE: the function is retained here for clarity, and for possible

1162

use in partial index reads. However bulk processing now has

1163

it inlined in __init__ for inner-loop optimisation.

1164

"""

1165

result = []

1166

for value in compressed_parents:

1167

if value[-1] == '.':

1168

# uncompressed reference

1169

result.append(value[1:])

1170

else:

1171

# this is 15/4000ms faster than isinstance,

1172

# this function is called thousands of times a

1173

# second so small variations add up.

1174

assert value.__class__ is str

1175

result.append(self._history[int(value)])

1176

return result

1177

1178

def get_graph(self):
    """Return a list of (version_id, parents) pairs, one per cached version.

    The parents are the stored parent lists, so they may name ghosts.
    """
    return [(version_id, entry[4])
            for version_id, entry in self._cache.iteritems()]

1183

1184

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry."""
    # Walk the parent links outwards from ``versions`` and collect a graph
    # that only mentions versions present in this index.
    cache = self._cache
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        # trim ghosts: parents without an index entry of their own
        present = [p for p in cache[current][4] if p in cache]
        for parent in present:
            # only queue parents that have not been expanded yet
            if parent not in ancestry:
                todo.add(parent)
        ancestry[current] = present
    return topo_sort(ancestry.items())

1201

1202

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts."""
    # Same walk as get_ancestry, but ghosts stay in the graph as nodes
    # without parents.
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        if current not in self._cache:
            # ghost, fake it
            ancestry[current] = []
            continue
        parents = self._cache[current][4]
        for parent in parents:
            if parent not in ancestry:
                todo.add(parent)
        ancestry[current] = parents
    return topo_sort(ancestry.items())

1222

1223

def num_versions(self):
    """Return the number of distinct versions recorded in the index."""
    history = self._history
    return len(history)

# len(index) reports the same count.
__len__ = num_versions

1227

1228

def get_versions(self):
    """Return the version ids in index order.

    This is the internal history list itself, not a copy.
    """
    history = self._history
    return history

1230

1231

def idx_to_name(self, idx):
    """Return the version id stored at position idx of the history."""
    history = self._history
    return history[idx]

1233

1234

def lookup(self, version_id):
    """Return the sequence number recorded for version_id.

    The version must be present in the index.
    """
    assert version_id in self._cache
    entry = self._cache[version_id]
    return entry[5]

1237

1238

def _version_list_to_index(self, versions):
    """Encode parent ids as the space separated parent fields of a line.

    Versions present in the index are written as their sequence number;
    any other id is written utf8-encoded with a leading '.'.
    """
    encode_utf8 = cache_utf8.encode
    cache = self._cache
    fields = []
    for version in versions:
        if version not in cache:
            field = '.' + encode_utf8(version)
        else:
            # -- inlined lookup() --
            field = str(cache[version][5])
        fields.append(field)
    return ' '.join(fields)

1249

1250

def add_version(self, version_id, options, pos, size, parents):
    """Add a single version record to the index.

    This is add_versions applied to a one-element sequence.
    """
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1253

1254

def add_versions(self, versions):
    """Add multiple versions to the index.

    The records are written to the index file first and cached afterwards.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
                     It is iterated twice, so it must be re-iterable.
    """
    encode_utf8 = cache_utf8.encode
    lines = []
    for version_id, options, pos, size, parents in versions:
        # each record starts with '\n' so that it begins on a fresh line
        # even if the previous write was interrupted.
        line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
                                       ','.join(options),
                                       pos,
                                       size,
                                       self._version_list_to_index(parents))
        assert isinstance(line, str), \
            'content must be utf-8 encoded: %r' % (line,)
        lines.append(line)
    if self._need_to_create:
        # creation was deferred: write header and records in one go.
        sio = StringIO()
        sio.write(self.HEADER)
        sio.writelines(lines)
        sio.seek(0)
        self._transport.put_file_non_atomic(self._filename, sio,
                            create_parent_dir=self._create_parent_dir,
                            mode=self._file_mode,
                            dir_mode=self._dir_mode)
        self._need_to_create = False
    else:
        self._transport.append_bytes(self._filename, ''.join(lines))

    # cache after writing, so that a failed write leads to missing cache
    # entries not extra ones. XXX TODO: RBC 20060502 in the event of a
    # failure, reload the index or flush it or some such, to prevent
    # writing records that did complete twice.
    for version_id, options, pos, size, parents in versions:
        self._cache_version(version_id, options, pos, size, parents)

1290

1291

def has_version(self, version_id):
    """Return True if version_id has an entry in the index."""
    cache = self._cache
    return version_id in cache

1294

1295

def get_position(self, version_id):
    """Return the (pos, size) pair recorded for version_id.

    :raises KeyError: if version_id is not in the index.
    """
    entry = self._cache[version_id]
    return (entry[2], entry[3])

1299

1300

def get_method(self, version_id):
    """Return the compression method of version_id.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both flags
        are present.
    """
    options = self._cache[version_id][1]
    if 'fulltext' not in options:
        assert 'line-delta' in options
        return 'line-delta'
    return 'fulltext'

1308

1309

def get_options(self, version_id):
    """Return the list of flags recorded for version_id."""
    entry = self._cache[version_id]
    return entry[1]

1311

1312

def get_parents(self, version_id):
    """Return the parents of version_id, leaving out ghosts.

    A ghost is a parent that has no entry of its own in the index.
    """
    cache = self._cache
    present = []
    for parent in cache[version_id][4]:
        if parent in cache:
            present.append(parent)
    return present

1316

1317

def get_parents_with_ghosts(self, version_id):
    """Return the stored parent list of version_id, ghosts included."""
    entry = self._cache[version_id]
    return entry[4]

1320

1321

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: an iterable of version ids.
    :raises RevisionNotPresent: naming the first id, in iteration order,
        that has no entry in the index.
    """
    for version_id in version_ids:
        if version_id not in self._cache:
            # Use _filename like the rest of this class.
            # NOTE(review): the previous code read self.filename here;
            # confirm no such attribute exists on the base class.
            raise RevisionNotPresent(version_id, self._filename)

1329

1330

1331

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file.

    Each record is written as a gzip stream holding a
    'version VERSION_ID LINE_COUNT DIGEST' header line, the content lines
    and a closing 'end VERSION_ID' line. Raw (still compressed) records are
    kept in an in-memory cache while caching is enabled.
    """

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False,
                 dir_mode=None):
        """Set up access to the data file, creating it when asked to.

        With create set, an empty file is written immediately unless
        delay_create is also set, in which case the file is written by the
        first add_record/add_raw_record call.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        #       but it complicates the model a bit, so for now just use
        #       a simple dictionary
        # maps version_id -> raw (compressed) record bytes
        self._cache = {}
        self._do_cache = False
        if create:
            if delay_create:
                self._need_to_create = create
            else:
                self._transport.put_bytes_non_atomic(self._filename, '',
                                                     mode=self._file_mode)

    def enable_cache(self):
        """Enable caching of reads."""
        self._do_cache = True

    def clear_cache(self):
        """Clear the record cache and stop caching."""
        self._do_cache = False
        self._cache = {}

    def _open_file(self):
        """Return a file object for the data file, or None if it is missing."""
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def _record_to_data(self, version_id, digest, lines):
        """Convert version_id, digest, lines into a raw data block.

        :return: (len, a StringIO instance with the raw data ready to read.)
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)

        version_id_utf8 = cache_utf8.encode(version_id)
        data_file.writelines(chain(
            ["version %s %d %s\n" % (version_id_utf8,
                                     len(lines),
                                     digest)],
            lines,
            ["end %s\n" % version_id_utf8]))
        data_file.close()
        # the compressed length is only known once the gzip stream is closed
        length = sio.tell()

        sio.seek(0)
        return length, sio

    def add_raw_record(self, raw_data):
        """Append a prepared record to the data file.

        :return: the offset in the data file raw_data was written.
        """
        assert isinstance(raw_data, str), 'data must be plain bytes'
        if not self._need_to_create:
            return self._transport.append_bytes(self._filename, raw_data)
        else:
            # deferred creation: this record becomes the start of the file
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            return 0

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        :return: (start_pos, size): where the record was written in the
            data file and its compressed length.
        """
        size, sio = self._record_to_data(version_id, digest, lines)
        # write to disk
        if not self._need_to_create:
            start_pos = self._transport.append_file(self._filename, sio)
        else:
            self._transport.put_file_non_atomic(self._filename, sio,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
            self._need_to_create = False
            start_pos = 0
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return start_pos, size

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream.
                 as (stream, header_record)
        :raises KnitCorrupt: if the header does not have four fields or
            names a different version.
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
        if cache_utf8.decode(rec[1]) != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r' % (
                                version_id, rec[1]))
        return df, rec

    def _parse_record(self, version_id, data):
        """Decompress one raw record.

        :return: (content lines, digest field of the header).
        :raises KnitCorrupt: if the header or the closing 'end' line does
            not match version_id.
        """
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df, rec = self._parse_record_header(version_id, data)
        record_contents = df.readlines()
        # the last line is the 'end VERSION_ID' trailer, not content
        l = record_contents.pop()
        assert len(record_contents) == int(rec[2])
        if l != 'end %s\n' % cache_utf8.encode(version_id):
            raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                        % (l, version_id))
        df.close()
        return record_contents, rec[3]

    def read_records_iter_raw(self, records):
        """Read text records from data file and yield raw data.

        This unpacks enough of the text record to validate the id is
        as expected but thats all.

        :param records: a sequence of (version_id, pos, size) entries.
        :return: yields (version_id, raw_data) in the order requested.
        :raises KnitCorrupt: if the header of a record read from the
            transport is invalid.
        """
        if not records:
            return

        # Decide once, up front, which records come from the cache. The
        # cache can grow while we iterate, so re-testing it inside the loop
        # would fall out of step with the readv plan when a version id is
        # requested more than once.
        if self._cache:
            cached_ids = set([version_id for version_id, pos, size in records
                              if version_id in self._cache])
        else:
            # Don't check _cache if it is empty
            cached_ids = set()
        needed_offsets = [(pos, size) for version_id, pos, size in records
                          if version_id not in cached_ids]

        # uses readv so nice and fast we hope.
        raw_records = self._transport.readv(self._filename, needed_offsets)

        for version_id, pos, size in records:
            if version_id in cached_ids:
                # This data has already been validated
                data = self._cache[version_id]
            else:
                pos, data = raw_records.next()

                # validate the header before caching, so that only
                # validated data is ever served from the cache.
                df, rec = self._parse_record_header(version_id, data)
                df.close()
                if self._do_cache:
                    self._cache[version_id] = data
            yield version_id, data

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        The result will be returned in whatever is the fastest to read.
        Not by the order requested. Also, multiple requests for the same
        record will only yield 1 response.
        :param records: A list of (version_id, pos, len) entries
        :return: Yields (version_id, contents, digest) in the order
                 read, not the order requested
        """
        if not records:
            return

        if self._cache:
            # Skip records we have already seen
            yielded_records = set()
            needed_records = set()
            for record in records:
                if record[0] in self._cache:
                    if record[0] in yielded_records:
                        continue
                    yielded_records.add(record[0])
                    data = self._cache[record[0]]
                    content, digest = self._parse_record(record[0], data)
                    yield (record[0], content, digest)
                else:
                    needed_records.add(record)
            # read the remainder in ascending file offset order
            needed_records = sorted(needed_records, key=operator.itemgetter(1))
        else:
            needed_records = sorted(set(records), key=operator.itemgetter(1))

        if not needed_records:
            return

        # The transport optimizes the fetching as well
        # (ie, reads continuous ranges.)
        readv_response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in needed_records])

        for (version_id, pos, size), (pos, data) in \
                izip(iter(needed_records), readv_response):
            content, digest = self._parse_record(version_id, data)
            if self._do_cache:
                self._cache[version_id] = data
            yield version_id, content, digest

    def read_records(self, records):
        """Read records into a dictionary.

        :return: a dict mapping version_id to (content, digest).
        """
        components = {}
        for record_id, content, digest in \
                self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components

1546

1547

1548

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Missing versions are copied as raw records, without recompressing
        them. The pb argument is not used: a nested progress bar is always
        created here.

        :return: the number of versions copied.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in self.source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                extra_parents = source_parents.difference(target_parents)
                needed_versions.update(extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0

            source_graph = self.source.get_graph()
            version_list = []
            for candidate in topo_sort(source_graph):
                if candidate in self.target:
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)
            self.check_matching_parents(version_list, source_graph)

            # plan the join:
            source_index = self.source._index
            copy_queue = []
            copy_queue_records = []
            scheduled = set()
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in scheduled or
                            not self.source.has_version(parent))
                data_pos, data_size = source_index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_iter = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for (version_id, raw_data), queued in izip(raw_iter, copy_queue):
                version_id2, options, parents = queued
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                combined = (self.target.get_parents_with_ghosts(version)
                            + list(source_parents.difference(target_parents)))
                self.target.fix_parents(version, combined)
            return count
        finally:
            pb.finished()

    def check_matching_parents(self, versions, source_graph):
        """Check that parent texts match between the two knits.

        This prevents knit corruption when copying raw records.

        :param versions: the versions about to be copied.
        :param source_graph: indexed by version id to obtain its parents.
        :raises errors.KnitTextsDiffer: for a parent present in both knits
            whose sha1 differs between them.
        """
        # NOTE(review): source_graph is indexed by version id here although
        # join() also hands the same object to topo_sort - confirm
        # get_graph() returns a mapping.
        shared_parents = set()
        for version in versions:
            for parent in source_graph[version]:
                if parent in self.target and parent in self.source:
                    shared_parents.add(parent)
        source_sha1s = self.source.get_sha1s(shared_parents)
        target_sha1s = self.target.get_sha1s(shared_parents)
        for parent in shared_parents:
            if source_sha1s[parent] != target_sha1s[parent]:
                raise errors.KnitTextsDiffer(parent)


InterVersionedFile.register_optimiser(InterKnit)

1681

1682

1683

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a weave and target is a knit."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Missing versions are added to the knit from the lines of the weave.
        The pb argument is not used: a nested progress bar is always
        created here.

        :return: the number of versions added.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                extra_parents = source_parents.difference(target_parents)
                if not extra_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in self.source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                needed_versions.update(extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0

            version_list = []
            for candidate in topo_sort(self.source.get_graph()):
                if self.target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                combined = (self.target.get_parents_with_ghosts(version)
                            + list(source_parents.difference(target_parents)))
                self.target.fix_parents(version, combined)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

1774

1775

1776

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        :param alo: inclusive lower bound of the slice of ``self.a``.
        :param ahi: exclusive upper bound of the slice of ``self.a``.
        :param blo: inclusive lower bound of the slice of ``self.b``.
        :param bhi: exclusive upper bound of the slice of ``self.b``.
        :return: a plain tuple ``(i, j, k)`` as described below.

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = KnitSequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = KnitSequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = KnitSequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j = self.a, self.b, self.b2j

        # Newer difflib versions expose the junk elements of b as the set
        # ``bjunk`` and no longer provide the ``isbjunk`` callable; older
        # versions only provide ``isbjunk``.  Accept either.
        bjunk = getattr(self, 'bjunk', None)
        if bjunk is not None:
            isbjunk = bjunk.__contains__
        else:
            isbjunk = self.isbjunk

        # Keep the lazy xrange where it exists; fall back to range where
        # xrange is not a builtin.
        try:
            a_indices = xrange(alo, ahi)
        except NameError:
            a_indices = range(alo, ahi)

        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        for i in a_indices:
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced
            # the following improvement
            # 704    0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674 0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            # +76519  0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            # 704    0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            # +211400 0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            # +76519  0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>
            try:
                js = b2j[a[i]]
            except KeyError:
                # a[i] never occurs in b (or is junk): no run can pass
                # through this row, so newj2len stays empty.
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            # js is in increasing order; nothing further
                            # can fall inside b[blo:bhi].
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i - k, 1 + j - k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while (besti > alo and bestj > blo
               and not isbjunk(b[bestj - 1])
               and a[besti - 1] == b[bestj - 1]):
            besti, bestj, bestsize = besti - 1, bestj - 1, bestsize + 1
        while (besti + bestsize < ahi and bestj + bestsize < bhi
               and not isbjunk(b[bestj + bestsize])
               and a[besti + bestsize] == b[bestj + bestsize]):
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while (besti > alo and bestj > blo
               and isbjunk(b[bestj - 1])
               and a[besti - 1] == b[bestj - 1]):
            besti, bestj, bestsize = besti - 1, bestj - 1, bestsize + 1
        while (besti + bestsize < ahi and bestj + bestsize < bhi
               and isbjunk(b[bestj + bestsize])
               and a[besti + bestsize] == b[bestj + bestsize]):
            bestsize += 1

        return besti, bestj, bestsize

1908

Older »