/brz/remove-bazaar : revision 4665.3.14

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Robert Collins
Date: 2009-09-04 00:49:55 UTC
mfrom: (4671 +trunk)
mto: This revision was merged to the branch mainline in revision 4672.
Revision ID: robertc@robertcollins.net-20090904004955-suhaz7r608i4j9gm

Merge bzr.dev.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/crash.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.h

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/doc_generate

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/inventory_delta.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/treeshape.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/_bencode_py.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/BUILD-NOTES

doc/Makefile

doc/_static

doc/_static/bzr icon 16.png

doc/_static/bzr.ico

doc/_static/en

doc/_static/en/quick-reference

doc/_static/en/quick-reference/Makefile

doc/_static/en/quick-reference/bzr-quick-reference.pdf

doc/_static/en/quick-reference/bzr-quick-reference.png

doc/_static/en/quick-reference/bzr-quick-reference.svg

doc/_templates

doc/_templates/index.html

doc/_templates/layout.html

doc/bazaar-vcs.org.kid

doc/conf.py

doc/contents.txt

doc/default.css

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/developer-guide/HACKING.txt

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/index.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-for-2x.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/developer-guide

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/quick-reference

doc/es/quick-reference/Makefile

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

doc/es/quick-reference/quick-start-summary.svg

doc/es/release-notes

doc/es/user-guide

doc/es/user-guide/index.txt

doc/es/user-guide/resolving_conflicts.txt

doc/es/user-guide/version_info.txt

doc/es/user-reference

doc/index.es.txt

doc/index.ru.txt

doc/index.txt

doc/make.bat

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

man1

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/check-newsbugs.py

tools/convertfile.py

tools/convertinv.py

tools/generate_docs.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/time_graph.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

trace,

)

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# Minimum number of uncompressed bytes to try to fetch at once when

# retrieving groupcompress blocks (2**16 == 65536 bytes).

BATCH_SIZE = 2**16

# LZMA output is currently switched off: the leading ``False``
# short-circuits the expression, so this is False whether or not the
# optional ``pylzma`` import above succeeded.
_USE_LZMA = False and (pylzma is not None)

# SHA-1 hex digest of the empty string, i.e. osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map the way groupcompress wants them.

    Keys are bucketed by prefix (the first element of a multi-element key;
    plain strings and 1-tuples all share the '' bucket). Buckets are emitted
    in sorted prefix order, and inside each bucket the keys come out in
    reverse topological order.

    :param parent_map: A dict mapping each key to its parents.
    :return: A list of the keys in groupcompress order.
    """
    buckets = {}
    for key, parents in parent_map.iteritems():
        # Only multi-element tuple keys carry a prefix to group on.
        if not isinstance(key, str) and len(key) != 1:
            bucket_name = key[0]
        else:
            bucket_name = ''
        buckets.setdefault(bucket_name, {})[key] = parents
    ordered_keys = []
    for bucket_name in sorted(buckets):
        topo_order = topo_sort(buckets[bucket_name])
        ordered_keys.extend(reversed(topo_order))
    return ordered_keys

# The max zlib window size is 32kB, so if we set the 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should be guaranteed

# to get at least num_bytes coming out.

# Used as the slack added to every partial-decompression request.
_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The payload can be held in up to three forms: compressed bytes
    (``_z_content``), a list of uncompressed chunks (``_content_chunks``) or
    one uncompressed string (``_content``). ``_ensure_content`` and
    ``_create_z_content`` convert between those forms on demand.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        """Create an empty block; every field starts out as None."""
        # map by key? or just order in file?
        self._compressor_name = None
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None
        self._content_chunks = None

    def __len__(self):
        """Return the compressed plus uncompressed length of this block."""
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: if more bytes are requested than the block
            holds, if there is nothing to decompress, if the compressor name
            is unknown, or if the stream yields fewer bytes than requested.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._content_chunks is not None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The serialised block.
        :param pos: Offset in bytes of the first length field (just after
            the format header).
        :return: The position in bytes just after the last newline
        :raises ValueError: if a newline is not found within the capped
            search range (from ``index``) or a length is not an integer.
        :raises AssertionError: if the number of bytes following the header
            does not match the recorded compressed length.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]
        # The docstring always promised this value; actually hand it back.
        return pos

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string starting with one of GCB_KNOWN_HEADERS.
        :return: A new block with the compressor name, both lengths and the
            compressed content populated.
        :raises ValueError: if the header is not recognised.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content (not consulted here)
        :param start: Offset of the record's first byte in the content
        :param end: Offset just past the record's last byte
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: if the record's control code is neither 'f' nor
            'd', or its encoded length disagrees with ``end``.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c not in ('f', 'd'):
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                             ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            # Fulltext: the payload is stored verbatim.
            bytes = self._content[content_start:end]
        else:
            # Delta: rebuild the text from the block's own content.
            bytes = apply_delta_to_source(self._content, content_start, end)
        return bytes

    def set_chunked_content(self, content_chunks, length):
        """Set the content of this block to the given chunks.

        :param content_chunks: A list of strings making up the content.
        :param length: The length recorded as the uncompressed size.
        """
        # If we have lots of short lines, it is may be more efficient to join
        # the content ahead of time. If the content is <10MiB, we don't really
        # care about the extra memory consumption, so we can just pack it and
        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
        # mysql, which is below the noise margin
        self._content_length = length
        self._content_chunks = content_chunks
        self._content = None
        self._z_content = None

    def set_content(self, content):
        """Set the content of this block, dropping any compressed form."""
        self._content_length = len(content)
        self._content = content
        self._z_content = None

    def _create_z_content_using_lzma(self):
        """Compress the content with pylzma, joining chunks first if needed.

        :raises AssertionError: if the block has no content.
        """
        if self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            raise AssertionError('Nothing to compress')
        self._z_content = pylzma.compress(self._content)
        self._z_content_length = len(self._z_content)

    def _create_z_content_from_chunks(self):
        """Compress ``_content_chunks`` with zlib, one chunk at a time."""
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
        # Build a real list so the final flush() output can be appended.
        compressed_chunks = [compressor.compress(chunk)
                             for chunk in self._content_chunks]
        compressed_chunks.append(compressor.flush())
        self._z_content = ''.join(compressed_chunks)
        self._z_content_length = len(self._z_content)

    def _create_z_content(self):
        """Populate ``_z_content`` unless it is already present.

        :raises AssertionError: if there is neither chunked nor joined
            content to compress.
        """
        if self._z_content is not None:
            return
        if _USE_LZMA:
            self._create_z_content_using_lzma()
            return
        if self._content_chunks is not None:
            self._create_z_content_from_chunks()
            return
        if self._content is None:
            # Same failure mode as the lzma path, instead of an opaque
            # TypeError from zlib.compress(None).
            raise AssertionError('Nothing to compress')
        self._z_content = zlib.compress(self._content)
        self._z_content_length = len(self._z_content)

    def to_bytes(self):
        """Encode the information into a byte stream.

        :return: The format header, the two length lines (compressed then
            uncompressed) and the compressed content, joined together.
        """
        self._create_z_content()
        if _USE_LZMA:
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, chose whether you
            want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: if a record has an invalid kind character or
            length, or a delta does not add up to its declared sizes.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                if include_text:
                    text = self._content[pos:pos+content_len]
                    result.append(('f', content_len, text))
                else:
                    result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        if include_text:
                            text = self._content[offset:offset+length]
                            delta_info.append(('c', offset, length, text))
                        else:
                            delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                                     ' extraction resulted in %d bytes'
                                     % (decomp_len, measured_len))
            pos += content_len
        return result

404

405

406

class _LazyGroupCompressFactory(object):
    """A record whose bytes are pulled out of a shared block on request."""

    def __init__(self, key, parents, manager, start, end, first):
        """Create a _LazyGroupCompressFactory

        :param key: The key of just this record
        :param parents: The parents of this key (possibly None)
        :param manager: The manager object; it supplies ``_block``,
            ``_prepare_for_extract()`` and ``_wire_bytes()``
        :param start: Offset of the first byte for this record in the
            uncompressed content
        :param end: Offset of the byte just after the end of this record
            (ie, bytes = content[start:end])
        :param first: Is this the first Factory for the given block?
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        self._start = start
        self._end = end
        self._first = first
        # Only the first record of a block carries the wire bytes; the
        # others advertise themselves as references to it.
        if first:
            self.storage_kind = 'groupcompress-block'
        else:
            self.storage_kind = 'groupcompress-block-ref'
        self._bytes = None
        # Note: This attribute coupled with Manager._factories creates a
        #       reference cycle. Perhaps we would rather use a weakref(), or
        #       find an appropriate time to release the ref. After the first
        #       get_bytes_as call? After Manager.get_record_stream() returns
        #       the object?
        self._manager = manager

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s, first=%s)' % (class_name, self.key, self._first)

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This record's own storage kind, 'fulltext' or
            'chunked'.
        :return: For the record's own storage kind, the manager's wire bytes
            if this is the first record of the block and '' otherwise. For
            'fulltext' the extracted text, for 'chunked' a one-element list
            holding it.
        :raises errors.UnavailableRepresentation: for any other kind.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                return ''
            # wire bytes, something...
            return self._manager._wire_bytes()
        if storage_kind not in ('fulltext', 'chunked'):
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                   self.storage_kind)
        if self._bytes is None:
            # Grab and cache the raw bytes for this entry.
            manager = self._manager
            manager._prepare_for_extract()
            self._bytes = manager._block.extract(self.key, self._start,
                                                 self._end)
        # There are code paths that first extract as fulltext, and then
        # extract as storage_kind (smart fetch). So we don't break the
        # refcycle here, but instead in manager.get_record_stream()
        if storage_kind == 'chunked':
            return [self._bytes]
        return self._bytes

466

467

468

class _LazyGroupContentManager(object):

469

"""This manages a group of _LazyGroupCompressFactory objects."""

470

471

_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of

472

# current size, and still be considered

473

# resuable

474

_full_block_size = 4*1024*1024

475

_full_mixed_block_size = 2*1024*1024

476

_full_enough_block_size = 3*1024*1024 # size at which we won't repack

477

_full_enough_mixed_block_size = 2*768*1024 # 1.5MB

478

479

def __init__(self, block):
    """Remember the block and start with no factories registered.

    :param block: The block object stored as ``self._block``.
    """
    # Highest end offset seen so far across the registered factories.
    self._last_byte = 0
    # Kept as a list because we need to preserve the ordering.
    self._factories = []
    self._block = block

484

485

def add_factory(self, key, parents, start, end):
    """Register one more record that lives in this manager's block.

    :param key: The key of the record.
    :param parents: The parents of the record (passed straight through).
    :param start: Offset of the record's first byte in the block content.
    :param end: Offset just past the record's last byte.
    """
    # The very first factory added is flagged as 'first'.
    is_first = not self._factories
    # Note that this creates a reference cycle....
    new_factory = _LazyGroupCompressFactory(key, parents, self,
                                            start, end, first=is_first)
    # max() works here, but as a function call, doing a compare seems to be
    # significantly faster, timeit says 250ms for max() and 100ms for the
    # comparison
    if end > self._last_byte:
        self._last_byte = end
    self._factories.append(new_factory)

499

500

def get_record_stream(self):
    """Get a record for all keys added so far.

    Each factory is yielded in insertion order. Once the consumer asks for
    the next one, the previous factory's cached bytes and its manager
    reference are dropped.
    """
    for record in self._factories:
        yield record
        # Break the ref-cycle
        record._bytes = None
        record._manager = None
    # TODO: Consider setting self._factories = None after the above loop,
    #       as it will break the reference cycle

509

510

def _trim_block(self, last_byte):
    """Create a new GroupCompressBlock, with just some of the content.

    :param last_byte: Number of leading content bytes to keep.
    """
    # None of the factories need to be adjusted, because the content is
    # located in an identical place. Just that some of the unreferenced
    # trailing bytes are stripped
    old_block = self._block
    trace.mutter('stripping trailing bytes from groupcompress block'
                 ' %d => %d', old_block._content_length, last_byte)
    trimmed = GroupCompressBlock()
    old_block._ensure_content(last_byte)
    trimmed.set_content(old_block._content[:last_byte])
    self._block = trimmed

521

522

def _rebuild_block(self):
    """Create a new GroupCompressBlock with only the referenced texts.

    Every factory's fulltext is fed through a fresh GroupCompressor, and
    the factory's sha1 and offsets are updated to match the new block.
    """
    compressor = GroupCompressor()
    started = time.time()
    previous_length = self._block._content_length
    end_point = 0
    for factory in self._factories:
        fulltext = factory.get_bytes_as('fulltext')
        (new_sha1, new_start, end_point,
         _kind) = compressor.compress(factory.key, fulltext, factory.sha1)
        # Now update this factory with the new offsets, etc
        factory.sha1 = new_sha1
        factory._start = new_start
        factory._end = end_point
    # Whatever the last record ended at is the new high-water mark.
    self._last_byte = end_point
    rebuilt = compressor.flush()
    # TODO: Should we check that new_block really *is* smaller than the old
    #       block? It seems hard to come up with a method that it would
    #       expand, since we do full compression again. Perhaps based on a
    #       request that ends up poorly ordered?
    elapsed = time.time() - started
    self._block = rebuilt
    trace.mutter('creating new compressed block on-the-fly in %.3fs'
                 ' %d bytes => %d bytes', elapsed, previous_length,
                 self._block._content_length)

547

548

def _prepare_for_extract(self):
    """A _LazyGroupCompressFactory is about to extract to fulltext."""
    # We expect that if one child is going to fulltext, all will be. This
    # helps prevent all of them from extracting a small amount at a time.
    # Which in itself isn't terribly expensive, but resizing 2MB 32kB at a
    # time (self._block._content) is a little expensive.
    highest_needed = self._last_byte
    self._block._ensure_content(highest_needed)

555

556

def _check_rebuild_action(self):
    """Check to see if our block should be repacked.

    :return: A tuple (action, last_byte_used, total_bytes_used). action is
        None when at least half of the block content is referenced, 'trim'
        when the referenced bytes are less than half of the block but more
        than half of the range up to the last referenced byte, and
        'rebuild' otherwise.
    """
    bytes_used = 0
    high_water = 0
    for factory in self._factories:
        record_end = factory._end
        bytes_used += record_end - factory._start
        if high_water < record_end:
            high_water = record_end
    doubled = bytes_used * 2
    if doubled >= self._block._content_length:
        # If we are using more than half of the bytes from the block, we
        # have nothing else to check
        action = None
    elif doubled > high_water:
        # We are using less than 50% of the content. The content we are
        # using is at the beginning of the block, so we can just trim the
        # tail, rather than rebuilding from scratch.
        action = 'trim'
    else:
        # We are using a small amount of the data, and it isn't just packed
        # nicely at the front, so rebuild the content.
        # Note: This would be *nicer* as a strip-data-from-group, rather
        #       than building it up again from scratch
        #       It might be reasonable to consider the fulltext sizes for
        #       different bits when deciding this, too. As you may have a
        #       small fulltext, and a trivial delta, and you are just
        #       trading around for another fulltext. If we do a simple
        #       'prune' you may end up expanding many deltas into fulltexts,
        #       as well.
        #       If we build a cheap enough 'strip', then we could try a
        #       strip, if that expands the content, we then rebuild.
        action = 'rebuild'
    return action, high_water, bytes_used

586

587

def check_is_well_utilized(self):
    """Is the current block considered 'well utilized'?

    This is a bit of a heuristic, but it basically asks if the current
    block considers itself to be a fully developed group, rather than just
    a loose collection of data.

    :return: True or False.
    """
    factories = self._factories
    if len(factories) == 1:
        # A block of length 1 is never considered 'well utilized' :)
        return False
    _action, _last_used, bytes_used = self._check_rebuild_action()
    size = self._block._content_length
    if bytes_used < size * self._max_cut_fraction:
        # This block wants to trim itself small enough that we want to
        # consider it under-utilized.
        return False
    # TODO: This code is meant to be the twin of _insert_record_stream's
    #       'start_new_block' logic. It would probably be better to factor
    #       out that logic into a shared location, so that it stays
    #       together better
    # We currently assume a block is properly utilized whenever it is >75%
    # of the size of a 'full' block. In normal operation, a block is
    # considered full when it hits 4MB of same-file content. So any block
    # >3MB is 'full enough'.
    # The only time this isn't true is when a given block has large-object
    # content. (a single file >4MB, etc.)
    # Under these circumstances, we allow a block to grow to
    # 2 x largest_content.  Which means that if a given block had a large
    # object, it may actually be under-utilized. However, given that this
    # is 'pack-on-the-fly' it is probably reasonable to not repack large
    # content blobs on-the-fly.
    if size >= self._full_enough_block_size:
        return True
    # If a block is <3MB, it still may be considered 'full' if it contains
    # mixed content. The current rule is 2MB of mixed content is considered
    # full. So check to see if this block contains mixed content, and
    # set the threshold appropriately.
    first_prefix = None
    for factory in factories:
        prefix = factory.key[:-1]
        if first_prefix is None:
            first_prefix = prefix
            continue
        if prefix != first_prefix:
            # Mixed content, check the size appropriately
            return size >= self._full_enough_mixed_block_size
    # The content failed both the mixed check and the single-content check
    # so obviously it is not fully utilized
    # TODO: there is one other constraint that isn't being checked
    #       namely, that the entries in the block are in the appropriate
    #       order. For example, you could insert the entries in exactly
    #       reverse groupcompress order, and we would think that is ok.
    #       (all the right objects are in one group, and it is fully
    #       utilized, etc.) For now, we assume that case is rare,
    #       especially since we should always fetch in 'groupcompress'
    #       order.
    return False

645

646

def _check_rebuild_block(self):
    """Trim or rebuild the block if _check_rebuild_action says to.

    :raises ValueError: if the reported action is not None, 'trim' or
        'rebuild'.
    """
    action, last_byte_used, _bytes_used = self._check_rebuild_action()
    if action is None:
        # Nothing to do.
        return
    if action == 'rebuild':
        self._rebuild_block()
        return
    if action == 'trim':
        self._trim_block(last_byte_used)
        return
    raise ValueError('unknown rebuild action: %r' % (action,))

656

657

def _wire_bytes(self):
    """Return a byte stream suitable for transmitting over the wire.

    The stream is laid out as:
      'groupcompress-block\n'
      <length of compressed key info>\n
      <length of uncompressed info>\n
      <length of gc block>\n
      <header bytes>
      <gc-block>

    The header holds four lines per factory: the key, the parents ('' for
    (), 'None:' for None), the start offset and the end byte.
    """
    self._check_rebuild_block()
    records = []
    for factory in self._factories:
        key_field = '\x00'.join(factory.key)
        parent_keys = factory.parents
        if parent_keys is None:
            parents_field = 'None:'
        else:
            parents_field = '\t'.join(
                ['\x00'.join(parent) for parent in parent_keys])
        records.append('%s\n%s\n%d\n%d\n' % (
            key_field, parents_field, factory._start, factory._end))
        # TODO: Can we break the refcycle at this point and set
        #       factory._manager = None?
    header = ''.join(records)
    del records
    header_len = len(header)
    z_header = zlib.compress(header)
    # Drop the uncompressed header before asking the block for its bytes.
    del header
    block_bytes = self._block.to_bytes()
    preamble = '%d\n%d\n%d\n' % (len(z_header), header_len,
                                 len(block_bytes))
    return ''.join(['groupcompress-block\n', preamble, z_header,
                    block_bytes])

702

703

@classmethod
def from_bytes(cls, bytes):
    """Rebuild a manager from the byte stream written by _wire_bytes().

    :param bytes: The storage kind line, three length lines, the
        zlib-compressed header and finally the group compress block.
    :return: A cls instance wrapping the parsed block, with one factory
        added for every four header lines.
    :raises ValueError: If the storage kind is not 'groupcompress-block',
        a length field disagrees with the data, or the header is
        malformed.
    """
    # TODO: This does extra string copying, probably better to do it a
    #       different way
    (kind, z_header_len, header_len,
     block_len, payload) = bytes.split('\n', 4)
    del bytes
    if kind != 'groupcompress-block':
        raise ValueError('Unknown storage kind: %s' % (kind,))
    z_header_len = int(z_header_len)
    if z_header_len > len(payload):
        raise ValueError('Compressed header len shorter than all bytes')
    z_header = payload[:z_header_len]
    header_len = int(header_len)
    header = zlib.decompress(z_header)
    if header_len != len(header):
        raise ValueError('invalid length for decompressed bytes')
    del z_header
    block_len = int(block_len)
    if z_header_len + block_len != len(payload):
        raise ValueError('Invalid length for block')
    block_bytes = payload[z_header_len:]
    del payload
    # So now we have a valid GCB, we just need to parse the factories that
    # were sent to us
    fields = header.split('\n')
    del header
    if fields.pop() != '':
        raise ValueError('header lines did not end with a trailing'
                         ' newline')
    if len(fields) % 4:
        raise ValueError('The header was not an even multiple of 4 lines')
    block = GroupCompressBlock.from_bytes(block_bytes)
    del block_bytes
    manager = cls(block)
    for base in xrange(0, len(fields), 4):
        # intern()?
        key_line, parents_line, start_line, end_line = fields[base:base + 4]
        key = tuple(key_line.split('\x00'))
        if parents_line == 'None:':
            parents = None
        else:
            # Empty segments are dropped, so '' decodes to ().
            parents = tuple([tuple(segment.split('\x00'))
                             for segment in parents_line.split('\t')
                             if segment])
        manager.add_factory(key, parents, int(start_line), int(end_line))
    return manager

753

754

755

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a record stream.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised block, as parsed by
        _LazyGroupContentManager.from_bytes.
    :param line_end: Not used by this function.
    :return: The result of get_record_stream() on the parsed manager.
    :raises ValueError: If storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

760

761

762

class _CommonGroupCompressor(object):
    """State and entry points shared by the group compressors.

    Subclasses provide _compress() and set self._delta_index.
    """

    def __init__(self):
        """Create a GroupCompressor."""
        self._block = GroupCompressBlock()
        self._delta_index = None # Set by the children
        self._last = None
        self.chunks = []
        self.labels_deltas = {}
        self.endpoint = 0
        self.input_bytes = 0

    def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output
            for identification of the text during decompression. If the last
            element is None it is replaced with the sha1 of the text -
            e.g. sha1:xxxxxxx.
        :param bytes: The bytes to be compressed
        :param expected_sha: If non-None, this value is used as the sha1 of
            the text instead of computing it.
        :param nostore_sha: If the sha1 of the text matches, we will raise
            ExistingContent rather than adding the text.
        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.

        :return: The sha1 of lines, the start and end offsets in the delta, and
            the type ('fulltext' or 'delta').

        :seealso VersionedFiles.add_lines:
        """
        if not bytes: # empty, like a dir entry, etc
            if nostore_sha == _null_sha1:
                raise errors.ExistingContent()
            return _null_sha1, 0, 0, 'fulltext'
        if expected_sha is None:
            sha1 = osutils.sha_string(bytes)
        else:
            # we assume someone knew what they were doing when they passed
            # it in
            sha1 = expected_sha
        if nostore_sha is not None and sha1 == nostore_sha:
            raise errors.ExistingContent()
        if key[-1] is None:
            key = key[:-1] + ('sha1:' + sha1,)

        # Half the input length is the largest delta we accept.
        start, end, kind = self._compress(key, bytes, len(bytes) / 2, soft)
        return sha1, start, end, kind

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output for identification
            of the text during decompression.

        :param bytes: The bytes to be compressed

        :param max_delta_size: The size above which we issue a fulltext instead
            of a delta.

        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.

        :return: The start and end offsets in the delta, and the type
            ('fulltext' or 'delta').
        """
        raise NotImplementedError(self._compress)

    def extract(self, key):
        """Extract a key previously added to the compressor.

        :param key: The key to extract.
        :return: A (bytes, sha1) tuple for the reconstructed text.
        :raises ValueError: If the stored record is neither a fulltext ('f')
            nor a delta ('d'), or its encoded length disagrees with the
            number of bytes stored.
        """
        (start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]
        stored = ''.join(self.chunks[start_chunk:end_chunk])
        kind = stored[0]
        if kind == 'f':
            fulltext_len, offset = decode_base128_int(stored[1:10])
            expected_len = fulltext_len + 1 + offset
            if expected_len != len(stored):
                raise ValueError('Index claimed fulltext len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored), expected_len))
            text = stored[offset + 1:]
        else:
            # XXX: This is inefficient at best
            basis = ''.join(self.chunks[:start_chunk])
            if kind != 'd':
                raise ValueError('Unknown content kind, bytes claim %s'
                                 % (kind,))
            delta_len, offset = decode_base128_int(stored[1:10])
            expected_len = delta_len + 1 + offset
            if expected_len != len(stored):
                raise ValueError('Index claimed delta len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored), expected_len))
            text = apply_delta(basis, stored[offset + 1:])
        return text, osutils.sha_string(text)

    def flush(self):
        """Finish this group, creating a formatted stream.

        After calling this, the compressor should no longer be used
        """
        # TODO: this causes us to 'bloat' to 2x the size of content in the
        #       group. This has an impact for 'commit' of large objects.
        #       One possibility is to use self._content_chunks, and be lazy and
        #       only fill out self._content as a full string when we actually
        #       need it. That would at least drop the peak memory consumption
        #       for 'commit' down to ~1x the size of the largest file, at a
        #       cost of increased complexity within this code. 2x is still <<
        #       3x the size of the largest file, so we are doing ok.
        block = self._block
        block.set_chunked_content(self.chunks, self.endpoint)
        self.chunks = None
        self._delta_index = None
        return block

    def pop_last(self):
        """Call this if you want to 'revoke' the last compression.

        After this, the data structures will be rolled back, but you cannot do
        more compression.
        """
        self._delta_index = None
        chunk_count, previous_endpoint = self._last[0], self._last[1]
        del self.chunks[chunk_count:]
        self.endpoint = previous_endpoint
        self._last = None

    def ratio(self):
        """Return the overall compression ratio."""
        total_in = float(self.input_bytes)
        total_out = float(self.endpoint)
        return total_in / total_out

897

898

899

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor built on LinesDeltaIndex."""

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        delta_index = LinesDeltaIndex([])
        self._delta_index = delta_index
        # The actual content is managed by LinesDeltaIndex
        self.chunks = delta_index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        input_len = len(bytes)
        new_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            new_lines, bytes_length=input_len, soft=soft)
        delta_length = sum([len(line) for line in out_lines])
        if delta_length <= max_delta_size:
            # this is a worthy delta, output it
            kind = 'delta'
            out_lines[0] = 'd'
            # Record the encoded delta length in the second slot
            out_lines[1] = encode_base128_int(delta_length)
        else:
            # The delta is larger than max_delta_size, insert a fulltext
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(input_len)]
            out_lines.extend(new_lines)
            # The two header chunks get False, every content line True.
            index_lines = [False, False] + [True] * len(new_lines)
        # Remember the state before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        self._last = (chunk_start, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += input_len
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, len(self.chunks))
        return start, self.endpoint, kind

942

943

944

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting
       minimal edits only).
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        """Create a compressor whose delta index is a DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        The soft parameter is accepted but not used by this implementation.

        :raises AssertionError: If the delta index's _source_offset does not
            match self.endpoint before or after the record is written.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No delta was produced, so store the whole text.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # One byte for the 'f' marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            # One byte for the 'd' marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # The leading space on the second literal matters: adjacent
            # literals are joined with no separator.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, type

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion (chunk count, endpoint) in self._last,
        appends the chunks and advances self.endpoint by their total length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

1018

1019

1020

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Passed through to _GCGraphIndex.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles writing to 'newpack' on that transport.
        The result carries the open stream and writer as attributes.
    """
    def factory(transport):
        # One reference list when a graph is stored, none otherwise.
        if graph:
            reference_lists = 1
        else:
            reference_lists = 0
        builder = BTreeBuilder(reference_lists=reference_lists,
            key_elements=keylength)
        out_stream = transport.open_write_stream('newpack')
        container = pack.ContainerWriter(out_stream.write)
        container.begin()
        gc_index = _GCGraphIndex(builder, lambda:True, parents=graph,
            add_callback=builder.add_nodes,
            inconsistency_fatal=inconsistency_fatal)
        pack_access = knit._DirectPackAccess({})
        pack_access.set_writer(container, builder, (transport, 'newpack'))
        vf = GroupCompressVersionedFiles(gc_index, pack_access, delta)
        # Kept on the result so cleanup_pack_group can finish them later.
        vf.stream = out_stream
        vf.writer = container
        return vf
    return factory

1050

1051

1052

def cleanup_pack_group(versioned_files):
    """End the pack writer and close the stream of a versioned files object.

    :param versioned_files: An object exposing `writer` and `stream`
        attributes, such as the result of a make_pack_factory() factory.
    """
    try:
        versioned_files.writer.end()
    finally:
        # Close the stream even if ending the container fails, so the
        # write stream is not leaked.
        versioned_files.stream.close()

1055

1056

1057

class _BatchingBlockFetcher(object):
    """Fetch group compress blocks in batches.

    :ivar total_bytes: int of expected number of bytes needed to fetch the
        currently pending batch.
    """

    def __init__(self, gcvf, locations):
        """Create a fetcher.

        :param gcvf: Object providing _group_cache and _get_blocks().
        :param locations: Mapping from key to a 4-tuple whose first element
            is the index memo and whose third element is the parents.
        """
        self.gcvf = gcvf
        self.locations = locations
        # Keys queued since the last yield_factories() call, in add order.
        self.keys = []
        # read_memo -> block (None until the block has been fetched).
        self.batch_memos = {}
        # read_memos that still have to be fetched, in first-seen order.
        self.memos_to_get = []
        self.total_bytes = 0
        self.last_read_memo = None
        self.manager = None

    def add_key(self, key):
        """Add another key to fetch.

        :return: The estimated number of bytes needed to fetch the batch so
            far.
        """
        self.keys.append(key)
        index_memo, _, _, _ = self.locations[key]
        read_memo = index_memo[0:3]
        # Three possibilities for this read_memo:
        #  - it's already part of this batch; or
        #  - it's not yet part of this batch, but is already cached; or
        #  - it's not yet part of this batch and will need to be fetched.
        if read_memo in self.batch_memos:
            # This read memo is already in this batch.
            return self.total_bytes
        try:
            cached_block = self.gcvf._group_cache[read_memo]
        except KeyError:
            # This read memo is new to this batch, and the data isn't cached
            # either.
            self.batch_memos[read_memo] = None
            self.memos_to_get.append(read_memo)
            byte_length = read_memo[2]
            self.total_bytes += byte_length
        else:
            # This read memo is new to this batch, but cached.
            # Keep a reference to the cached block in batch_memos because it's
            # certain that we'll use it when this batch is processed, but
            # there's a risk that it would fall out of _group_cache between now
            # and then.
            self.batch_memos[read_memo] = cached_block
        return self.total_bytes

    def _flush_manager(self):
        """Yield the current manager's records, then forget the manager."""
        if self.manager is not None:
            for factory in self.manager.get_record_stream():
                yield factory
            self.manager = None
            self.last_read_memo = None

    def yield_factories(self, full_flush=False):
        """Yield factories for keys added since the last yield.  They will be
        returned in the order they were added via add_key.

        :param full_flush: by default, some results may not be returned in case
            they can be part of the next batch.  If full_flush is True, then
            all results are returned.
        :raises AssertionError: If _get_blocks() returns a block for a
            different read memo than the one expected next.
        """
        if self.manager is None and not self.keys:
            return
        # Fetch all memos in this batch.
        blocks = self.gcvf._get_blocks(self.memos_to_get)
        # Turn blocks into factories and yield them.
        # Reversed copy, so the next memo to fetch is at the end.
        memos_to_get_stack = list(self.memos_to_get)
        memos_to_get_stack.reverse()
        for key in self.keys:
            index_memo, _, parents, _ = self.locations[key]
            read_memo = index_memo[:3]
            if self.last_read_memo != read_memo:
                # We are starting a new block. If we have a
                # manager, we have found everything that fits for
                # now, so yield records
                for factory in self._flush_manager():
                    yield factory
                # Now start a new manager.
                if memos_to_get_stack and memos_to_get_stack[-1] == read_memo:
                    # The next block from _get_blocks will be the block we
                    # need.
                    block_read_memo, block = blocks.next()
                    if block_read_memo != read_memo:
                        # The leading space on the second literal matters:
                        # adjacent literals are joined with no separator.
                        raise AssertionError(
                            "block_read_memo out of sync with read_memo"
                            " (%r != %r)" % (block_read_memo, read_memo))
                    self.batch_memos[read_memo] = block
                    memos_to_get_stack.pop()
                else:
                    block = self.batch_memos[read_memo]
                self.manager = _LazyGroupContentManager(block)
                self.last_read_memo = read_memo
            start, end = index_memo[3:5]
            self.manager.add_factory(key, parents, start, end)
        if full_flush:
            for factory in self._flush_manager():
                yield factory
        # Reset the batch state in place.
        del self.keys[:]
        self.batch_memos.clear()
        del self.memos_to_get[:]
        self.total_bytes = 0

1163

1164

1165

class GroupCompressVersionedFiles(VersionedFiles):

1166

"""A group-compress based VersionedFiles implementation."""

1167

1168

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    self._index = index
    self._access = access
    self._delta = delta
    self._unadded_refs = {}
    # Block cache limit: 50MB.
    cache_limit = 50*1024*1024
    self._group_cache = LRUSizeCache(max_size=cache_limit)
    self._fallback_vfs = []

1181

1182

def add_lines(self, key, parents, lines, parent_texts=None,
    left_matching_blocks=None, nostore_sha=None, random_id=False,
    check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \n and contain no
        other \n's. The last line may either contain no \n's or a single
        terminating \n. If the lines list does not meet this constraint the
        add routine may error or may succeed - but you will be unable to
        read the data back accurately. (Checking the lines have been split
        correctly is expensive and extremely unlikely to catch bugs so it
        is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. VERY IMPORTANT: the texts must be those
        returned by add_lines or data corruption can be caused.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent.  The format is
        the SequenceMatcher.get_matching_blocks format.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and an opaque
        representation of the inserted version which can be provided
        back to future add_lines calls in the parent_texts dictionary.
        This implementation always returns None for the opaque part.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    total_len = sum([len(line) for line in lines])
    record = ChunkedContentFactory(key, parents, None, lines)
    sha1s = list(self._insert_record_stream([record], random_id=random_id,
                                            nostore_sha=nostore_sha))
    return sha1s[0], total_len, None

1231

1232

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :return: The text sha1, len(text) and None.
    :raises errors.BzrBadParameterUnicode: If text is not exactly a str.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_len = len(text)
    record = FulltextContentFactory(key, parents, None, text)
    sha1s = list(self._insert_record_stream([record], random_id=random_id,
                                            nostore_sha=nostore_sha))
    return sha1s[0], text_len, None

1249

1250

def add_fallback_versioned_files(self, a_versioned_files):
    """Register a further source for texts not present in this object.

    Fallbacks are consulted in the order they were added.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1256

1257

def annotate(self, key):
    """See VersionedFiles.annotate.

    Delegates to annotate_flat() on a new annotate.Annotator for this
    object.
    """
    return annotate.Annotator(self).annotate_flat(key)

1261

1262

def get_annotator(self):
    """Return a new annotate.Annotator built over this object."""
    annotator = annotate.Annotator(self)
    return annotator

1264

1265

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    With explicit keys, the record stream for those keys is returned for
    the caller to consume. Without keys, every record from self.keys() is
    read as a fulltext here and None is returned.
    """
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)
    all_keys = self.keys()
    for record in self.get_record_stream(all_keys, 'unordered', True):
        record.get_bytes_as('fulltext')

1273

1274

def _check_add(self, key, lines, random_id, check_content):
    """check that version_id and lines are safe to add.

    :raises errors.InvalidRevisionId: If the last key element is not None
        and contains whitespace.
    """
    version_id = key[-1]
    if version_id is not None and osutils.contains_whitespace(version_id):
        raise errors.InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1288

1289

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    Ancestry is looked up in this object's index first. Keys still
    missing are then searched for in each fallback's index in turn.
    """
    # Note that this is identical to
    # KnitVersionedFiles.get_known_graph_ancestry, but they don't share
    # ancestry.
    parent_map, still_missing = self._index.find_ancestry(keys)
    for fallback in self._fallback_vfs:
        if not still_missing:
            break
        found = fallback._index.find_ancestry(still_missing)
        (fallback_parents, still_missing) = found
        parent_map.update(fallback_parents)
    return _mod_graph.KnownGraph(parent_map)

1304

1305

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    map_and_sources = self._get_parent_map_with_sources(keys)
    return map_and_sources[0]

1313

1314

def _get_parent_map_with_sources(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A tuple. The first element is a mapping from keys to parents.
        Absent keys are absent from the mapping. The second element is a
        list with the locations each key was found in. The first element
        is the in-this-knit parents, the second the first fallback source,
        and so on.
    """
    combined = {}
    per_source = []
    unresolved = set(keys)
    for source in [self._index] + self._fallback_vfs:
        if not unresolved:
            # Everything was found; later sources are not queried.
            break
        found = source.get_parent_map(unresolved)
        per_source.append(found)
        combined.update(found)
        unresolved.difference_update(set(found))
    return combined, per_source

1336

1337

def _get_blocks(self, read_memos):
    """Get GroupCompressBlocks for the given read_memos.

    Blocks already in self._group_cache are reused. The others are read
    through self._access, each distinct memo once, and cached.

    :returns: a series of (read_memo, block) pairs, in the order they were
        originally passed.
    """
    known = {}
    for read_memo in read_memos:
        try:
            cached_block = self._group_cache[read_memo]
        except KeyError:
            continue
        known[read_memo] = cached_block
    to_fetch = []
    queued = set()
    for read_memo in read_memos:
        # Don't fetch what we already have, and don't try to fetch the
        # same data twice
        if read_memo in known or read_memo in queued:
            continue
        to_fetch.append(read_memo)
        queued.add(read_memo)
    raw_records = self._access.get_raw_records(to_fetch)
    for read_memo in read_memos:
        if read_memo not in known:
            # Read the block, and cache it.
            zdata = raw_records.next()
            fetched = GroupCompressBlock.from_bytes(zdata)
            self._group_cache[read_memo] = fetched
            known[read_memo] = fetched
        yield read_memo, known[read_memo]

1373

1374

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: Always an empty frozenset in this implementation.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    nothing_missing = frozenset()
    return nothing_missing

1383

1384

def get_record_stream(self, keys, ordering, include_delta_closure):

1385

"""Get a stream of records for keys.

1386

1387

:param keys: The keys to include.

1388

:param ordering: Either 'unordered' or 'topological'. A topologically

1389

sorted stream has compression parents strictly before their

1390

children.

1391

:param include_delta_closure: If True then the closure across any

1392

compression parents will be included (in the opaque data).

1393

:return: An iterator of ContentFactory objects, each of which is only

1394

valid until the iterator is advanced.

1395

"""

1396

# keys might be a generator

1397

orig_keys = list(keys)

1398

keys = set(keys)

1399

if not keys:

1400

return

1401

if (not self._index.has_graph

1402

and ordering in ('topological', 'groupcompress')):

1403

# Cannot topological order when no graph has been stored.

1404

# but we allow 'as-requested' or 'unordered'

1405

ordering = 'unordered'

1406

1407

remaining_keys = keys

1408

while True:

1409

try:

1410

keys = set(remaining_keys)

1411

for content_factory in self._get_remaining_record_stream(keys,

1412

orig_keys, ordering, include_delta_closure):

1413

remaining_keys.discard(content_factory.key)

1414

yield content_factory

1415

return

1416

except errors.RetryWithNewPacks, e:

1417

self._access.reload_or_raise(e)

1418

1419

def _find_from_fallback(self, missing):
    """Find whatever keys you can from the fallbacks.

    :param missing: A set of missing keys. This set will be mutated as keys
        are found from a fallback_vfs
    :return: (parent_map, key_to_source_map, source_results)
        parent_map  the overall key => parent_keys
        key_to_source_map   a dict from {key: source}
        source_results      a list of (source: keys)
    """
    parent_map = {}
    key_to_source_map = {}
    source_results = []
    for fallback in self._fallback_vfs:
        if not missing:
            break
        fallback_parents = fallback.get_parent_map(missing)
        parent_map.update(fallback_parents)
        found_keys = list(fallback_parents)
        source_results.append((fallback, found_keys))
        for found_key in found_keys:
            key_to_source_map[found_key] = fallback
        missing.difference_update(found_keys)
    return parent_map, key_to_source_map, source_results

1442

1443

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Build the (source, [keys]) list for a globally sorted ordering.

    The keys of parent_map are sorted first, and only then split into runs
    of consecutive keys served by the same source, so the result can weave
    back and forth between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'. Any
        value other than 'topological' is handled as 'groupcompress'.
    :param parent_map: A dict of key => parent_keys for every present key.
    :param key_to_source_map: A dict of {key: source}; keys that are not in
        it are attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering != 'topological':
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        sorted_keys = sort_gc_optimal(parent_map)
    else:
        sorted_keys = topo_sort(parent_map)
    # Now group by source, opening a new run whenever the source changes.
    grouped = []
    previous = None
    for key in sorted_keys:
        owner = key_to_source_map.get(key, self)
        if owner is not previous:
            previous = owner
            grouped.append((owner, []))
        grouped[-1][1].append(key)
    return grouped

1471

1472

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,
                                  key_to_source_map):
    """Group the present keys by source, keeping the caller's key order.

    A key found in locations or in unadded_keys is served by self; a key
    found in key_to_source_map is served by the source recorded there; any
    other key is absent and is left out of the result.

    :param orig_keys: The keys, in the order they were requested.
    :param locations: Container of the keys stored in this object's index.
    :param unadded_keys: Container of the keys not yet written to the index.
    :param key_to_source_map: A dict of {key: fallback source}.
    :return: A list of (source, [keys]) tuples; consecutive keys served by
        the same source share a single entry.
    """
    grouped = []
    previous = None
    for key in orig_keys:
        if key in locations or key in unadded_keys:
            owner = self
        else:
            if key not in key_to_source_map:
                # absent
                continue
            owner = key_to_source_map[key]
        if owner is not previous:
            previous = owner
            grouped.append((owner, []))
        grouped[-1][1].append(key)
    return grouped

1488

1489

def _get_io_ordered_source_keys(self, locations, unadded_keys,
                                source_result):
    """Build the (source, [keys]) list for the 'unordered' case.

    :param locations: A dict of key => build details for the keys stored in
        this object; element 0 of each value is used as the sort key.
    :param unadded_keys: The keys that are only held in memory so far.
    :param source_result: The (source, [keys]) pairs found in the fallbacks.
    :return: A list whose first entry is (self, keys) and whose remaining
        entries are those of source_result.
    """
    # We don't have an ordering for keys in the in-memory object, but
    # lets process the in-memory ones first.
    local_keys = list(unadded_keys)
    # The stored keys follow, sorted on the first element of their
    # location details (the group the bytes are stored in, followed by the
    # location in the group).
    local_keys.extend(sorted(locations, key=lambda key: locations[key][0]))
    # Now grab all of the ones from other sources
    result = [(self, local_keys)]
    result.extend(source_result)
    return result

1503

1504

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The set of keys to include.
    :param orig_keys: The same keys in the order the caller supplied them;
        only consulted when ordering is 'as-requested'.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data). Here it
        is simply forwarded to the fallbacks' get_record_stream().
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    local_details = self._index.get_build_details(keys)
    pending_keys = set(self._unadded_refs).intersection(keys)
    absent = keys.difference(local_details)
    absent.difference_update(pending_keys)
    # Whatever the fallbacks can supply is removed from 'absent' in place.
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(absent)
    if ordering == 'as-requested':
        plan = self._get_as_requested_source_keys(
            orig_keys, local_details, pending_keys, key_to_source_map)
    elif ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        graph = {}
        for key in local_details:
            graph[key] = local_details[key][2]
        for key in pending_keys:
            graph[key] = self._unadded_refs[key]
        graph.update(fallback_parent_map)
        plan = self._get_ordered_source_keys(ordering, graph,
                                             key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        plan = self._get_io_ordered_source_keys(
            local_details, pending_keys, source_result)
    for key in absent:
        yield AbsentContentFactory(key)
    # Batch up as many keys as we can until either:
    #  - we encounter an unadded ref, or
    #  - we run out of keys, or
    #  - the total bytes to retrieve for this batch > BATCH_SIZE
    batcher = _BatchingBlockFetcher(self, local_details)
    for source, source_keys in plan:
        if source is not self:
            # A fallback serves these keys: empty the batcher first, then
            # hand the whole run over to the fallback.
            for factory in batcher.yield_factories(full_flush=True):
                yield factory
            for record in source.get_record_stream(source_keys, ordering,
                                                   include_delta_closure):
                yield record
            continue
        for key in source_keys:
            if key not in self._unadded_refs:
                if batcher.add_key(key) > BATCH_SIZE:
                    # Ok, this batch is big enough. Yield some results.
                    for factory in batcher.yield_factories():
                        yield factory
                continue
            # Flush batch, then yield unadded ref from
            # self._compressor.
            for factory in batcher.yield_factories(full_flush=True):
                yield factory
            text, sha1 = self._compressor.extract(key)
            yield FulltextContentFactory(key, self._unadded_refs[key],
                                         sha1, text)
    for factory in batcher.yield_factories(full_flush=True):
        yield factory

1573

1574

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict of key => sha1. A record that carries a sha1 uses it
        as-is; otherwise the sha1 is computed from the record's fulltext.
        Absent records are left out of the result.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # Identity test: None is a singleton, and '!=' would dispatch to
        # whatever comparison the sha1 object happens to define.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1585

1586

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    The work is done by the _insert_record_stream() generator; it is run
    to completion here and the sha1s it yields are thrown away.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserted = self._insert_record_stream(stream, random_id=False)
    for unused_sha1 in inserted:
        pass

1599

1600

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.
    It is a generator, so nothing is inserted until it is iterated.

    :param stream: A stream of records to insert.
    :param random_id: Forwarded to self._index.add_records(). When True, a
        key that shows up a second time in the stream is reported with
        trace.note() and skipped.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records. Records
        whose block is copied as-is do not produce a value.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    adapter_cache = {}

    def get_adapter(adapter_key):
        # Adapters are created on first use and then kept for the rest of
        # the stream.
        if adapter_key not in adapter_cache:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter_cache[adapter_key] = adapter_factory(self)
        return adapter_cache[adapter_key]

    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    pending_nodes = []

    def flush():
        # Write out the group built so far, index every text that went
        # into it, then start over with an empty compressor.
        group_bytes = self._compressor.flush().to_bytes()
        index, start, length = self._access.add_raw_records(
            [(None, len(group_bytes))], group_bytes)[0]
        nodes = [(key, "%d %d %s" % (start, length, reads), refs)
                 for key, reads, refs in pending_nodes]
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del pending_nodes[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    seen_keys = set()
    reuse_this_block = reuse_blocks
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in seen_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            seen_keys.add(record.key)
        if not reuse_blocks:
            reuse_this_block = False
        elif record.storage_kind == 'groupcompress-block':
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            # We only check on the first record (groupcompress-block) not
            # on all of the (groupcompress-block-ref) entries; the
            # reuse_this_block flag keeps its value for the records in
            # between.
            insert_manager = record._manager
            reuse_this_block = insert_manager.check_is_well_utilized()
        if reuse_this_block:
            # We still want to reuse this block
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                raw_block = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(raw_block))], raw_block)[0]
                del raw_block
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                if insert_manager is not record._manager:
                    raise AssertionError('insert_manager does not match'
                        ' the current record, we cannot be positive'
                        ' that the appropriate content was inserted.'
                        )
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            text = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter = get_adapter((record.storage_kind, 'fulltext'))
            text = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(text):
            max_fulltext_len = len(text)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         kind) = self._compressor.compress(record.key,
                                           text, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(text)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        else:
            start_new_block = (
                end_point > 4*1024*1024
                or (prefix is not None and prefix != last_prefix
                    and end_point > 2*1024*1024))
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the current group, write that group
            # out, and compress the text again as the first entry of the
            # next group.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(text)
            (found_sha1, start_point, end_point,
             kind) = self._compressor.compress(record.key, text,
                                               record.sha1)
        if record.key[-1] is None:
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        pending_nodes.append((key, '%d %d' % (start_point, end_point),
                              (record.parents,)))
    if pending_nodes:
        flush()
    self._compressor = None

1754

1755

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have
       newline terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys to walk; duplicates are collapsed.
    :param pb: Optional progress bar, updated once per record and once more
        at the end.
    :return: An iterator over (line, key).
    """
    wanted = set(keys)
    total = len(wanted)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    stream = self.get_record_stream(wanted, 'unordered', True)
    key_idx = 0
    for record in stream:
        # XXX: todo - optimise to use less than full texts.
        key = record.key
        if pb is not None:
            pb.update('Walking content', key_idx, total)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(key, self)
        for line in osutils.split_lines(record.get_bytes_as('fulltext')):
            yield line, key
        key_idx += 1
    if pb is not None:
        pb.update('Walking content', total, total)

1794

1795

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of this object's index together with
        the keys of every fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    result = set()
    for source in [self._index] + self._fallback_vfs:
        result.update(source.keys())
    return result

1804

1805

1806

class _GCGraphIndex(object):
    """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

    def __init__(self, graph_index, is_locked, parents=True,
                 add_callback=None, track_external_parent_refs=False,
                 inconsistency_fatal=True):
        """Construct a _GCGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked and
            thus usable.
        :param parents: If True, parent references are stored with every
            key; if False the index is parentless. Also exposed as
            has_graph.
        :param add_callback: If not None, allow additions to the index and
            call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param track_external_parent_refs: As keys are added, keep track of
            the keys they reference, so that we can query
            get_missing_parents(), etc.
        :param inconsistency_fatal: When asked to add records that are
            already present, and the details are inconsistent with the
            existing record, raise an exception instead of warning (and
            skipping the record).
        """
        self._graph_index = graph_index
        self._is_locked = is_locked
        self._add_callback = add_callback
        self._inconsistency_fatal = inconsistency_fatal
        self._parents = parents
        self.has_graph = parents
        # Stays None unless reference tracking was asked for.
        self._key_dependencies = None
        if track_external_parent_refs:
            self._key_dependencies = knit._KeyRefs()

    def add_records(self, records, random_id=False):
        """Add multiple records to the index.

        Nothing is written to the underlying graph index here. The records
        are checked and prepared, and the resulting GraphIndex nodes are
        handed to self._add_callback.

        :param records: a list of tuples:
                         (key, value, refs).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        pending = {}
        rebuilt = False
        for (key, value, refs) in records:
            if not self._parents:
                if refs:
                    for ref in refs:
                        if ref:
                            raise errors.KnitCorrupt(self,
                                "attempt to add node with parents "
                                "in parentless index.")
                    refs = ()
                    rebuilt = True
            pending[key] = (value, refs)
        # check for dups
        if not random_id:
            for (index, key, value, node_refs) in self._get_entries(pending):
                if node_refs != pending[key][1]:
                    details = '%s %s %s' % (key, (value, node_refs),
                                            pending[key])
                    if not self._inconsistency_fatal:
                        trace.warning("inconsistent details in skipped"
                                      " record: %s", details)
                    else:
                        raise errors.KnitCorrupt(self, "inconsistent details"
                                                 " in add_records: %s" %
                                                 details)
                # Records that are already stored are not added again.
                del pending[key]
                rebuilt = True
        if rebuilt:
            # The caller's list no longer matches what has to be added, so
            # build a new one from what is left.
            if self._parents:
                records = [(key, pending[key][0], pending[key][1])
                           for key in pending]
            else:
                records = [(key, pending[key][0]) for key in pending]
        dependencies = self._key_dependencies
        if dependencies is not None and self._parents:
            for key, value, refs in records:
                dependencies.add_references(key, refs[0])
        self._add_callback(records)

    def _check_read(self):
        """Raise an exception if reads are not permitted."""
        if self._is_locked():
            return
        raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Raise an exception if writes are not permitted."""
        if self._is_locked():
            return
        raise errors.ObjectNotLocked(self)

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        Note: Callers are responsible for checking that the index is locked
        before calling this method.

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise RevisionNotPresent once all
            found entries have been yielded and some key had none.
        :return: An iterator of (index, key, value, refs) tuples; refs is
            always () for a parentless index.
        """
        wanted = set(keys)
        seen = set()
        has_parents = self._parents
        for node in self._graph_index.iter_entries(wanted):
            if has_parents:
                yield node
            else:
                # adapt parentless index to the rest of the code.
                yield node[0], node[1], node[2], ()
            seen.add(node[1])
        if check_present:
            absent = wanted.difference(seen)
            if absent:
                raise errors.RevisionNotPresent(absent.pop(), self)

    def find_ancestry(self, keys):
        """See CombinedGraphIndex.find_ancestry"""
        return self._graph_index.find_ancestry(keys, 0)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping. Every value is None for a parentless index.
        """
        self._check_read()
        entries = self._get_entries(keys)
        if self._parents:
            return dict((node[1], node[3][0]) for node in entries)
        return dict((node[1], None) for node in entries)

    def get_missing_parents(self):
        """Return the keys of missing parents."""
        # Copied from _KnitGraphIndex.get_missing_parents
        # We may have false positives, so filter those out.
        dependencies = self._key_dependencies
        dependencies.add_keys(
            self.get_parent_map(dependencies.get_unsatisfied_refs()))
        return frozenset(dependencies.get_unsatisfied_refs())

    def get_build_details(self, keys):
        """Get the various build details for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon; always None here
            parents
                Logical parents of this node (None for a parentless index)
            record_details
                extra information about the content which needs to be passed
                to Factory.parse_record; always ('group', None) here
        """
        self._check_read()
        details = {}
        for entry in self._get_entries(keys):
            if self._parents:
                parents = entry[3][0]
            else:
                parents = None
            details[entry[1]] = (self._node_to_position(entry),
                                 None, parents, ('group', None))
        return details

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        found = []
        for node in self._graph_index.iter_all_entries():
            found.append(node[1])
        return found

    def _node_to_position(self, node):
        """Convert an index value to position details.

        :param node: An index node whose value (node[2]) starts with four
            space separated integers.
        :return: (node[0], start, stop, basis_end, delta_end)
        """
        fields = node[2].split(' ')
        # It would be nice not to read the entire gzip.
        return (node[0], int(fields[0]), int(fields[1]),
                int(fields[2]), int(fields[3]))

    def scan_unvalidated_index(self, graph_index):
        """Inform this _GCGraphIndex that there is an unvalidated index.

        This allows this _GCGraphIndex to keep track of any missing
        compression parents we may want to have filled in to make those
        indices valid.

        :param graph_index: A GraphIndex
        """
        dependencies = self._key_dependencies
        if dependencies is None:
            # References are not being tracked, nothing to record.
            return
        # Add parent refs from graph_index (and discard parent refs that
        # the graph_index has).
        for node in graph_index.iter_all_entries():
            dependencies.add_references(node[1], node[3][0])

2033

2034

2035

2036

from bzrlib._groupcompress_py import (

2037

apply_delta,

2038

apply_delta_to_source,

2039

encode_base128_int,

2040

decode_base128_int,

2041

decode_copy_instruction,

2042

LinesDeltaIndex,

2043

)

2044

try:

2045

from bzrlib._groupcompress_pyx import (

2046

apply_delta,

2047

apply_delta_to_source,

2048

DeltaIndex,

2049

encode_base128_int,

2050

decode_base128_int,

2051

)

2052

GroupCompressor = PyrexGroupCompressor

2053

except ImportError:

2054

GroupCompressor = PythonGroupCompressor

2055

Older »