/brz/remove-bazaar : revision 1551.9.14

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Aaron Bentley
Date: 2006-12-14 21:45:21 UTC
mto: (2234.6.1 bzr.0.14) (2229.2.4 reserved-ids) (2323.6.9 0.15-integration) (1551.19.24 Aaron's mergeable stuff)
mto: This revision was merged to the branch mainline in revision 2187.
Revision ID: abentley@panoramicfeedback.com-20061214214521-88qero0up00n73on

Change topic to hidden-commands

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

patiencediff,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.ui

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data
#       files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor
#       position after writing to work out where it was located.  we may need to
#       bypass python file buffering.

109

110

DATA_SUFFIX = '.knit'

111

INDEX_SUFFIX = '.kndx'

112

113

114

class KnitContent(object):
    """Line content of a single knit version, with per-line origins.

    The content is kept in ``_lines`` as a sequence of (origin, text)
    pairs; line deltas between two contents are computed from the text
    halves only.
    """

    def __init__(self, lines):
        """Wrap ``lines``, a sequence of (origin, text) pairs (not copied)."""
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the stored (origin, text) pairs."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a new list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into ``new_lines``.

        :param new_lines: The target KnitContent.
        :return: A generator of (old_start, old_end, new_length, new_pairs)
            tuples, one per non-'equal' opcode reported by
            KnitSequenceMatcher; ``new_pairs`` is the matching slice of
            ``new_lines._lines``.
        """
        # The target text is extracted first, then our own, as before.
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                replacement = new_lines._lines[new_start:new_end]
                yield old_start, old_end, new_end - new_start, replacement

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    def text(self):
        """Return just the text of every line, dropping the origins."""
        return [text for _origin, text in self._lines]

    def copy(self):
        """Return a new KnitContent over a shallow copy of the line list."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

147

148

149

class _KnitFactory(object):
    """Shared base class of the knit content factories."""

    def make(self, lines, version):
        """Build a KnitContent whose every line is attributed to ``version``.

        :param lines: Sequence of line texts (its length is taken).
        :param version: Version id recorded as the origin of each line.
        :return: A KnitContent of (version, line) pairs.
        """
        origins = (version,) * len(lines)
        return KnitContent(zip(origins, lines))

155

156

157

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    In the serialized form handled here, every content line starts with
    the utf8-encoded id of its origin revision, separated from the line
    text by a single space.
    """

    annotated = True

    def parse_fulltext(self, content, version):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: Iterable of serialized lines.
        :param version: Not used by this factory; each line carries its
            own origin.
        :return: A KnitContent of (revid, plaintext) tuples.
        """
        decode_utf8 = cache_utf8.decode
        lines = []
        for line in content:
            # Split on the first space only: the text may contain spaces.
            origin, text = line.split(' ', 1)
            lines.append((decode_utf8(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param lines: Serialized delta lines, see parse_line_delta.
        :param version: Forwarded to parse_line_delta. It is optional so
            that existing one-argument callers keep working; previously
            this method did not forward it at all and therefore always
            failed with a TypeError.
        """
        return iter(self.parse_line_delta(lines, version))

    def parse_line_delta(self, lines, version):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        start,end,count
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: Iterable of serialized delta lines.
        :param version: Not used by this factory; each line carries its
            own origin.
        :return: A list of (start, end, count, contents) tuples.
        """
        decode_utf8 = cache_utf8.decode
        result = []
        lines = iter(lines)
        # Bound method of the shared iterator: the header loop below and
        # the content reads both advance the same stream.
        next_line = lines.next
        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = []
            remaining = count
            while remaining:
                origin, text = next_line().split(' ', 1)
                remaining -= 1
                contents.append((decode_utf8(origin), text))
            result.append((start, end, count, contents))
        return result

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.

        :param content: A content object exposing ``_lines`` as
            (origin, text) pairs.
        :return: A list of 'origin(utf8) text' strings.
        """
        encode_utf8 = cache_utf8.encode
        return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.

        :param delta: Iterable of (start, end, count, lines) hunks where
            ``lines`` holds (origin, text) pairs.
        :return: A flat list: one 'start,end,count' header line per hunk
            followed by that hunk's 'origin(utf8) text' lines.
        """
        encode_utf8 = cache_utf8.encode
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(encode_utf8(origin) + ' ' + text
                       for origin, text in lines)
        return out

226

227

228

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain (unannotated) Content objects.

    Serialized lines carry no origin prefix; when parsing, every line is
    attributed to the version id supplied by the caller.
    """

    annotated = False

    def parse_fulltext(self, content, version):
        """Parse an unannotated fulltext into a content object.

        This is not a no-op: the internal representation pairs every
        line with a version id, which here is the constant ``version``.
        """
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Yield (start, end, count, pairs) hunks parsed from ``lines``.

        :param lines: Indexable sequence of serialized delta lines: a
            'start,end,count' header followed by ``count`` text lines,
            repeated.
        :param version: Version id paired with every text line.
        """
        position = 0
        total = len(lines)
        while position < total:
            fields = lines[position].split(',')
            position += 1
            start, end, count = [int(field) for field in fields]
            hunk_texts = lines[position:position + count]
            yield start, end, count, zip((version,) * count, hunk_texts)
            position += count

    def parse_line_delta(self, lines, version):
        """Return the hunks of parse_line_delta_iter() as a list."""
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        """Serialize a fulltext: just the text of each content line."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta; the inverse of parse_line_delta.

        :param delta: Iterable of (start, end, count, lines) hunks where
            ``lines`` holds (origin, text) pairs.
        :return: A flat list: one 'start,end,count' header line per hunk
            followed by that hunk's text lines (origins are dropped).
        """
        serialized = []
        for start, end, count, hunk in delta:
            serialized.append('%d,%d,%d\n' % (start, end, count))
            for _origin, text in hunk:
                serialized.append(text)
        return serialized

263

264

265

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: Transport on which the knit files live.
    :param relpath: Location of the knit relative to ``transport``.
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...).  The previous positional call passed the
    # transport as relpath, 'w' as file_mode and the factory *class* as
    # access_mode, so it could never succeed.  Pass everything by keyword
    # and hand over a factory instance, as __init__'s own default does.
    # create=True because __init__ documents that otherwise only an
    # existing knit is opened.
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    data_file = k._data._open_file()
    # copy_to() treats the result of _open_file() as a file object that
    # must be closed; _open_file itself is not visible here, so tolerate
    # a None result rather than assume a handle is always returned.
    if data_file is not None:
        data_file.close()

269

270

271

class KnitVersionedFile(VersionedFile):

272

"""Weave-like structure with faster random access.

273

274

A knit stores a number of texts and a summary of the relationships

275

between them. Texts are identified by a string version-id. Texts

276

are normally stored and retrieved as a series of lines, but can

277

also be passed as single strings.

278

279

Lines are stored with the trailing newline (if any) included, to

280

avoid special cases for files with no final newline. Lines are

281

composed of 8-bit characters, not unicode. The combination of

282

these approaches should mean any 'binary' file can be safely

283

stored and retrieved.

284

"""

285

286

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Location of the knit; INDEX_SUFFIX and DATA_SUFFIX
        are appended to it to name the index and data files.
    :param transport: Transport handed to the index and data objects.
    :param file_mode: Forwarded to the index and data objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: Content factory instance; defaults to a new
        KnitAnnotateFactory.
    :param basis_knit: Deprecated; passing it only triggers a
        DeprecationWarning.
    :param delta: Stored as ``self.delta``.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Forwarded to the index and data objects.
    """
    if deprecated_passed(basis_knit):
        # The message previously named the method as "__()".
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on the parents walked by _check_should_delta.
    self._max_delta_chain = 200

    self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)
    # len(self) is evaluated after the index has been set up above; the
    # data file is only asked to be created when that length is zero.
    self._data = _KnitData(transport, relpath + DATA_SUFFIX,
        access_mode, create=create and not len(self), file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)

323

324

def __repr__(self):

325

return '%s(%s)' % (self.__class__.__name__,

326

self.transport.abspath(self.filename))

327

328

def _check_should_delta(self, first_parents):

329

"""Iterate back through the parent listing, looking for a fulltext.

330

331

This is used when we want to decide whether to add a delta or a new

332

fulltext. It searches for _max_delta_chain parents. When it finds a

333

fulltext parent, it sees if the total size of the deltas leading up to

334

it is large enough to indicate that we want a new full text anyway.

335

336

Return True if we should create a new delta, False if we should use a

337

full text.

338

"""

339

delta_size = 0

340

fulltext_size = None

341

delta_parents = first_parents

342

for count in xrange(self._max_delta_chain):

343

parent = delta_parents[0]

344

method = self._index.get_method(parent)

345

pos, size = self._index.get_position(parent)

346

if method == 'fulltext':

347

fulltext_size = size

348

break

349

delta_size += size

350

delta_parents = self._index.get_parents(parent)

351

else:

352

# We couldn't find a fulltext, so we must create a new one

353

return False

354

355

return fulltext_size > delta_size

356

357

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    Stores ``delta`` as a 'line-delta' record when ``delta_parent`` is
    given and _check_should_delta() approves; in every other case the
    insertion is delegated to the base class implementation.

    :param version_id: Id of the version being added.
    :param parents: Parent ids recorded in the index for the version.
    :param delta_parent: Version the delta applies to, or None.
    :param sha1: Digest stored with the data record.
    :param noeol: If true, the 'no-eol' option is recorded.
    :param delta: The delta hunks, in the form accepted by the factory's
        lower_line_delta().
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    where, size = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, where, size, parents)

411

412

def _add_raw_records(self, records, data):

413

"""Add all the records 'records' with data pre-joined in 'data'.

414

415

:param records: A list of tuples(version_id, options, parents, size).

416

:param data: The data for the records. When it is written, the records

417

are adjusted to have pos pointing into data by the sum of

418

the preceding records sizes.

419

"""

420

# write all the data

421

pos = self._data.add_raw_record(data)

422

offset = 0

423

index_entries = []

424

for (version_id, options, parents, size) in records:

425

index_entries.append((version_id, options, pos+offset,

426

size, parents))

427

if self._data._do_cache:

428

self._data._cache[version_id] = data[offset:offset+size]

429

offset += size

430

self._index.add_versions(index_entries)

431

432

def enable_cache(self):
    """Turn on caching in this knit's data file object."""
    data_file = self._data
    data_file.enable_cache()

435

436

def clear_cache(self):
    """Clear the data file cache; the index is not touched."""
    data_file = self._data
    data_file.clear_cache()

439

440

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    :param name: base name of the copy; INDEX_SUFFIX and DATA_SUFFIX are
        appended to it.
    :param transport: transport the copy is written to.
    """
    temp_index = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(temp_index, index_file)
    finally:
        # The source handle must be released even if the copy fails.
        index_file.close()
    # copy the data file
    f = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, f)
    finally:
        f.close()
    # move the copied index into place
    transport.move(temp_index, name + INDEX_SUFFIX)

454

455

def create_empty(self, name, transport, mode=None):
    """Return a newly created KnitVersionedFile sharing this knit's settings.

    The factory and delta settings of this knit are reused.  'mode' is
    accepted but not passed on by this implementation.
    """
    new_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)
    return new_knit

458

459

def _fix_parents(self, version, new_parents):

460

"""Fix the parents list for version.

461

462

This is done by appending a new version to the index

463

with identical data except for the parents list.

464

the parents list must be a superset of the current

465

list.

466

"""

467

current_values = self._index._cache[version]

468

assert set(current_values[4]).difference(set(new_parents)) == set()

469

self._index.add_version(version,

470

current_values[1],

471

current_values[2],

472

current_values[3],

473

new_parents)

474

475

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first parent
        of version_id, or None when it has no parents.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    data_pos, data_size = self._index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    version_idx = self._index.lookup(version_id)
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_idx)
        return parent, sha1, noeol, delta

    # Stored as a fulltext: diff it against the first parent (or against
    # an empty text when there is no parent).
    new_content = self.factory.parse_fulltext(data, version_idx)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

502

503

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    Returns the index graph as a dict of version id -> parents.
    """
    return dict(self._index.get_graph())

507

508

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    The digest is taken from the knit record of version_id.
    """
    records = self._get_record_map([version_id])
    _method, _content, sha1, _build_parent = records[version_id]
    return sha1

513

514

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Returns the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

518

519

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a parent named by some version in the index graph that is
    not itself in the index cache.
    """
    # A version we actually hold cannot be a ghost.
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

532

533

def versions(self):
    """See VersionedFile.versions.

    Returns whatever the index reports from get_versions().
    """
    index = self._index
    return index.get_versions()

536

537

def has_version(self, version_id):
    """See VersionedFile.has_version.

    The question is answered by the index.
    """
    index = self._index
    return index.has_version(version_id)

540

541

__contains__ = has_version

542

543

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None):
    """Merge annotations for content and optionally build a line delta.

    When 'annotated' is true, every run of lines matching a parent has its
    (origin, text) pairs copied from that parent into content, parents
    being processed in order.  When 'delta' is true, a line delta against
    the first parent is returned; otherwise None is returned.
    """
    delta_seq = None
    if annotated:
        for parent_id in parents:
            parent_content = self._get_content(parent_id, parent_texts)
            matcher = patiencediff.PatienceSequenceMatcher(
                None, parent_content.text(), content.text())
            if delta_seq is None:
                # The first parent's matcher is reused for the delta.
                delta_seq = matcher
            for i, j, n in matcher.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                # FIXME: save the sequence control data for delta
                # compression against the most relevant parent rather
                # than rediffing.
                content._lines[j:j+n] = parent_content._lines[i:i+n]
    if not delta:
        return None
    if not annotated:
        # No matcher was built above, so diff against the first parent now.
        reference_content = self._get_content(parents[0], parent_texts)
        new_texts = content.text()
        old_texts = reference_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(
            None, old_texts, new_texts)
    return self._make_line_delta(delta_seq, content)

573

574

def _make_line_delta(self, delta_seq, new_content):

575

"""Generate a line delta from delta_seq and new_content."""

576

diff_hunks = []

577

for op in delta_seq.get_opcodes():

578

if op[0] == 'equal':

579

continue

580

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

581

return diff_hunks

582

583

def _get_components_positions(self, version_ids):

584

"""Produce a map of position data for the components of versions.

585

586

This data is intended to be used for retrieving the knit records.

587

588

A dict of version_id to (method, data_pos, data_size, next) is

589

returned.

590

method is the way referenced data should be applied.

591

data_pos is the position of the data in the knit.

592

data_size is the size of the data in the knit.

593

next is the build-parent of the version, or None for fulltexts.

594

"""

595

component_data = {}

596

for version_id in version_ids:

597

cursor = version_id

598

599

while cursor is not None and cursor not in component_data:

600

method = self._index.get_method(cursor)

601

if method == 'fulltext':

602

next = None

603

else:

604

next = self.get_parents(cursor)[0]

605

data_pos, data_size = self._index.get_position(cursor)

606

component_data[cursor] = (method, data_pos, data_size, next)

607

cursor = next

608

return component_data

609

610

def _get_content(self, version_id, parent_texts={}):

611

"""Returns a content object that makes up the specified

612

version."""

613

if not self.has_version(version_id):

614

raise RevisionNotPresent(version_id, self.filename)

615

616

cached_version = parent_texts.get(version_id, None)

617

if cached_version is not None:

618

return cached_version

619

620

text_map, contents_map = self._get_content_maps([version_id])

621

return contents_map[version_id]

622

623

def _check_versions_present(self, version_ids):

624

"""Check that all specified versions are present."""

625

version_ids = set(version_ids)

626

for r in list(version_ids):

627

if self._index.has_version(r):

628

version_ids.remove(r)

629

if version_ids:

630

raise RevisionNotPresent(list(version_ids)[0], self.filename)

631

632

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

633

"""See VersionedFile.add_lines_with_ghosts()."""

634

self._check_add(version_id, lines)

635

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

636

637

def _add_lines(self, version_id, parents, lines, parent_texts):

638

"""See VersionedFile.add_lines."""

639

self._check_add(version_id, lines)

640

self._check_versions_present(parents)

641

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

642

643

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    already_there = self.has_version(version_id)
    if already_there:
        raise RevisionAlreadyPresent(version_id, self.filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

653

654

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    Note that 'lines' is modified in place when its last line lacks a
    trailing newline.

    :return: the content object built by the factory for the new text.
    """
    # Historical lsprof figures showed add_record, lower_fulltext and
    # _merge_annotations as the dominant costs of this method.
    if parent_texts is None:
        parent_texts = {}
    # Parents the knit does not have are ghosts and play no part in delta
    # or annotation handling.
    present_parents = [p for p in parents if self.has_version(p)]

    digest = sha_strings(lines)
    options = []
    if lines:
        if lines[-1][-1] != '\n':
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if delta:
        if len(present_parents):
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spent applying deltas.
            delta = self._check_should_delta(present_parents)
        else:
            # Nothing to delta against.
            delta = False

    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return lines

716

717

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks.
    """
    return None

719

720

def _clone_text(self, new_version_id, old_version_id, parents):

721

"""See VersionedFile.clone_text()."""

722

# FIXME RBC 20060228 make fast by only inserting an index with null

723

# delta.

724

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

725

726

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Single-version form of get_line_list().
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

729

730

def _get_record_map(self, version_ids):

731

"""Produce a dictionary of knit records.

732

733

The keys are version_ids, the values are tuples of (method, content,

734

digest, next).

735

method is the way the content should be applied.

736

content is a KnitContent object.

737

digest is the SHA1 digest of this version id after all steps are done

738

next is the build-parent of the version, i.e. the leftmost ancestor.

739

If the method is fulltext, next will be None.

740

"""

741

position_map = self._get_components_positions(version_ids)

742

# c = component_id, m = method, p = position, s = size, n = next

743

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

744

record_map = {}

745

for component_id, content, digest in \

746

self._data.read_records_iter(records):

747

method, position, size, next = position_map[component_id]

748

record_map[component_id] = method, content, digest, next

749

750

return record_map

751

752

def get_text(self, version_id):
    """See VersionedFile.get_text

    Single-version form of get_texts().
    """
    texts = self.get_texts([version_id])
    return texts[0]

755

756

def get_texts(self, version_ids):
    """Return the text of each listed version as a single string.

    The strings are the joined lines from get_line_list(), in the same
    order as version_ids.
    """
    line_lists = self.get_line_list(version_ids)
    return [''.join(text_lines) for text_lines in line_lists]

758

759

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    There is one entry per requested version, in the order requested.
    """
    texts, _contents = self._get_content_maps(version_ids)
    return [texts[version_id] for version_id in version_ids]

763

764

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a rebuilt text does not match its recorded
        sha-1.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Content built for every component so far (before any no-eol fixup),
    # shared between the requested versions.
    built = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at the first
        # component that has already been built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built:
                break
            cursor = build_parent

        # Replay the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in built:
                content = built[component_id]
                continue
            version_idx = self._index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_idx)
                content = content.copy()
                content._lines = self._apply_delta(content._lines, delta)
            built[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the shared component content keeps its
            # trailing newline.
            content = content.copy()
            origin, last_text = content._lines[-1]
            content._lines[-1] = (origin, last_text.rstrip('\n'))
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

819

820

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Yields the text lines of every fulltext record and the lines carried
    by every line-delta record among the requested versions, visiting the
    records in index order.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Build the (version_id, position, length) queue in index order.
    to_read = []
    for version_id in self.versions():
        if version_id in requested_versions:
            data_pos, length = self._index.get_position(version_id)
            to_read.append((version_id, data_pos, length))

    total = len(to_read)
    done = 0
    for version_id, data, sha_value in self._data.read_records_iter(to_read):
        pb.update('Walking content.', done, total)
        done += 1
        method = self._index.get_method(version_id)
        version_idx = self._index.lookup(version_id)
        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            content = self.factory.parse_fulltext(data, version_idx)
            for line in content.text():
                yield line
        else:
            delta = self.factory.parse_line_delta(data, version_idx)
            for start, end, count, lines in delta:
                for origin, line in lines:
                    yield line

    pb.update('Walking content.', total, total)

859

860

def num_versions(self):
    """See VersionedFile.num_versions().

    The count comes from the index.
    """
    index = self._index
    return index.num_versions()

863

864

__len__ = num_versions

865

866

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields (origin, text) pairs from the content of version_id.
    """
    annotated = self._get_content(version_id)
    for origin, line_text in annotated.annotate_iter():
        yield origin, line_text

871

872

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

881

882

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

888

889

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    A single version id string is accepted as well as a sequence of ids.
    An empty request returns an empty list.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)
    return []

897

898

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    A single version id string is accepted as well as a sequence of ids.
    An empty request returns an empty list.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry_with_ghosts(versions)
    return []

906

907

#@deprecated_method(zero_eight)

908

def walk(self, version_ids):
    """See VersionedFile.walk.

    Implemented by loading the ancestry of version_ids into a temporary
    Weave and delegating to its walk().
    """
    # We take the short path here, and extract all relevant texts
    # and put them in a weave and let that do all the work.  Far
    # from optimal, but is much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    weave = Weave(self.filename)
    ancestry = self.get_ancestry(version_ids)
    # Feed the weave in topological order, restricted to the ancestry.
    ordered = topo_sort(self._index.get_graph())
    for version_id in [vid for vid in ordered if vid in ancestry]:
        text_lines = self.get_lines(version_id)
        weave.add_lines(version_id, self.get_parents(version_id), text_lines)

    for lineno, insert_id, dset, line in weave.walk(version_ids):
        yield lineno, insert_id, dset, line

927

928

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Yields (state, text) pairs.  Lines common to both versions are
    'unchanged'.  A line only in ver_a is 'killed-b' when its origin is in
    ver_b's ancestry and 'new-a' otherwise; lines only in ver_b are
    'killed-a' or 'new-b' by the same rule against ver_a's ancestry.
    """
    ancestors_b = set(self.get_ancestry(ver_b))
    ancestors_a = set(self.get_ancestry(ver_a))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [text for (origin, text) in annotated_a]
    plain_b = [text for (origin, text) in annotated_b]
    matcher = KnitSequenceMatcher(None, plain_a, plain_b)
    a_cur = 0
    b_cur = 0
    for ai, bi, length in matcher.get_matching_blocks():
        # process all mismatched sections
        # (last mismatched section is handled because blocks always
        # includes a 0-length last block)
        for revision, text in annotated_a[a_cur:ai]:
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:bi]:
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # and now the matched section
        a_cur = ai + length
        b_cur = bi + length
        for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):
            assert text_a == text_b
            yield "unchanged", text_a

966

967

968

class _KnitComponentFile(object):

969

"""One of the files used to implement a knit database"""

970

971

def __init__(self, transport, filename, mode, file_mode=None,

972

create_parent_dir=False, dir_mode=None):

973

self._transport = transport

974

self._filename = filename

975

self._mode = mode

976

self._file_mode = file_mode

977

self._dir_mode = dir_mode

978

self._create_parent_dir = create_parent_dir

979

self._need_to_create = False

980

981

def check_header(self, fp):

982

line = fp.readline()

983

if line == '':

984

# An empty file can actually be treated as though the file doesn't

985

# exist yet.

986

raise errors.NoSuchFile(self._transport.base + self._filename)

987

if line != self.HEADER:

988

raise KnitHeaderError(badline=line,

989

filename=self._transport.abspath(self._filename))

990

991

def commit(self):

992

"""Commit is a nop."""

993

994

def __repr__(self):

995

return '%s(%s)' % (self.__class__.__name__, self._filename)

996

997

998

class _KnitIndex(_KnitComponentFile):

999

"""Manages knit index file.

1000

1001

The index is already kept in memory and read on startup, to enable

1002

fast lookups of revision information. The cursor of the index

1003

file is always pointing to the end, making it easy to append

1004

entries.

1005

1006

_cache is a cache for fast mapping from version id to a Index

1007

object.

1008

1009

_history is a cache for fast mapping from indexes to version ids.

1010

1011

The index data format is dictionary compressed when it comes to

1012

parent references; an index entry may only have parents with a

1013

lower index number. As a result, the index is topologically sorted.

1014

1015

Duplicate entries may be written to the index for a single version id

1016

if this is done then the latter one completely replaces the former:

1017

this allows updates to correct version and parent information.

1018

Note that the two entries may share the delta, and that successive

1019

annotations and references MUST point to the first entry.

1020

1021

The index file on disc contains a header, followed by one line per knit

1022

record. The same revision can be present in an index file more than once.

1023

The first occurrence gets assigned a sequence number starting from 0.

1024

1025

The format of a single line is

1026

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1027

REVISION_ID is a utf8-encoded revision id

1028

FLAGS is a comma separated list of flags about the record. Values include

1029

no-eol, line-delta, fulltext.

1030

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1031

that the compressed data starts at.

1032

LENGTH is the ascii representation of the length of the data file.

1033

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1034

REVISION_ID.

1035

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1036

revision id already in the knit that is a parent of REVISION_ID.

1037

The ' :' marker is the end of record marker.

1038

1039

partial writes:

1040

when a write is interrupted to the index file, it will result in a line that

1041

does not end in ' :'. If the ' :' is not present at the end of a line, or at

1042

the end of the file, then the record that is missing it will be ignored by

1043

the parser.

1044

1045

When writing new records to the index file, the data is preceded by '\n'

1046

to ensure that records always start on new lines even if the last write was

1047

interrupted. As a result its normal for the last line in the index to be

1048

missing a trailing newline. One can be added with no harmful effects.

1049

"""

1050

1051

HEADER = "# bzr knit index 8\n"

1052

1053

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1054

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1055

1056

def _cache_version(self, version_id, options, pos, size, parents):

1057

"""Cache a version record in the history array and index cache.

1058

1059

This is inlined into __init__ for performance. KEEP IN SYNC.

1060

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1061

indexes).

1062

"""

1063

# only want the _history index to reference the 1st index entry

1064

# for version_id

1065

if version_id not in self._cache:

1066

index = len(self._history)

1067

self._history.append(version_id)

1068

else:

1069

index = self._cache[version_id][5]

1070

self._cache[version_id] = (version_id,

1071

options,

1072

pos,

1073

size,

1074

parents,

1075

index)

1076

1077

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Load the index file into memory, creating it if so requested.

    If the file does not exist and both mode == 'w' and create are set,
    the file is either written immediately with just the header, or, when
    delay_create is true, flagged for creation on the first write.  In
    any other case NoSuchFile propagates.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    decode_utf8 = cache_utf8.decode
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        count = 0
        total = 1
        try:
            pb.update('read knit index', count, total)
            fp = self._transport.get(self._filename)
            try:
                self.check_header(fp)
                # readlines reads the whole file at once:
                # bad for transports like http, good for local disk
                # we save 60 ms doing this one change (
                # from calling readline each time to calling
                # readlines once.
                # probably what we want for nice behaviour on
                # http is a incremental readlines that yields, or
                # a check for local vs non local indexes,
                for raw_line in fp.readlines():
                    fields = raw_line.split()
                    if len(fields) < 5 or fields[-1] != ':':
                        # corrupt line.
                        # FIXME: in the future we should determine if its
                        # a short write - and ignore it
                        # or a different failure, and raise. RBC 20060407
                        continue
                    count += 1
                    total += 1
                    # --- inlined copy of the parent parsing
                    parents = []
                    for value in fields[4:-1]:
                        if '.' == value[0]:
                            # uncompressed reference
                            parents.append(decode_utf8(value[1:]))
                        else:
                            # this is 15/4000ms faster than isinstance,
                            # (in lsprof)
                            # this function is called thousands of times
                            # a second so small variations add up.
                            assert value.__class__ is str
                            parents.append(self._history[int(value)])
                    # --- inlined copy of self._cache_version
                    # only want the _history index to reference the 1st
                    # index entry for version_id
                    version_id = decode_utf8(fields[0])
                    if version_id not in self._cache:
                        index = len(self._history)
                        self._history.append(version_id)
                    else:
                        index = self._cache[version_id][5]
                    self._cache[version_id] = (version_id,
                                               fields[1].split(','),
                                               int(fields[2]),
                                               int(fields[3]),
                                               parents,
                                               index)
                    # --- end of inlined self._cache_version
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(self._filename,
                    self.HEADER, mode=self._file_mode)

    finally:
        pb.update('read knit index', total, total)
        pb.finished()

1167

1168

def _parse_parents(self, compressed_parents):

1169

"""convert a list of string parent values into version ids.

1170

1171

ints are looked up in the index.

1172

.FOO values are ghosts and converted in to FOO.

1173

1174

NOTE: the function is retained here for clarity, and for possible

1175

use in partial index reads. However bulk processing now has

1176

it inlined in __init__ for inner-loop optimisation.

1177

"""

1178

result = []

1179

for value in compressed_parents:

1180

if value[-1] == '.':

1181

# uncompressed reference

1182

result.append(cache_utf8.decode_utf8(value[1:]))

1183

else:

1184

# this is 15/4000ms faster than isinstance,

1185

# this function is called thousands of times a

1186

# second so small variations add up.

1187

assert value.__class__ is str

1188

result.append(self._history[int(value)])

1189

return result

1190

1191

def get_graph(self):
    """Return a list of (version_id, parents) for every cached version."""
    return [(version_id, entry[4])
            for version_id, entry in self._cache.iteritems()]

1196

1197

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    Ghost parents (those not in the cache) are dropped.  The result is
    the topologically sorted ancestry of 'versions'.
    """
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while len(todo):
        version = todo.pop()
        # trim ghosts
        known_parents = [p for p in self._cache[version][4]
                         if p in self._cache]
        # queue every parent that has not been completed yet
        todo.update(p for p in known_parents if p not in graph)
        graph[version] = known_parents
    return topo_sort(graph.items())

1214

1215

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Versions missing from the cache are ghosts; they are kept in the
    graph with an empty parents list.
    """
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while len(todo):
        version = todo.pop()
        try:
            parents = self._cache[version][4]
        except KeyError:
            # ghost, fake it
            graph[version] = []
        else:
            # got the parents ok
            todo.update(p for p in parents if p not in graph)
            graph[version] = parents
    return topo_sort(graph.items())

1235

1236

def num_versions(self):
    """Return the number of distinct version ids in the index."""
    history = self._history
    return len(history)

1238

1239

__len__ = num_versions

1240

1241

def get_versions(self):
    """Return the version ids in index order.

    This is the internal history list itself, not a copy.
    """
    history = self._history
    return history

1243

1244

def idx_to_name(self, idx):
    """Return the version id stored at sequence number idx."""
    history = self._history
    return history[idx]

1246

1247

def lookup(self, version_id):
    """Return the sequence number of version_id, which must be cached."""
    assert version_id in self._cache
    entry = self._cache[version_id]
    return entry[5]

1250

1251

def _version_list_to_index(self, versions):
    """Encode a list of version ids as the parents field of an index line.

    Cached versions are written as their sequence number; any other
    version is written as '.' followed by its utf8-encoded id.  The items
    are joined with single spaces.
    """
    encode_utf8 = cache_utf8.encode
    encoded = []
    for version in versions:
        if version not in self._cache:
            encoded.append('.' + encode_utf8(version))
        else:
            # -- inlined lookup() --
            encoded.append(str(self._cache[version][5]))
            # -- end lookup () --
    return ' '.join(encoded)

1262

1263

def add_version(self, version_id, options, pos, size, parents):
    """Add a version record to the index.

    Convenience wrapper: packs the arguments into one
    (version_id, options, pos, size, parents) tuple and hands it to
    add_versions as a one-element sequence.
    """
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1266

1267

def add_versions(self, versions):
    """Add multiple versions to the index.

    One index line is formatted per version and the in-memory state is
    updated through self._cache_version; all lines are then written with
    a single transport call.  When the file still has to be created,
    self.HEADER is written in front of the lines.

    If anything raises, self._history and self._cache are put back to the
    copies taken on entry and the exception is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
    """
    encode = cache_utf8.encode
    # Snapshots used to roll the in-memory state back on failure.
    saved_history = self._history[:]
    saved_cache = self._cache.copy()
    new_lines = []

    try:
        for version_id, options, pos, size, parents in versions:
            fields = (encode(version_id),
                      ','.join(options),
                      pos,
                      size,
                      self._version_list_to_index(parents))
            entry = "\n%s %s %s %s %s :" % fields
            assert isinstance(entry, str), \
                'content must be utf-8 encoded: %r' % (entry,)
            new_lines.append(entry)
            self._cache_version(version_id, options, pos, size, parents)
        if self._need_to_create:
            # First write to this file: header first, then the new lines.
            buf = StringIO()
            buf.write(self.HEADER)
            buf.writelines(new_lines)
            buf.seek(0)
            self._transport.put_file_non_atomic(self._filename, buf,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(new_lines))
    except:
        # Whatever went wrong, restore the original values and re-raise.
        self._history = saved_history
        self._cache = saved_cache
        raise

1306

1307

def has_version(self, version_id):
    """Return True when ``version_id`` has an entry in self._cache."""
    cache = self._cache
    return version_id in cache

1310

1311

def get_position(self, version_id):
    """Return data position and size of specified version.

    These are fields 2 and 3 of the version's cache entry, returned as a
    (pos, size) tuple.  Raises KeyError when the version is not cached.
    """
    entry = self._cache[version_id]
    pos = entry[2]
    size = entry[3]
    return (pos, size)

1315

1316

def get_method(self, version_id):
    """Return compression method of specified version.

    The answer is 'fulltext' when that option is recorded for the
    version; otherwise the options must contain 'line-delta' (checked
    with assert) and 'line-delta' is returned.
    """
    options = self._cache[version_id][1]
    if 'fulltext' not in options:
        assert 'line-delta' in options
        return 'line-delta'
    return 'fulltext'

1324

1325

def get_options(self, version_id):
    """Return field 1 (the options) of the version's cache entry."""
    entry = self._cache[version_id]
    return entry[1]

1327

1328

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    A parent counts as a ghost when it has no entry in self._cache.
    A new list is returned; the order of the stored parents is kept.
    """
    cache = self._cache
    recorded = cache[version_id][4]
    return [p for p in recorded if p in cache]

1332

1333

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    This is field 4 of the version's cache entry, returned as stored
    (the same object, not a copy).
    """
    entry = self._cache[version_id]
    return entry[4]

1336

1337

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: an iterable of version ids.
    :raises RevisionNotPresent: when at least one id has no entry in
        self._cache.  The error names one of the missing ids (which one
        is arbitrary, as the ids are de-duplicated through a set) and
        this index's file name.
    """
    cache = self._cache
    missing = [version_id for version_id in set(version_ids)
               if version_id not in cache]
    if missing:
        # The file name is kept in self._filename, as used by
        # add_versions; the previous self.filename spelling is not an
        # attribute this class is seen to set.
        raise RevisionNotPresent(missing[0], self._filename)

1345

1346

1347

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file.

    Each record is a separately gzipped block of the form::

        version <utf8 version id> <line count> <digest>
        <lines...>
        end <utf8 version id>

    Raw (still compressed) records can be kept in self._cache, keyed by
    version id, once enable_cache() has been called.
    """

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False,
                 dir_mode=None):
        """Set up the data file wrapper.

        :param create: when true the file is created: immediately (as an
            empty file) unless ``delay_create`` is also true, in which
            case creation is postponed to the first write.
        :param delay_create: see ``create``.

        The remaining arguments are passed to _KnitComponentFile.__init__.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        #       but it complicates the model a bit, so for now just use
        #       a simple dictionary
        self._cache = {}
        self._do_cache = False
        if create:
            if delay_create:
                self._need_to_create = create
            else:
                self._transport.put_bytes_non_atomic(self._filename, '',
                                                     mode=self._file_mode)

    def enable_cache(self):
        """Enable caching of reads."""
        self._do_cache = True

    def clear_cache(self):
        """Clear the record cache and stop caching reads."""
        self._do_cache = False
        self._cache = {}

    def _open_file(self):
        """Return the transport's file object, or None if the file is missing."""
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            pass
        return None

    def _record_to_data(self, version_id, digest, lines):
        """Convert version_id, digest, lines into a raw data block.

        :return: (len, a StringIO instance with the raw data ready to read.)
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)

        version_id_utf8 = cache_utf8.encode(version_id)
        data_file.writelines(chain(
            ["version %s %d %s\n" % (version_id_utf8,
                                     len(lines),
                                     digest)],
            lines,
            ["end %s\n" % version_id_utf8]))
        data_file.close()
        # The compressed size is wherever the gzip writer stopped.
        length = sio.tell()

        sio.seek(0)
        return length, sio

    def add_raw_record(self, raw_data):
        """Append a prepared record to the data file.

        :return: the offset in the data file raw_data was written.
        """
        assert isinstance(raw_data, str), 'data must be plain bytes'
        if not self._need_to_create:
            return self._transport.append_bytes(self._filename, raw_data)
        else:
            # Delayed creation: this record becomes the start of the file.
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            return 0

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        :return: (start_pos, size): the position in the file where the
            record was written and its compressed size.
        """
        size, sio = self._record_to_data(version_id, digest, lines)
        # write to disk
        if not self._need_to_create:
            start_pos = self._transport.append_file(self._filename, sio)
        else:
            self._transport.put_file_non_atomic(self._filename, sio,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
            self._need_to_create = False
            start_pos = 0
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return start_pos, size

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream.
                 as (stream, header_record)
        :raises KnitCorrupt: when the header does not have four fields or
            names a different version than ``version_id``.
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
        if cache_utf8.decode(rec[1]) != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r' % (
                                version_id, rec[1]))
        return df, rec

    def _parse_record(self, version_id, data):
        """Decompress one raw record and check it against its header.

        :return: (lines, digest) where digest is the fourth header field.
        :raises KnitCorrupt: when the header is inconsistent, the end line
            is missing or wrong, or the number of lines does not match the
            count given in the header.
        """
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df, rec = self._parse_record_header(version_id, data)
        try:
            record_contents = df.readlines()
        finally:
            # Close the stream on the error paths as well.
            df.close()
        if not record_contents:
            # Nothing follows the header, so the 'end' line is missing.
            raise KnitCorrupt(self._filename,
                              'missing end line for version %r'
                              % (version_id,))
        end_line = record_contents.pop()
        # This validates data read from disk, so it must not depend on
        # assert (which is stripped under -O).
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._filename,
                              'incorrect number of lines %s != %s'
                              ' for version %r'
                              % (len(record_contents), int(rec[2]),
                                 version_id))
        if end_line != 'end %s\n' % cache_utf8.encode(version_id):
            raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                        % (end_line, version_id))
        return record_contents, rec[3]

    def read_records_iter_raw(self, records):
        """Read text records from data file and yield raw data.

        This unpacks enough of the text record to validate the id is
        as expected but that's all.

        :param records: A list of (version_id, pos, len) entries
        :return: Yields (version_id, raw_data) in the order requested.
        """
        # setup an iterator of the external records:
        # uses readv so nice and fast we hope.
        if records:
            # grab the disk data needed.
            if self._cache:
                # Don't check _cache if it is empty
                needed_offsets = [(pos, size) for version_id, pos, size
                                              in records
                                              if version_id not in self._cache]
            else:
                needed_offsets = [(pos, size) for version_id, pos, size
                                               in records]

            raw_records = self._transport.readv(self._filename, needed_offsets)

        for version_id, pos, size in records:
            if version_id in self._cache:
                # This data has already been validated
                data = self._cache[version_id]
            else:
                pos, data = raw_records.next()

                # Validate the header *before* caching, so that a record
                # which fails validation is never served from the cache
                # later as if it had been checked.
                df, rec = self._parse_record_header(version_id, data)
                df.close()
                if self._do_cache:
                    self._cache[version_id] = data
            yield version_id, data

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        The result will be returned in whatever is the fastest to read.
        Not by the order requested. Also, multiple requests for the same
        record will only yield 1 response.
        :param records: A list of (version_id, pos, len) entries
        :return: Yields (version_id, contents, digest) in the order
                 read, not the order requested
        """
        if not records:
            return

        if self._cache:
            # Skip records we have already seen
            yielded_records = set()
            needed_records = set()
            for record in records:
                if record[0] in self._cache:
                    if record[0] in yielded_records:
                        continue
                    yielded_records.add(record[0])
                    data = self._cache[record[0]]
                    content, digest = self._parse_record(record[0], data)
                    yield (record[0], content, digest)
                else:
                    needed_records.add(record)
            # Read the remaining records in file-position order.
            needed_records = sorted(needed_records, key=operator.itemgetter(1))
        else:
            needed_records = sorted(set(records), key=operator.itemgetter(1))

        if not needed_records:
            return

        # The transport optimizes the fetching as well
        # (ie, reads continuous ranges.)
        readv_response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in needed_records])

        for (version_id, pos, size), (pos, data) in \
                izip(iter(needed_records), readv_response):
            content, digest = self._parse_record(version_id, data)
            if self._do_cache:
                self._cache[version_id] = data
            yield version_id, content, digest

    def read_records(self, records):
        """Read records into a dictionary.

        :return: a dict mapping version id to (content, digest).
        """
        components = {}
        for record_id, content, digest in \
                self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components

1562

1563

1564

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of the versions the target lacks from the
        source data file, then merges the parent lists of versions both
        sides have but disagree on.

        :return: the number of records copied; 0 when there is nothing
            to do.
        """
        source = self.source
        target = self.target
        assert isinstance(source, KnitVersionedFile)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # The pb argument is not consulted: a nested progress bar is
        # always created here and finished on the way out.
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            target_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-only parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure the extra parents will be available later.
                extra_parents = source_parents.difference(target_parents)
                needed_versions.update(extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            ordered_source = topo_sort(source.get_graph())

            version_list = [v for v in ordered_source
                            if (not target.has_version(v)
                                and v in needed_versions)]

            # plan the join:
            source_index = source._index
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must:
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in copy_set or
                            not source.has_version(parent))
                data_pos, data_size = source_index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # pull the raw data across:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_iter = source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), (queued_id, options, parents) in \
                    izip(raw_iter, copy_queue):
                assert version_id == queued_id, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                new_parents = (target.get_parents_with_ghosts(version)
                               + list(n2.difference(n1)))
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1676

1677

1678

# Hand InterKnit to InterVersionedFile.register_optimiser (presumably so
# that knit-to-knit joins are routed to this class -- confirm against
# InterVersionedFile).
InterVersionedFile.register_optimiser(InterKnit)

1679

1680

1681

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True for a Weave source and a KnitVersionedFile target."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the texts the target lacks with target.add_lines, in the
        order given by topo_sort of the source graph, then merges the
        parent lists of versions both sides have but disagree on.

        :return: the number of versions added; 0 when there is nothing
            to do.
        """
        source = self.source
        target = self.target
        assert isinstance(source, bzrlib.weave.Weave)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # The pb argument is not consulted: a nested progress bar is
        # always created here and finished on the way out.
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            target_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents(version))
                # if all of the source's parents are known to the target,
                # then it is fine.
                extra_parents = source_parents.difference(target_parents)
                if not extra_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-only parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure the extra parents will be available later.
                needed_versions.update(extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            ordered_source = topo_sort(source.get_graph())

            version_list = [v for v in ordered_source
                            if (not target.has_version(v)
                                and v in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                new_parents = (target.get_parents_with_ghosts(version)
                               + list(n2.difference(n1)))
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1769

1770

1771

# Hand WeaveToKnit to InterVersionedFile.register_optimiser (presumably
# so that weave-to-knit joins are routed to this class -- confirm against
# InterVersionedFile).
InterVersionedFile.register_optimiser(WeaveToKnit)

1772

1773

1774

class KnitSequenceMatcher(difflib.SequenceMatcher):

1775

"""Knit tuned sequence matcher.

1776

1777

This is based on profiling of difflib which indicated some improvements

1778

for our usage pattern.

1779

"""

1780

1781

def find_longest_match(self, alo, ahi, blo, bhi):

1782

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

1783

1784

If isjunk is not defined:

1785

1786

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

1787

alo <= i <= i+k <= ahi

1788

blo <= j <= j+k <= bhi

1789

and for all (i',j',k') meeting those conditions,

1790

k >= k'

1791

i <= i'

1792

and if i == i', j <= j'

1793

1794

In other words, of all maximal matching blocks, return one that

1795

starts earliest in a, and of all those maximal matching blocks that

1796

start earliest in a, return the one that starts earliest in b.

1797

1798

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

1799

>>> s.find_longest_match(0, 5, 0, 9)

1800

(0, 4, 5)

1801

1802

If isjunk is defined, first the longest matching block is

1803

determined as above, but with the additional restriction that no

1804

junk element appears in the block. Then that block is extended as

1805

far as possible by matching (only) junk elements on both sides. So

1806

the resulting block never matches on junk except as identical junk

1807

happens to be adjacent to an "interesting" match.

1808

1809

Here's the same example as before, but considering blanks to be

1810

junk. That prevents " abcd" from matching the " abcd" at the tail

1811

end of the second sequence directly. Instead only the "abcd" can

1812

match, and matches the leftmost "abcd" in the second sequence:

1813

1814

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

1815

>>> s.find_longest_match(0, 5, 0, 9)

1816

(1, 0, 4)

1817

1818

If no blocks match, return (alo, blo, 0).

1819

1820

>>> s = SequenceMatcher(None, "ab", "c")

1821

>>> s.find_longest_match(0, 2, 0, 1)

1822

(0, 0, 0)

1823

"""

1824

1825

# CAUTION: stripping common prefix or suffix would be incorrect.

1826

# E.g.,

1827

# ab

1828

# acab

1829

# Longest matching block is "ab", but if common prefix is

1830

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

1831

# strip, so ends up claiming that ab is changed to acab by

1832

# inserting "ca" in the middle. That's minimal but unintuitive:

1833

# "it's obvious" that someone inserted "ac" at the front.

1834

# Windiff ends up at the same place as diff, but by pairing up

1835

# the unique 'b's and then matching the first two 'a's.

1836

1837

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

1838

besti, bestj, bestsize = alo, blo, 0

1839

# find longest junk-free match

1840

# during an iteration of the loop, j2len[j] = length of longest

1841

# junk-free match ending with a[i-1] and b[j]

1842

j2len = {}

1843

# nothing = []

1844

b2jget = b2j.get

1845

for i in xrange(alo, ahi):

1846

# look at all instances of a[i] in b; note that because

1847

# b2j has no junk keys, the loop is skipped if a[i] is junk

1848

j2lenget = j2len.get

1849

newj2len = {}

1850

1851

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1852

# following improvement

1853

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1854

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1855

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

1856

# to

1857

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

1858

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

1859

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

1860

1861

try:

1862

js = b2j[a[i]]

1863

except KeyError:

1864

pass

1865

else:

1866

for j in js:

1867

# a[i] matches b[j]

1868

if j >= blo:

1869

if j >= bhi:

1870

break

1871

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

1872

if k > bestsize:

1873

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

1874

j2len = newj2len

1875

1876

# Extend the best by non-junk elements on each end. In particular,

1877

# "popular" non-junk elements aren't in b2j, which greatly speeds

1878

# the inner loop above, but also means "the best" match so far

1879

# doesn't contain any junk *or* popular non-junk elements.

1880

while besti > alo and bestj > blo and \

1881

not isbjunk(b[bestj-1]) and \

1882

a[besti-1] == b[bestj-1]:

1883

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1884

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1885

not isbjunk(b[bestj+bestsize]) and \

1886

a[besti+bestsize] == b[bestj+bestsize]:

1887

bestsize += 1

1888

1889

# Now that we have a wholly interesting match (albeit possibly

1890

# empty!), we may as well suck up the matching junk on each

1891

# side of it too. Can't think of a good reason not to, and it

1892

# saves post-processing the (possibly considerable) expense of

1893

# figuring out what to do with it. In the case of an empty

1894

# interesting match, this is clearly the right thing to do,

1895

# because no other kind of match is possible in the regions.

1896

while besti > alo and bestj > blo and \

1897

isbjunk(b[bestj-1]) and \

1898

a[besti-1] == b[bestj-1]:

1899

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1900

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1901

isbjunk(b[bestj+bestsize]) and \

1902

a[besti+bestsize] == b[bestj+bestsize]:

1903

bestsize = bestsize + 1

1904

1905

return besti, bestj, bestsize

1906

Older »