/brz/remove-bazaar : revision 2094.3.6

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2006-12-01 19:41:16 UTC
mfrom: (2158 +trunk)
mto: This revision was merged to the branch mainline in revision 2159.
Revision ID: john@arbash-meinel.com-20061201194116-nvn5qhfxux5284jc

[merge] bzr.dev 2158

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

patiencediff,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.ui

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

100

# files belong together?

101

102

# TODO: accommodate binaries, perhaps by storing a byte count

103

104

# TODO: function to check whole file

105

106

# TODO: atomically append data, then measure backwards from the cursor

107

# position after writing to work out where it was located. we may need to

108

# bypass python file buffering.

109

110

DATA_SUFFIX = '.knit'

111

INDEX_SUFFIX = '.kndx'

112

113

114

class KnitContent(object):
    """Annotated lines of one knit version, usable as a base for deltas.

    The content is held as a sequence of ``(origin, text)`` pairs.
    """

    def __init__(self, lines):
        # Stored as given; no copy is taken here (see copy()).
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the stored (origin, text) pairs."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a list."""
        pairs = self.annotate_iter()
        return list(pairs)

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        :param new_lines: A content object providing text() and _lines.
        :return: An iterator of (old_start, old_end, new_count,
            new_annotated_lines) tuples, one per non-equal opcode.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                # ofrom, oto, length, data
                replacement = new_lines._lines[new_start:new_end]
                yield old_start, old_end, new_end - new_start, replacement

    def line_delta(self, new_lines):
        """Return the hunks from line_delta_iter() as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    def text(self):
        """Return only the text part of every stored pair."""
        return [line for _origin, line in self._lines]

    def copy(self):
        """Return a new KnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

147

148

149

class _KnitFactory(object):
    """Shared behaviour for the knit content factories."""

    def make(self, lines, version):
        """Build a KnitContent attributing every line to version.

        :param lines: The text lines of the version.
        :param version: The version id used as origin of each line.
        :return: A KnitContent of (version, line) pairs.
        """
        annotated_lines = [(version, line) for line in lines]
        return KnitContent(annotated_lines)

155

156

157

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: Iterable of serialized fulltext lines.
        :param version: Not used by this factory; every line carries its
            own origin.
        :return: A KnitContent of (revid, plaintext) tuples.
        """
        decode_utf8 = cache_utf8.decode
        lines = []
        for line in content:
            # Only the first space separates the origin from the text.
            origin, text = line.split(' ', 1)
            lines.append((decode_utf8(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version=None):
        """Iterate over the hunks produced by parse_line_delta().

        :param lines: Serialized line-delta lines.
        :param version: Passed through to parse_line_delta(), which does
            not use it in this factory; optional so that existing
            single-argument callers keep working.
        """
        # parse_line_delta is a method and must be called, not subscripted.
        for result_item in self.parse_line_delta(lines, version):
            yield result_item

    def parse_line_delta(self, lines, version):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: Iterable of serialized delta lines.
        :param version: Not used by this factory.
        :return: A list of (start, end, count, contents) tuples.
        """
        decode_utf8 = cache_utf8.decode
        result = []
        lines = iter(lines)
        # Bound to the same iterator the for loop consumes, so pulling
        # the hunk body here advances the loop past those lines too.
        next_line = lines.next
        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = []
            remaining = count
            while remaining:
                origin, text = next_line().split(' ', 1)
                remaining -= 1
                contents.append((decode_utf8(origin), text))
            result.append((start, end, count, contents))
        return result

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.

        :param content: A content object whose _lines holds
            (origin, text) tuples.
        :return: A list of 'origin text' strings, origin utf8 encoded.
        """
        encode_utf8 = cache_utf8.encode
        return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.

        :param delta: Iterable of (start, end, count, lines) tuples where
            lines holds (origin, text) tuples.
        :return: A flat list: one 'start,end,count' header per hunk
            followed by that hunk's 'origin text' lines.
        """
        encode_utf8 = cache_utf8.encode
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(encode_utf8(origin) + ' ' + text
                       for origin, text in lines)
        return out

227

228

229

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.

        :param content: The plain text lines.
        :param version: The version id attributed to every line.
        """
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Yield (start, end, count, contents) for each serialized hunk.

        The input is walked with a cursor, so the caller's list is left
        untouched and no front-of-list deletions are performed.

        :param lines: Sequence of serialized delta lines: a
            'start,end,count' header followed by count text lines,
            repeated.
        :param version: The version id attributed to every new line.
        """
        cur = 0
        num_lines = len(lines)
        while cur < num_lines:
            header = lines[cur]
            cur += 1
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version] * c, lines[cur:cur + c])
            cur += c

    def parse_line_delta(self, lines, version):
        """Return the hunks from parse_line_delta_iter() as a list."""
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        """Return the plain text lines of content, dropping origins."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta as headers followed by plain text lines.

        :param delta: Iterable of (start, end, count, lines) tuples where
            lines holds (origin, text) tuples.
        :return: A flat list: one 'start,end,count' header per hunk
            followed by that hunk's text lines.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out

261

262

263

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit lives on.
    :param relpath: Path of the knit, relative to transport, without the
        index/data suffixes.
    """
    # KnitVersionedFile takes (relpath, transport, file_mode, access_mode,
    # factory, ...), and stores the factory as an instance, so pass the
    # arguments by keyword and instantiate the factory.
    # NOTE(review): create=True is assumed to be what "construct" means
    # here; without it only an existing knit could be opened.
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    k._data._open_file()

267

268

269

class KnitVersionedFile(VersionedFile):

270

"""Weave-like structure with faster random access.

271

272

A knit stores a number of texts and a summary of the relationships

273

between them. Texts are identified by a string version-id. Texts

274

are normally stored and retrieved as a series of lines, but can

275

also be passed as single strings.

276

277

Lines are stored with the trailing newline (if any) included, to

278

avoid special cases for files with no final newline. Lines are

279

composed of 8-bit characters, not unicode. The combination of

280

these approaches should mean any 'binary' file can be safely

281

stored and retrieved.

282

"""

283

284

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Path of the knit relative to transport; the index
        and data suffixes are appended to it.
    :param transport: The transport used to reach the knit files.
    :param file_mode: Passed through to the index and data objects.
    :param access_mode: 'r' or 'w'; defaults to 'w' when None.
    :param factory: Content factory instance; a KnitAnnotateFactory is
        created when none is given.
    :param basis_knit: Deprecated; passing it only triggers a
        DeprecationWarning.
    :param delta: Stored on the instance as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data objects.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # Upper bound on how many ancestors _check_should_delta() walks.
    self._max_delta_chain = 200

    self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)
    # len(self) is evaluated after the index is in place; the data file
    # is only asked to be created when the knit holds no versions.
    self._data = _KnitData(transport, relpath + DATA_SUFFIX,
        access_mode, create=create and not len(self), file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)

321

322

def __repr__(self):
    """Return 'ClassName(absolute location)' for this knit."""
    class_name = self.__class__.__name__
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (class_name, location)

325

326

def _check_should_delta(self, first_parents):
    """Decide between storing a delta and storing a new fulltext.

    Walks back along the first parent of each version, for at most
    _max_delta_chain steps, adding up the sizes of the delta records
    met on the way. The walk stops at the first fulltext record.

    :param first_parents: Parent list whose first entry starts the walk.
    :return: True if a delta should be created (a fulltext was reached
        and is larger than the accumulated deltas), False if a fulltext
        should be stored instead.
    """
    index = self._index
    accumulated = 0
    candidates = first_parents
    steps_left = self._max_delta_chain
    while steps_left > 0:
        steps_left -= 1
        ancestor = candidates[0]
        kind = index.get_method(ancestor)
        _pos, record_size = index.get_position(ancestor)
        if kind == 'fulltext':
            # Deltas are only worthwhile while they stay smaller than
            # the fulltext they build upon.
            return record_size > accumulated
        accumulated += record_size
        candidates = index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

354

355

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    :param version_id: Id of the version being added.
    :param parents: Parent version ids recorded in the index.
    :param delta_parent: Version the delta is relative to, or None when
        the delta describes a complete text.
    :param sha1: Digest stored with the record.
    :param noeol: If true, the 'no-eol' option is recorded.
    :param delta: The line delta, as (start, end, count, lines) hunks.
    :return: The base class result when the text is stored through the
        generic path; otherwise None.
    """
    self._check_add(version_id, [])  # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # determine the current delta chain length.
    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas. This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    where, size = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, where, size, parents)

409

410

def _add_raw_records(self, records, data):

411

"""Add all the records 'records' with data pre-joined in 'data'.

412

413

:param records: A list of tuples(version_id, options, parents, size).

414

:param data: The data for the records. When it is written, the records

415

are adjusted to have pos pointing into data by the sum of

416

the preceding records sizes.

417

"""

418

# write all the data

419

pos = self._data.add_raw_record(data)

420

offset = 0

421

index_entries = []

422

for (version_id, options, parents, size) in records:

423

index_entries.append((version_id, options, pos+offset,

424

size, parents))

425

if self._data._do_cache:

426

self._data._cache[version_id] = data[offset:offset+size]

427

offset += size

428

self._index.add_versions(index_entries)

429

430

def enable_cache(self):
    """Start caching data for this knit (delegates to the data file)."""
    data_file = self._data
    data_file.enable_cache()

433

434

def clear_cache(self):
    """Clear the data cache only (delegates to the data file)."""
    data_file = self._data
    data_file.clear_cache()

437

438

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first written under a '.tmp' name, then the data file is
    copied, and only then is the index moved to its final name.
    """
    index_target = name + INDEX_SUFFIX
    staging_name = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_source = self.transport.get(self._index._filename)
    transport.put_file_non_atomic(staging_name, index_source)
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staging_name, index_target)

452

453

def create_empty(self, name, transport, mode=None):
    """Return a newly created KnitVersionedFile at name on transport.

    The new knit reuses this knit's factory and delta setting.  ``mode``
    is accepted but not used by this implementation.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

456

457

def _fix_parents(self, version, new_parents):

458

"""Fix the parents list for version.

459

460

This is done by appending a new version to the index

461

with identical data except for the parents list.

462

the parents list must be a superset of the current

463

list.

464

"""

465

current_values = self._index._cache[version]

466

assert set(current_values[4]).difference(set(new_parents)) == set()

467

self._index.add_version(version,

468

current_values[1],

469

current_values[2],

470

current_values[3],

471

new_parents)

472

473

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: a tuple (parent, sha1, noeol, delta) where parent is the first
        parent of version_id, or None when it has no parents.
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    data_pos, data_size = self._index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    version_idx = self._index.lookup(version_id)
    noeol = 'no-eol' in self._index.get_options(version_id)

    if 'fulltext' != self._index.get_method(version_id):
        # Stored as a line delta already: just parse and hand it back.
        delta = self.factory.parse_line_delta(data, version_idx)
        return parent, sha1, noeol, delta

    # Stored as a fulltext: diff it against the parent's text (or against
    # nothing when there is no parent) to synthesise a delta.
    new_content = self.factory.parse_fulltext(data, version_idx)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

500

501

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    Returns the index graph as a dict.
    """
    return dict(self._index.get_graph())

505

506

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    Returns the digest held in the record map entry for version_id.
    """
    records = self._get_record_map([version_id])
    method, content, digest, build_parent = records[version_id]
    return digest

511

512

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    Returns the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

516

517

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is actually present is never reported as a ghost.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent == version_id and parent not in self._index._cache:
                return True
    return False

530

531

def versions(self):
    """See VersionedFile.versions.

    Returns whatever the index reports from get_versions().
    """
    known = self._index.get_versions()
    return known

534

535

def has_version(self, version_id):
    """See VersionedFile.has_version.

    Answers by asking the index.
    """
    present = self._index.has_version(version_id)
    return present

538

539

__contains__ = has_version

540

541

def _merge_annotations(self, content, parents, parent_texts={},

542

delta=None, annotated=None):

543

"""Merge annotations for content. This is done by comparing

544

the annotations based on changed to the text.

545

"""

546

if annotated:

547

delta_seq = None

548

for parent_id in parents:

549

merge_content = self._get_content(parent_id, parent_texts)

550

seq = patiencediff.PatienceSequenceMatcher(

551

None, merge_content.text(), content.text())

552

if delta_seq is None:

553

# setup a delta seq to reuse.

554

delta_seq = seq

555

for i, j, n in seq.get_matching_blocks():

556

if n == 0:

557

continue

558

# this appears to copy (origin, text) pairs across to the new

559

# content for any line that matches the last-checked parent.

560

# FIXME: save the sequence control data for delta compression

561

# against the most relevant parent rather than rediffing.

562

content._lines[j:j+n] = merge_content._lines[i:i+n]

563

if delta:

564

if not annotated:

565

reference_content = self._get_content(parents[0], parent_texts)

566

new_texts = content.text()

567

old_texts = reference_content.text()

568

delta_seq = patiencediff.PatienceSequenceMatcher(

569

None, old_texts, new_texts)

570

return self._make_line_delta(delta_seq, content)

571

572

def _make_line_delta(self, delta_seq, new_content):

573

"""Generate a line delta from delta_seq and new_content."""

574

diff_hunks = []

575

for op in delta_seq.get_opcodes():

576

if op[0] == 'equal':

577

continue

578

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

579

return diff_hunks

580

581

def _get_components_positions(self, version_ids):

582

"""Produce a map of position data for the components of versions.

583

584

This data is intended to be used for retrieving the knit records.

585

586

A dict of version_id to (method, data_pos, data_size, next) is

587

returned.

588

method is the way referenced data should be applied.

589

data_pos is the position of the data in the knit.

590

data_size is the size of the data in the knit.

591

next is the build-parent of the version, or None for fulltexts.

592

"""

593

component_data = {}

594

for version_id in version_ids:

595

cursor = version_id

596

597

while cursor is not None and cursor not in component_data:

598

method = self._index.get_method(cursor)

599

if method == 'fulltext':

600

next = None

601

else:

602

next = self.get_parents(cursor)[0]

603

data_pos, data_size = self._index.get_position(cursor)

604

component_data[cursor] = (method, data_pos, data_size, next)

605

cursor = next

606

return component_data

607

608

def _get_content(self, version_id, parent_texts={}):

609

"""Returns a content object that makes up the specified

610

version."""

611

if not self.has_version(version_id):

612

raise RevisionNotPresent(version_id, self.filename)

613

614

cached_version = parent_texts.get(version_id, None)

615

if cached_version is not None:

616

return cached_version

617

618

text_map, contents_map = self._get_content_maps([version_id])

619

return contents_map[version_id]

620

621

def _check_versions_present(self, version_ids):

622

"""Check that all specified versions are present."""

623

version_ids = set(version_ids)

624

for r in list(version_ids):

625

if self._index.has_version(r):

626

version_ids.remove(r)

627

if version_ids:

628

raise RevisionNotPresent(list(version_ids)[0], self.filename)

629

630

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

631

"""See VersionedFile.add_lines_with_ghosts()."""

632

self._check_add(version_id, lines)

633

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

634

635

def _add_lines(self, version_id, parents, lines, parent_texts):

636

"""See VersionedFile.add_lines."""

637

self._check_add(version_id, lines)

638

self._check_versions_present(parents)

639

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

640

641

def _check_add(self, version_id, lines):

642

"""check that version_id and lines are safe to add."""

643

assert self.writable, "knit is not opened for write"

644

### FIXME escape. RBC 20060228

645

if contains_whitespace(version_id):

646

raise InvalidRevisionId(version_id, self.filename)

647

if self.has_version(version_id):

648

raise RevisionAlreadyPresent(version_id, self.filename)

649

self._check_lines_not_unicode(lines)

650

self._check_lines_are_lines(lines)

651

652

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :return: the content object built from lines by the factory.
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    if parent_texts is None:
        parent_texts = {}
    # Parents that are not in the knit are ghosts; only the present ones
    # can serve as a delta basis or annotation source.
    present_parents = [p for p in parents if self.has_version(p)]

    if delta and not present_parents:
        delta = False

    # The digest covers the text as given, before any newline is added.
    digest = sha_strings(lines)
    options = []
    if lines and lines[-1][-1] != '\n':
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if present_parents and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated)

    if not delta:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)
    else:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)

    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return lines

714

715

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """

717

718

def _clone_text(self, new_version_id, old_version_id, parents):

719

"""See VersionedFile.clone_text()."""

720

# FIXME RBC 20060228 make fast by only inserting an index with null

721

# delta.

722

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

723

724

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Single-version convenience wrapper around get_line_list.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

727

728

def _get_record_map(self, version_ids):

729

"""Produce a dictionary of knit records.

730

731

The keys are version_ids, the values are tuples of (method, content,

732

digest, next).

733

method is the way the content should be applied.

734

content is a KnitContent object.

735

digest is the SHA1 digest of this version id after all steps are done

736

next is the build-parent of the version, i.e. the leftmost ancestor.

737

If the method is fulltext, next will be None.

738

"""

739

position_map = self._get_components_positions(version_ids)

740

# c = component_id, m = method, p = position, s = size, n = next

741

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

742

record_map = {}

743

for component_id, content, digest in \

744

self._data.read_records_iter(records):

745

method, position, size, next = position_map[component_id]

746

record_map[component_id] = method, content, digest, next

747

748

return record_map

749

750

def get_text(self, version_id):
    """See VersionedFile.get_text

    Single-version convenience wrapper around get_texts.
    """
    texts = self.get_texts([version_id])
    return texts[0]

753

754

def get_texts(self, version_ids):
    """Return the texts of listed versions, each joined into one string."""
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

756

757

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    The result is in the same order as version_ids.
    """
    text_map, content_map = self._get_content_maps(version_ids)
    line_lists = []
    for version_id in version_ids:
        line_lists.append(text_map[version_id])
    return line_lists

761

762

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
    the requested versions and content_map contains the KnitContents.
    Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is missing.
    :raises KnitCorrupt: if a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at a fulltext or
        # at a component whose content has already been built.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = build_parent

        # Replay the chain oldest first.  The loop leaves `digest` bound
        # to the digest of the requested version (the last one applied).
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in content_map:
                content = content_map[component_id]
                continue
            version_idx = self._index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data[:],
                                                      version_idx)
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the shared content_map entry keeps its
            # trailing newline.
            content = content.copy()
            stripped = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], stripped)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

818

819

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Yields every line stored in the record of each requested version: all
    lines of a fulltext record, or the lines carried by the hunks of a
    line-delta record.  Records are visited in the order of
    self.versions().

    :param version_ids: versions to walk; None means all versions.
    :param pb: progress bar to update; None means a DummyProgress is used.
    :raises RevisionNotPresent: if a requested version is missing.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need version_id, position, length
    requested_versions = list(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # A set keeps the membership test below constant-time instead of
    # scanning the request list once per version in the knit.
    wanted = set(requested_versions)
    # get a in-component-order queue:
    version_id_records = []
    for version_id in self.versions():
        if version_id in wanted:
            data_pos, length = self._index.get_position(version_id)
            version_id_records.append((version_id, data_pos, length))

    total = len(version_id_records)
    for record_number, (version_id, data, sha_value) in \
            enumerate(self._data.read_records_iter(version_id_records)):
        pb.update('Walking content.', record_number, total)
        method = self._index.get_method(version_id)
        version_idx = self._index.lookup(version_id)
        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            content = self.factory.parse_fulltext(data, version_idx)
            for line in content.text():
                yield line
        else:
            delta = self.factory.parse_line_delta(data, version_idx)
            for start, end, count, lines in delta:
                for origin, line in lines:
                    yield line

    pb.update('Walking content.', total, total)

860

861

def num_versions(self):
    """See VersionedFile.num_versions().

    The count comes from the index.
    """
    count = self._index.num_versions()
    return count

864

865

__len__ = num_versions

866

867

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields (origin, text) pairs from the content of version_id.
    """
    annotated = self._get_content(version_id).annotate_iter()
    for origin, text in annotated:
        yield origin, text

872

873

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index raises KeyError for version_id.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

882

883

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index raises KeyError for version_id.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

889

890

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    A single version id given as a string is treated as a one-element
    list; an empty request yields an empty list.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)
    return []

898

899

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    A single version id given as a string is treated as a one-element
    list; an empty request yields an empty list.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry_with_ghosts(versions)
    return []

907

908

#@deprecated_method(zero_eight)

909

def walk(self, version_ids):
    """See VersionedFile.walk.

    Builds a temporary Weave holding the ancestry of version_ids and
    yields whatever that weave's walk() produces.
    """
    # We take the short path here, and extract all relevant texts
    # and put them in a weave and let that do all the work.  Far
    # from optimal, but is much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    w = Weave(self.filename)
    ancestry = self.get_ancestry(version_ids)
    sorted_graph = topo_sort(self._index.get_graph())

    # Add versions parents-first, restricted to the requested ancestry.
    for version_id in [vid for vid in sorted_graph if vid in ancestry]:
        lines = self.get_lines(version_id)
        parents = self.get_parents(version_id)
        w.add_lines(version_id, parents, lines)

    for lineno, insert_id, dset, line in w.walk(version_ids):
        yield lineno, insert_id, dset, line

928

929

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Yields (state, text) pairs where state is one of 'unchanged',
    'killed-a', 'killed-b', 'new-a' or 'new-b'.
    """
    ancestors_b = set(self.get_ancestry(ver_b))
    ancestors_a = set(self.get_ancestry(ver_a))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [t for (a, t) in annotated_a]
    plain_b = [t for (a, t) in annotated_b]
    blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()
    a_cur = 0
    b_cur = 0
    for ai, bi, l in blocks:
        # process all mismatched sections
        # (last mismatched section is handled because blocks always
        # includes a 0-length last block)
        for revision, text in annotated_a[a_cur:ai]:
            # A line only in a: if b's ancestry already had its revision,
            # b removed it; otherwise a introduced it.
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:bi]:
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # and now the matched section
        a_cur = ai + l
        b_cur = bi + l
        for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):
            assert text_a == text_b
            yield "unchanged", text_a

967

968

969

class _KnitComponentFile(object):

970

"""One of the files used to implement a knit database"""

971

972

def __init__(self, transport, filename, mode, file_mode=None,

973

create_parent_dir=False, dir_mode=None):

974

self._transport = transport

975

self._filename = filename

976

self._mode = mode

977

self._file_mode = file_mode

978

self._dir_mode = dir_mode

979

self._create_parent_dir = create_parent_dir

980

self._need_to_create = False

981

982

def check_header(self, fp):

983

line = fp.readline()

984

if line != self.HEADER:

985

raise KnitHeaderError(badline=line)

986

987

def commit(self):

988

"""Commit is a nop."""

989

990

def __repr__(self):

991

return '%s(%s)' % (self.__class__.__name__, self._filename)

992

993

994

class _KnitIndex(_KnitComponentFile):

995

"""Manages knit index file.

996

997

The index is already kept in memory and read on startup, to enable

998

fast lookups of revision information. The cursor of the index

999

file is always pointing to the end, making it easy to append

1000

entries.

1001

1002

_cache is a cache for fast mapping from version id to a Index

1003

object.

1004

1005

_history is a cache for fast mapping from indexes to version ids.

1006

1007

The index data format is dictionary compressed when it comes to

1008

parent references; an index entry may only have parents with a

1009

lower index number. As a result, the index is topologically sorted.

1010

1011

Duplicate entries may be written to the index for a single version id

1012

if this is done then the latter one completely replaces the former:

1013

this allows updates to correct version and parent information.

1014

Note that the two entries may share the delta, and that successive

1015

annotations and references MUST point to the first entry.

1016

1017

The index file on disc contains a header, followed by one line per knit

1018

record. The same revision can be present in an index file more than once.

1019

The first occurrence gets assigned a sequence number starting from 0.

1020

1021

The format of a single line is

1022

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1023

REVISION_ID is a utf8-encoded revision id

1024

FLAGS is a comma separated list of flags about the record. Values include

1025

no-eol, line-delta, fulltext.

1026

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1027

that the compressed data starts at.

1028

LENGTH is the ascii representation of the length of the data file.

1029

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1030

REVISION_ID.

1031

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1032

revision id already in the knit that is a parent of REVISION_ID.

1033

The ' :' marker is the end of record marker.

1034

1035

partial writes:

1036

when a write is interrupted to the index file, it will result in a line that

1037

does not end in ' :'. If the ' :' is not present at the end of a line, or at

1038

the end of the file, then the record that is missing it will be ignored by

1039

the parser.

1040

1041

When writing new records to the index file, the data is preceded by '\n'

1042

to ensure that records always start on new lines even if the last write was

1043

interrupted. As a result it's normal for the last line in the index to be

1044

missing a trailing newline. One can be added with no harmful effects.

1045

"""

1046

1047

HEADER = "# bzr knit index 8\n"

1048

1049

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1050

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1051

1052

def _cache_version(self, version_id, options, pos, size, parents):

1053

"""Cache a version record in the history array and index cache.

1054

1055

This is inlined into __init__ for performance. KEEP IN SYNC.

1056

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1057

indexes).

1058

"""

1059

# only want the _history index to reference the 1st index entry

1060

# for version_id

1061

if version_id not in self._cache:

1062

index = len(self._history)

1063

self._history.append(version_id)

1064

else:

1065

index = self._cache[version_id][5]

1066

self._cache[version_id] = (version_id,

1067

options,

1068

pos,

1069

size,

1070

parents,

1071

index)

1072

1073

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Load the knit index from filename on transport into memory.

    Every well-formed line of the index is parsed into self._cache and
    self._history.  Lines with fewer than five fields, or whose last
    field is not ':', are skipped.

    :param mode: access mode; a missing file is only tolerated for 'w'.
    :param create: if true (and mode is 'w') a missing index is created
        rather than raising.
    :param delay_create: if true, a missing index is not written now;
        self._need_to_create is set instead.
    :param file_mode: passed as mode= when the header is written.
    :param create_parent_dir: stored by the base class.
    :param dir_mode: stored by the base class.
    :raises NoSuchFile: re-raised when the index is missing and
        mode != 'w' or create is false.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        count = 0
        total = 1
        try:
            pb.update('read knit index', count, total)
            fp = self._transport.get(self._filename)
            try:
                self.check_header(fp)
                # readlines reads the whole file at once:
                # bad for transports like http, good for local disk
                # we save 60 ms doing this one change (
                # from calling readline each time to calling
                # readlines once.
                # probably what we want for nice behaviour on
                # http is a incremental readlines that yields, or
                # a check for local vs non local indexes,
                for l in fp.readlines():
                    rec = l.split()
                    if len(rec) < 5 or rec[-1] != ':':
                        # corrupt line.
                        # FIXME: in the future we should determine if its a
                        # short write - and ignore it
                        # or a different failure, and raise. RBC 20060407
                        continue
                    count += 1
                    total += 1
                    #pb.update('read knit index', count, total)
                    # See self._parse_parents
                    # Fields 4..-1 are the parent references.
                    parents = []
                    for value in rec[4:-1]:
                        if '.' == value[0]:
                            # uncompressed reference
                            parents.append(value[1:])
                        else:
                            # this is 15/4000ms faster than isinstance,
                            # (in lsprof)
                            # this function is called thousands of times a
                            # second so small variations add up.
                            assert value.__class__ is str
                            parents.append(self._history[int(value)])
                    # end self._parse_parents
                    # self._cache_version(rec[0],
                    #                     rec[1].split(','),
                    #                     int(rec[2]),
                    #                     int(rec[3]),
                    #                     parents)
                    # --- self._cache_version
                    # only want the _history index to reference the 1st
                    # index entry for version_id
                    version_id = rec[0]
                    if version_id not in self._cache:
                        index = len(self._history)
                        self._history.append(version_id)
                    else:
                        index = self._cache[version_id][5]
                    self._cache[version_id] = (version_id,
                                               rec[1].split(','),
                                               int(rec[2]),
                                               int(rec[3]),
                                               parents,
                                               index)
                    # --- self._cache_version
            finally:
                fp.close()
        except NoSuchFile, e:
            # No index yet: only acceptable when opened for writing with
            # create requested.
            if mode != 'w' or not create:
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(self._filename,
                    self.HEADER, mode=self._file_mode)

    finally:
        pb.update('read knit index', total, total)
        pb.finished()

1162

1163

def _parse_parents(self, compressed_parents):

1164

"""convert a list of string parent values into version ids.

1165

1166

ints are looked up in the index.

1167

.FOO values are ghosts and converted in to FOO.

1168

1169

NOTE: the function is retained here for clarity, and for possible

1170

use in partial index reads. However bulk processing now has

1171

it inlined in __init__ for inner-loop optimisation.

1172

"""

1173

result = []

1174

for value in compressed_parents:

1175

if value[-1] == '.':

1176

# uncompressed reference

1177

result.append(value[1:])

1178

else:

1179

# this is 15/4000ms faster than isinstance,

1180

# this function is called thousands of times a

1181

# second so small variations add up.

1182

assert value.__class__ is str

1183

result.append(self._history[int(value)])

1184

return result

1185

1186

def get_graph(self):
    """Return a list of (version_id, parents) pairs for every cached entry."""
    return [(version_id, entry[4])
            for version_id, entry in self._cache.iteritems()]

1191

1192

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    Parents that are not in the index (ghosts) are left out of the result.
    """
    cache = self._cache
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        # got the parents ok
        # trim ghosts
        known_parents = [p for p in cache[version][4] if p in cache]
        # queue every parent that has not been completed yet
        todo.update([p for p in known_parents if p not in graph])
        graph[version] = known_parents
    return topo_sort(graph.items())

1209

1210

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Versions missing from the index are kept in the graph with no parents.
    """
    cache = self._cache
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        if version not in cache:
            # ghost, fake it
            graph[version] = []
            continue
        # got the parents ok
        parents = cache[version][4]
        for parent in parents:
            if parent not in graph:
                todo.add(parent)
        graph[version] = parents
    return topo_sort(graph.items())

1230

1231

def num_versions(self):
    """Return the number of distinct version ids in the history list."""
    history = self._history
    return len(history)

1233

1234

__len__ = num_versions

1235

1236

def get_versions(self):
    """Return the history list itself (not a copy)."""
    history = self._history
    return history

1238

1239

def idx_to_name(self, idx):
    """Return the version id stored at position idx of the history list."""
    history = self._history
    return history[idx]

1241

1242

def lookup(self, version_id):
    """Return the history index recorded for version_id.

    version_id must already be in the cache.
    """
    assert version_id in self._cache
    entry = self._cache[version_id]
    return entry[5]

1245

1246

def _version_list_to_index(self, versions):
    """Encode versions as the space-separated parent field of an index line.

    A version in the cache is written as its history index; any other
    version is written as '.' followed by its utf8-encoded id.
    """
    encode_utf8 = cache_utf8.encode
    tokens = []
    for version in versions:
        if version not in self._cache:
            tokens.append('.' + encode_utf8(version))
        else:
            # -- inlined lookup() --
            tokens.append(str(self._cache[version][5]))
            # -- end lookup () --
    return ' '.join(tokens)

1257

1258

def add_version(self, version_id, options, pos, size, parents):
    """Add a version record to the index.

    Wraps the record in a one-element tuple and calls add_versions.
    """
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1261

1262

def add_versions(self, versions):
    """Add multiple versions to the index.

    Every version is formatted into an index line and recorded through
    _cache_version(); the lines are then written in a single transport
    call.  If anything fails, the in-memory history and cache are put back
    to what they were on entry and the error is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
    """
    new_lines = []
    encode_utf8 = cache_utf8.encode
    # snapshots used to undo the in-memory changes on failure
    saved_history = self._history[:]
    saved_cache = self._cache.copy()

    try:
        for version_id, options, pos, size, parents in versions:
            fields = (encode_utf8(version_id),
                      ','.join(options),
                      pos,
                      size,
                      self._version_list_to_index(parents))
            line = "\n%s %s %s %s %s :" % fields
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            new_lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if self._need_to_create:
            # first write: the file starts with the header, then the lines
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(new_lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(
                self._filename, sio,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(new_lines))
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = saved_history
        self._cache = saved_cache
        raise

1301

1302

def has_version(self, version_id):
    """Return True if ``version_id`` has an entry in the index cache."""
    present = version_id in self._cache
    return present

1305

1306

def get_position(self, version_id):
    """Return (position, size) of the data for the specified version.

    These are elements 2 and 3 of the version's cache entry.
    """
    entry = self._cache[version_id]
    pos = entry[2]
    size = entry[3]
    return (pos, size)

1310

1311

def get_method(self, version_id):
    """Return the compression method of the specified version.

    :return: 'fulltext' when that option is recorded for the version,
        otherwise 'line-delta' (which must then be among its options).
    """
    options = self._cache[version_id][1]
    if 'fulltext' not in options:
        assert 'line-delta' in options
        return 'line-delta'
    return 'fulltext'

1319

1320

def get_options(self, version_id):
    """Return the options stored for ``version_id`` (cache entry element 1)."""
    entry = self._cache[version_id]
    return entry[1]

1322

1323

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    A parent counts as a ghost when it has no entry in the index cache.
    """
    cache = self._cache
    present = []
    for parent in cache[version_id][4]:
        if parent in cache:
            present.append(parent)
    return present

1327

1328

def get_parents_with_ghosts(self, version_id):
    """Return the recorded parents of ``version_id``, ghosts included.

    This is element 4 of the cache entry, returned as stored.
    """
    entry = self._cache[version_id]
    return entry[4]

1331

1332

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: an iterable of version ids to look for.
    :raises RevisionNotPresent: for the first requested version id that
        has no entry in the index cache.
    """
    cache = self._cache
    for version_id in version_ids:
        if version_id not in cache:
            # Report against self._filename, the attribute the rest of
            # this index uses for the file name.
            raise RevisionNotPresent(version_id, self._filename)

1340

1341

1342

class _KnitData(_KnitComponentFile):

1343

"""Contents of the knit data file"""

1344

1345

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Set up the data file wrapper.

    transport, filename, mode, file_mode, create_parent_dir and dir_mode
    are passed straight to _KnitComponentFile.__init__.

    :param create: when true, the file is to be created.
    :param delay_create: with ``create``, only note that the file still
        needs creating instead of writing an empty file immediately.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._checked = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}
    self._do_cache = False
    if create and delay_create:
        # creation is left to the first write
        self._need_to_create = create
    elif create:
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

1365

1366

def enable_cache(self):
    """Turn on caching: records read or added from now on are kept."""
    setattr(self, '_do_cache', True)

1369

1370

def clear_cache(self):
    """Turn caching off and drop every cached record.

    The cache is replaced by a fresh dictionary; the old one is not
    modified.
    """
    self._do_cache, self._cache = False, {}

1374

1375

def _open_file(self):
    """Return the transport's file object for the data file.

    :return: whatever the transport's get() returns, or None when the
        transport raises NoSuchFile.
    """
    try:
        handle = self._transport.get(self._filename)
    except NoSuchFile:
        return None
    return handle

1381

1382

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is gzip-compressed text: a ``version <id> <line count>
    <digest>`` line, the content lines, then an ``end <id>`` line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    buf = StringIO()
    gz = GzipFile(None, mode='wb', fileobj=buf)

    utf8_id = cache_utf8.encode(version_id)
    header = "version %s %d %s\n" % (utf8_id, len(lines), digest)
    footer = "end %s\n" % utf8_id
    gz.writelines(chain([header], lines, [footer]))
    gz.close()
    # position after close() is the total compressed size
    compressed_size = buf.tell()

    buf.seek(0)
    return compressed_size, buf

1402

1403

def add_raw_record(self, raw_data):
    """Append a prepared record to the data file.

    If the file still has to be created, it is written out with raw_data
    as its entire content.

    :return: the offset in the data file raw_data was written.
    """
    assert isinstance(raw_data, str), 'data must be plain bytes'
    if self._need_to_create:
        self._transport.put_bytes_non_atomic(
            self._filename, raw_data,
            create_parent_dir=self._create_parent_dir,
            mode=self._file_mode,
            dir_mode=self._dir_mode)
        self._need_to_create = False
        # the record is the first thing in the new file
        return 0
    return self._transport.append_bytes(self._filename, raw_data)

1418

1419

def add_record(self, version_id, digest, lines):
    """Write a new text record to disk.

    When caching is enabled the raw record is also stored in the cache
    under version_id.

    :return: (start position of the record in the file, record size).
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    # write to disk
    if self._need_to_create:
        self._transport.put_file_non_atomic(
            self._filename, sio,
            create_parent_dir=self._create_parent_dir,
            mode=self._file_mode,
            dir_mode=self._dir_mode)
        self._need_to_create = False
        start_pos = 0
    else:
        start_pos = self._transport.append_file(self._filename, sio)
    if self._do_cache:
        self._cache[version_id] = sio.getvalue()
    return start_pos, size

1436

1437

def _parse_record_header(self, version_id, raw_data):
    """Parse a record header for consistency.

    The header is the first line of the gzip-compressed record.  It must
    consist of four whitespace-separated fields, the second of which
    decodes to version_id.

    :return: the header and the decompressor stream.
             as (stream, header_record)
    :raises KnitCorrupt: if the header does not have four fields or names
        a different version.
    """
    stream = GzipFile(mode='rb', fileobj=StringIO(raw_data))
    header = stream.readline().split()
    if len(header) != 4:
        raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
    found_id = header[1]
    if cache_utf8.decode(found_id) != version_id:
        message = 'unexpected version, wanted %r, got %r' % (
            version_id, found_id)
        raise KnitCorrupt(self._filename, message)
    return stream, header

1452

1453

def _parse_record(self, version_id, data):
    """Decompress and validate one raw record.

    :param version_id: the version the record is expected to contain.
    :param data: the raw (compressed) record.
    :return: (content lines, digest field of the record header).
    :raises KnitCorrupt: if the header is invalid, the end line is missing
        or wrong, or the number of content lines does not match the count
        given in the header.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df, rec = self._parse_record_header(version_id, data)
    try:
        record_contents = df.readlines()
    finally:
        # release the decompressor on error paths too
        df.close()
    if not record_contents:
        # nothing after the header, so there is no end line to check
        raise KnitCorrupt(self._filename,
                          'missing end line for version %r' % (version_id,))
    end_line = record_contents.pop()
    # The data comes from disk: report inconsistencies as corruption rather
    # than with an assert, which disappears under python -O.
    if len(record_contents) != int(rec[2]):
        raise KnitCorrupt(self._filename,
                          'incorrect number of lines %s != %s for version %r'
                          % (len(record_contents), int(rec[2]), version_id))
    if end_line != 'end %s\n' % cache_utf8.encode(version_id):
        raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                          % (end_line, version_id))
    return record_contents, rec[3]

1467

1468

def read_records_iter_raw(self, records):
    """Read text records from data file and yield raw data.

    This unpacks enough of the text record to validate the id is
    as expected but that's all.

    :param records: a list of (version_id, pos, size) entries.  It is
        iterated more than once, so it must not be a one-shot iterator.
    :return: yields (version_id, raw_data) in the order requested.
    """
    # Decide once, up front, which records are served from the cache.  The
    # loop below adds to self._cache as it goes, so testing the cache again
    # while iterating would get out of step with the readv request whenever
    # a version is listed more than once.
    cached = {}
    needed_offsets = []
    for version_id, pos, size in records:
        if version_id in self._cache:
            cached[version_id] = self._cache[version_id]
        else:
            needed_offsets.append((pos, size))

    if len(records):
        # grab the disk data needed.
        # uses readv so nice and fast we hope.
        raw_records = self._transport.readv(self._filename, needed_offsets)

    for version_id, pos, size in records:
        if version_id in cached:
            # This data has already been validated
            data = cached[version_id]
        else:
            pos, data = raw_records.next()

            # validate the header
            df, rec = self._parse_record_header(version_id, data)
            df.close()
            # only remember data whose header checked out
            if self._do_cache:
                self._cache[version_id] = data
        yield version_id, data

1503

1504

def read_records_iter(self, records):
    """Read text records from data file and yield result.

    Records already held in the cache are yielded first.  The rest are
    requested from the transport sorted by position and yielded as they
    are read, so results do not follow the order requested.  A record
    requested several times is yielded only once.

    :param records: A list of (version_id, pos, len) entries
    :return: Yields (version_id, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    by_position = operator.itemgetter(1)
    if not self._cache:
        to_read = sorted(set(records), key=by_position)
    else:
        # Skip records we have already seen
        served = set()
        pending = set()
        for record in records:
            version_id = record[0]
            if version_id not in self._cache:
                pending.add(record)
            elif version_id not in served:
                served.add(version_id)
                content, digest = self._parse_record(
                    version_id, self._cache[version_id])
                yield (version_id, content, digest)
        to_read = sorted(pending, key=by_position)

    if not to_read:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    offsets = [(pos, size) for version_id, pos, size in to_read]
    readv_response = self._transport.readv(self._filename, offsets)

    for request, response in izip(iter(to_read), readv_response):
        version_id = request[0]
        pos, data = response
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

1549

1550

def read_records(self, records):
    """Read records into a dictionary.

    :return: a dict mapping each version id yielded by read_records_iter
        to its (content, digest) pair.
    """
    return dict((record_id, (content, digest))
                for record_id, content, digest
                in self.read_records_iter(records))

1557

1558

1559

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.

        :return: True only when both source and target are
            KnitVersionedFile instances.
        """
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records of every needed version from the source knit
        into the target, then extends the parent lists of versions both
        knits hold but disagree about.  The ``pb`` argument is not used: a
        nested progress bar is always taken from bzrlib.ui.ui_factory.

        :return: the number of versions copied.
        """
        source = self.source
        target = self.target
        assert isinstance(source, KnitVersionedFile)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            target_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-only parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                extra_parents = source_parents.difference(target_parents)
                needed_versions.update(
                    extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = [candidate for candidate in full_list
                            if not target.has_version(candidate)
                            and candidate in needed_versions]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            scheduled = set()
            source_index = source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in scheduled or
                            not source.has_version(parent))
                data_pos, data_size = source_index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), (version_id2, options, parents) in \
                    izip(raw_stream, copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append(
                    (version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                new_parents = (target.get_parents_with_ghosts(version)
                               + list(n2.difference(n1)))
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

1674

1675

1676

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :return: True only when source is a bzrlib.weave.Weave and target
            is a KnitVersionedFile.
        """
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the lines of every needed version from the source weave to the
        target knit, then extends the parent lists of versions both hold
        where the source knows parents the target lacks.  The ``pb``
        argument is not used: a nested progress bar is always taken from
        bzrlib.ui.ui_factory.

        :return: the number of versions added.
        """
        source = self.source
        target = self.target
        assert isinstance(source, bzrlib.weave.Weave)
        assert isinstance(target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            target_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(target.get_parents_with_ghosts(version))
                source_parents = set(source.get_parents(version))
                # if all of the source's parents are in the target, it's fine.
                extra_parents = source_parents.difference(target_parents)
                if not extra_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-only parents.
                for parent in source_parents:
                    if parent in target_parents:
                        # safe
                        continue
                    if version in source.get_ancestry(parent):
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                needed_versions.update(
                    extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = [candidate for candidate in full_list
                            if not target.has_version(candidate)
                            and candidate in needed_versions]

            # do the join:
            total = len(version_list)
            for done, version_id in enumerate(version_list):
                pb.update("Converting to knit", done, total)
                parents = source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
            # every listed version was added
            count = total

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                new_parents = (target.get_parents_with_ghosts(version)
                               + list(n2.difference(n1)))
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

1767

1768

1769

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.  Only find_longest_match is overridden;
    everything else comes from difflib.SequenceMatcher.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        # Bind the attributes to locals once; they are used in every
        # iteration of the loops below.
        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        # Best match found so far; with size 0 this is already the
        # "no blocks match" answer.
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # nothing = []
        # NOTE(review): b2jget is not used anywhere below; b2j is indexed
        # directly inside the try block instead.
        b2jget = b2j.get
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            # j2len is rebound at the end of every iteration, so its get
            # method has to be fetched again each time round.
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            # 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)
            # +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>
            # +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>
            # to
            # 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)
            # +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>
            # +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                # a[i] has no entry in b2j: no match can end at this i
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            break
                        # extend the match that ended at a[i-1], b[j-1]
                        # (length 0 if there was none)
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            # the match ends at (i, j), so it starts k-1
                            # elements earlier in both sequences
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize

1901

Older »