/brz/remove-bazaar : revision 2094.3.5

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: John Arbash Meinel
Date: 2006-11-10 21:06:11 UTC
mto: This revision was merged to the branch mainline in revision 2159.
Revision ID: john@arbash-meinel.com-20061110210611-ee346404477ecb86

Fix imports to ensure modules are loaded before they are used

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.a

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.ui

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

100

101

# TODO: accommodate binaries, perhaps by storing a byte count

102

103

# TODO: function to check whole file

104

105

# TODO: atomically append data, then measure backwards from the cursor

106

# position after writing to work out where it was located. we may need to

107

# bypass python file buffering.

108

109

DATA_SUFFIX = '.knit'

110

INDEX_SUFFIX = '.kndx'

111

112

113

class KnitContent(object):

114

"""Content of a knit version to which deltas can be applied."""

115

116

def __init__(self, lines):

117

self._lines = lines

118

119

def annotate_iter(self):

120

"""Yield tuples of (origin, text) for each content line."""

121

for origin, text in self._lines:

122

yield origin, text

123

124

def annotate(self):

125

"""Return a list of (origin, text) tuples."""

126

return list(self.annotate_iter())

127

128

def line_delta_iter(self, new_lines):

129

"""Generate line-based delta from this content to new_lines."""

130

new_texts = [text for origin, text in new_lines._lines]

131

old_texts = [text for origin, text in self._lines]

132

s = KnitSequenceMatcher(None, old_texts, new_texts)

133

for op in s.get_opcodes():

134

if op[0] == 'equal':

135

continue

136

# ofrom oto length data

137

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

138

139

def line_delta(self, new_lines):

140

return list(self.line_delta_iter(new_lines))

141

142

def text(self):

143

return [text for origin, text in self._lines]

144

145

def copy(self):

146

return KnitContent(self._lines[:])

147

148

149

class _KnitFactory(object):

150

"""Base factory for creating content objects."""

151

152

def make(self, lines, version):

153

num_lines = len(lines)

154

return KnitContent(zip([version] * num_lines, lines))

155

156

157

class KnitAnnotateFactory(_KnitFactory):

158

"""Factory for creating annotated Content objects."""

159

160

annotated = True

161

162

def parse_fulltext(self, content, version):

163

"""Convert fulltext to internal representation

164

165

fulltext content is of the format

166

revid(utf8) plaintext\n

167

internal representation is of the format:

168

(revid, plaintext)

169

"""

170

decode_utf8 = cache_utf8.decode

171

lines = []

172

for line in content:

173

origin, text = line.split(' ', 1)

174

lines.append((decode_utf8(origin), text))

175

return KnitContent(lines)

176

177

def parse_line_delta_iter(self, lines):

178

for result_item in self.parse_line_delta[lines]:

179

yield result_item

180

181

def parse_line_delta(self, lines, version):

182

"""Convert a line based delta into internal representation.

183

184

line delta is in the form of:

185

intstart intend intcount

186

1..count lines:

187

revid(utf8) newline\n

188

internal representation is

189

(start, end, count, [1..count tuples (revid, newline)])

190

"""

191

decode_utf8 = cache_utf8.decode

192

result = []

193

lines = iter(lines)

194

next = lines.next

195

# walk through the lines parsing.

196

for header in lines:

197

start, end, count = [int(n) for n in header.split(',')]

198

contents = []

199

remaining = count

200

while remaining:

201

origin, text = next().split(' ', 1)

202

remaining -= 1

203

contents.append((decode_utf8(origin), text))

204

result.append((start, end, count, contents))

205

return result

206

207

def lower_fulltext(self, content):

208

"""convert a fulltext content record into a serializable form.

209

210

see parse_fulltext which this inverts.

211

"""

212

encode_utf8 = cache_utf8.encode

213

return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

214

215

def lower_line_delta(self, delta):

216

"""convert a delta into a serializable form.

217

218

See parse_line_delta which this inverts.

219

"""

220

encode_utf8 = cache_utf8.encode

221

out = []

222

for start, end, c, lines in delta:

223

out.append('%d,%d,%d\n' % (start, end, c))

224

out.extend(encode_utf8(origin) + ' ' + text

225

for origin, text in lines)

226

return out

227

228

229

class KnitPlainFactory(_KnitFactory):

230

"""Factory for creating plain Content objects."""

231

232

annotated = False

233

234

def parse_fulltext(self, content, version):

235

"""This parses an unannotated fulltext.

236

237

Note that this is not a noop - the internal representation

238

has (versionid, line) - its just a constant versionid.

239

"""

240

return self.make(content, version)

241

242

def parse_line_delta_iter(self, lines, version):

243

while lines:

244

header = lines.pop(0)

245

start, end, c = [int(n) for n in header.split(',')]

246

yield start, end, c, zip([version] * c, lines[:c])

247

del lines[:c]

248

249

def parse_line_delta(self, lines, version):

250

return list(self.parse_line_delta_iter(lines, version))

251

252

def lower_fulltext(self, content):

253

return content.text()

254

255

def lower_line_delta(self, delta):

256

out = []

257

for start, end, c, lines in delta:

258

out.append('%d,%d,%d\n' % (start, end, c))

259

out.extend([text for origin, text in lines])

260

return out

261

262

263

def make_empty_knit(transport, relpath):

264

"""Construct a empty knit at the specified location."""

265

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

266

k._data._open_file()

267

268

269

class KnitVersionedFile(VersionedFile):

270

"""Weave-like structure with faster random access.

271

272

A knit stores a number of texts and a summary of the relationships

273

between them. Texts are identified by a string version-id. Texts

274

are normally stored and retrieved as a series of lines, but can

275

also be passed as single strings.

276

277

Lines are stored with the trailing newline (if any) included, to

278

avoid special cases for files with no final newline. Lines are

279

composed of 8-bit characters, not unicode. The combination of

280

these approaches should mean any 'binary' file can be safely

281

stored and retrieved.

282

"""

283

284

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

285

factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,

286

create=False, create_parent_dir=False, delay_create=False,

287

dir_mode=None):

288

"""Construct a knit at location specified by relpath.

289

290

:param create: If not True, only open an existing knit.

291

:param create_parent_dir: If True, create the parent directory if

292

creating the file fails. (This is used for stores with

293

hash-prefixes that may not exist yet)

294

:param delay_create: The calling code is aware that the knit won't

295

actually be created until the first data is stored.

296

"""

297

if deprecated_passed(basis_knit):

298

warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"

299

" deprecated as of bzr 0.9.",

300

DeprecationWarning, stacklevel=2)

301

if access_mode is None:

302

access_mode = 'w'

303

super(KnitVersionedFile, self).__init__(access_mode)

304

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

305

self.transport = transport

306

self.filename = relpath

307

self.factory = factory or KnitAnnotateFactory()

308

self.writable = (access_mode == 'w')

309

self.delta = delta

310

311

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

312

access_mode, create=create, file_mode=file_mode,

313

create_parent_dir=create_parent_dir, delay_create=delay_create,

314

dir_mode=dir_mode)

315

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

316

access_mode, create=create and not len(self), file_mode=file_mode,

317

create_parent_dir=create_parent_dir, delay_create=delay_create,

318

dir_mode=dir_mode)

319

320

def __repr__(self):

321

return '%s(%s)' % (self.__class__.__name__,

322

self.transport.abspath(self.filename))

323

324

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

325

"""See VersionedFile._add_delta()."""

326

self._check_add(version_id, []) # should we check the lines ?

327

self._check_versions_present(parents)

328

present_parents = []

329

ghosts = []

330

parent_texts = {}

331

for parent in parents:

332

if not self.has_version(parent):

333

ghosts.append(parent)

334

else:

335

present_parents.append(parent)

336

337

if delta_parent is None:

338

# reconstitute as full text.

339

assert len(delta) == 1 or len(delta) == 0

340

if len(delta):

341

assert delta[0][0] == 0

342

assert delta[0][1] == 0, delta[0][1]

343

return super(KnitVersionedFile, self)._add_delta(version_id,

344

parents,

345

delta_parent,

346

sha1,

347

noeol,

348

delta)

349

350

digest = sha1

351

352

options = []

353

if noeol:

354

options.append('no-eol')

355

356

if delta_parent is not None:

357

# determine the current delta chain length.

358

# To speed the extract of texts the delta chain is limited

359

# to a fixed number of deltas. This should minimize both

360

# I/O and the time spend applying deltas.

361

count = 0

362

delta_parents = [delta_parent]

363

while count < 25:

364

parent = delta_parents[0]

365

method = self._index.get_method(parent)

366

if method == 'fulltext':

367

break

368

delta_parents = self._index.get_parents(parent)

369

count = count + 1

370

if method == 'line-delta':

371

# did not find a fulltext in the delta limit.

372

# just do a normal insertion.

373

return super(KnitVersionedFile, self)._add_delta(version_id,

374

parents,

375

delta_parent,

376

sha1,

377

noeol,

378

delta)

379

380

options.append('line-delta')

381

store_lines = self.factory.lower_line_delta(delta)

382

383

where, size = self._data.add_record(version_id, digest, store_lines)

384

self._index.add_version(version_id, options, where, size, parents)

385

386

def _add_raw_records(self, records, data):

387

"""Add all the records 'records' with data pre-joined in 'data'.

388

389

:param records: A list of tuples(version_id, options, parents, size).

390

:param data: The data for the records. When it is written, the records

391

are adjusted to have pos pointing into data by the sum of

392

the preceding records sizes.

393

"""

394

# write all the data

395

pos = self._data.add_raw_record(data)

396

offset = 0

397

index_entries = []

398

for (version_id, options, parents, size) in records:

399

index_entries.append((version_id, options, pos+offset,

400

size, parents))

401

if self._data._do_cache:

402

self._data._cache[version_id] = data[offset:offset+size]

403

offset += size

404

self._index.add_versions(index_entries)

405

406

def enable_cache(self):

407

"""Start caching data for this knit"""

408

self._data.enable_cache()

409

410

def clear_cache(self):

411

"""Clear the data cache only."""

412

self._data.clear_cache()

413

414

def copy_to(self, name, transport):

415

"""See VersionedFile.copy_to()."""

416

# copy the current index to a temp index to avoid racing with local

417

# writes

418

transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',

419

self.transport.get(self._index._filename))

420

# copy the data file

421

f = self._data._open_file()

422

try:

423

transport.put_file(name + DATA_SUFFIX, f)

424

finally:

425

f.close()

426

# move the copied index into place

427

transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

428

429

def create_empty(self, name, transport, mode=None):

430

return KnitVersionedFile(name, transport, factory=self.factory,

431

delta=self.delta, create=True)

432

433

def _fix_parents(self, version, new_parents):

434

"""Fix the parents list for version.

435

436

This is done by appending a new version to the index

437

with identical data except for the parents list.

438

the parents list must be a superset of the current

439

list.

440

"""

441

current_values = self._index._cache[version]

442

assert set(current_values[4]).difference(set(new_parents)) == set()

443

self._index.add_version(version,

444

current_values[1],

445

current_values[2],

446

current_values[3],

447

new_parents)

448

449

def get_delta(self, version_id):

450

"""Get a delta for constructing version from some other version."""

451

if not self.has_version(version_id):

452

raise RevisionNotPresent(version_id, self.filename)

453

454

parents = self.get_parents(version_id)

455

if len(parents):

456

parent = parents[0]

457

else:

458

parent = None

459

data_pos, data_size = self._index.get_position(version_id)

460

data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]

461

version_idx = self._index.lookup(version_id)

462

noeol = 'no-eol' in self._index.get_options(version_id)

463

if 'fulltext' == self._index.get_method(version_id):

464

new_content = self.factory.parse_fulltext(data, version_idx)

465

if parent is not None:

466

reference_content = self._get_content(parent)

467

old_texts = reference_content.text()

468

else:

469

old_texts = []

470

new_texts = new_content.text()

471

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

472

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

473

else:

474

delta = self.factory.parse_line_delta(data, version_idx)

475

return parent, sha1, noeol, delta

476

477

def get_graph_with_ghosts(self):

478

"""See VersionedFile.get_graph_with_ghosts()."""

479

graph_items = self._index.get_graph()

480

return dict(graph_items)

481

482

def get_sha1(self, version_id):

483

"""See VersionedFile.get_sha1()."""

484

record_map = self._get_record_map([version_id])

485

method, content, digest, next = record_map[version_id]

486

return digest

487

488

@staticmethod

489

def get_suffixes():

490

"""See VersionedFile.get_suffixes()."""

491

return [DATA_SUFFIX, INDEX_SUFFIX]

492

493

def has_ghost(self, version_id):

494

"""True if there is a ghost reference in the file to version_id."""

495

# maybe we have it

496

if self.has_version(version_id):

497

return False

498

# optimisable if needed by memoising the _ghosts set.

499

items = self._index.get_graph()

500

for node, parents in items:

501

for parent in parents:

502

if parent not in self._index._cache:

503

if parent == version_id:

504

return True

505

return False

506

507

def versions(self):

508

"""See VersionedFile.versions."""

509

return self._index.get_versions()

510

511

def has_version(self, version_id):

512

"""See VersionedFile.has_version."""

513

return self._index.has_version(version_id)

514

515

__contains__ = has_version

516

517

def _merge_annotations(self, content, parents, parent_texts={},

518

delta=None, annotated=None):

519

"""Merge annotations for content. This is done by comparing

520

the annotations based on changed to the text.

521

"""

522

if annotated:

523

delta_seq = None

524

for parent_id in parents:

525

merge_content = self._get_content(parent_id, parent_texts)

526

seq = KnitSequenceMatcher(None, merge_content.text(), content.text())

527

if delta_seq is None:

528

# setup a delta seq to reuse.

529

delta_seq = seq

530

for i, j, n in seq.get_matching_blocks():

531

if n == 0:

532

continue

533

# this appears to copy (origin, text) pairs across to the new

534

# content for any line that matches the last-checked parent.

535

# FIXME: save the sequence control data for delta compression

536

# against the most relevant parent rather than rediffing.

537

content._lines[j:j+n] = merge_content._lines[i:i+n]

538

if delta:

539

if not annotated:

540

reference_content = self._get_content(parents[0], parent_texts)

541

new_texts = content.text()

542

old_texts = reference_content.text()

543

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

544

return self._make_line_delta(delta_seq, content)

545

546

def _make_line_delta(self, delta_seq, new_content):

547

"""Generate a line delta from delta_seq and new_content."""

548

diff_hunks = []

549

for op in delta_seq.get_opcodes():

550

if op[0] == 'equal':

551

continue

552

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

553

return diff_hunks

554

555

def _get_components_positions(self, version_ids):

556

"""Produce a map of position data for the components of versions.

557

558

This data is intended to be used for retrieving the knit records.

559

560

A dict of version_id to (method, data_pos, data_size, next) is

561

returned.

562

method is the way referenced data should be applied.

563

data_pos is the position of the data in the knit.

564

data_size is the size of the data in the knit.

565

next is the build-parent of the version, or None for fulltexts.

566

"""

567

component_data = {}

568

for version_id in version_ids:

569

cursor = version_id

570

571

while cursor is not None and cursor not in component_data:

572

method = self._index.get_method(cursor)

573

if method == 'fulltext':

574

next = None

575

else:

576

next = self.get_parents(cursor)[0]

577

data_pos, data_size = self._index.get_position(cursor)

578

component_data[cursor] = (method, data_pos, data_size, next)

579

cursor = next

580

return component_data

581

582

def _get_content(self, version_id, parent_texts={}):

583

"""Returns a content object that makes up the specified

584

version."""

585

if not self.has_version(version_id):

586

raise RevisionNotPresent(version_id, self.filename)

587

588

cached_version = parent_texts.get(version_id, None)

589

if cached_version is not None:

590

return cached_version

591

592

text_map, contents_map = self._get_content_maps([version_id])

593

return contents_map[version_id]

594

595

def _check_versions_present(self, version_ids):

596

"""Check that all specified versions are present."""

597

version_ids = set(version_ids)

598

for r in list(version_ids):

599

if self._index.has_version(r):

600

version_ids.remove(r)

601

if version_ids:

602

raise RevisionNotPresent(list(version_ids)[0], self.filename)

603

604

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

605

"""See VersionedFile.add_lines_with_ghosts()."""

606

self._check_add(version_id, lines)

607

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

608

609

def _add_lines(self, version_id, parents, lines, parent_texts):

610

"""See VersionedFile.add_lines."""

611

self._check_add(version_id, lines)

612

self._check_versions_present(parents)

613

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

614

615

def _check_add(self, version_id, lines):

616

"""check that version_id and lines are safe to add."""

617

assert self.writable, "knit is not opened for write"

618

### FIXME escape. RBC 20060228

619

if contains_whitespace(version_id):

620

raise InvalidRevisionId(version_id, self.filename)

621

if self.has_version(version_id):

622

raise RevisionAlreadyPresent(version_id, self.filename)

623

self._check_lines_not_unicode(lines)

624

self._check_lines_are_lines(lines)

625

626

def _add(self, version_id, lines, parents, delta, parent_texts):

627

"""Add a set of lines on top of version specified by parents.

628

629

If delta is true, compress the text as a line-delta against

630

the first parent.

631

632

Any versions not present will be converted into ghosts.

633

"""

634

# 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)

635

# +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)

636

# +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)

637

# +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)

638

# +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)

639

# +461 0 36.3420 15.4540 +bzrlib.knit:146(make)

640

# +1383 0 8.0370 8.0370 +<len>

641

# +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)

642

# +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)

643

# +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)

644

# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)

645

646

present_parents = []

647

ghosts = []

648

if parent_texts is None:

649

parent_texts = {}

650

for parent in parents:

651

if not self.has_version(parent):

652

ghosts.append(parent)

653

else:

654

present_parents.append(parent)

655

656

if delta and not len(present_parents):

657

delta = False

658

659

digest = sha_strings(lines)

660

options = []

661

if lines:

662

if lines[-1][-1] != '\n':

663

options.append('no-eol')

664

lines[-1] = lines[-1] + '\n'

665

666

if len(present_parents) and delta:

667

# To speed the extract of texts the delta chain is limited

668

# to a fixed number of deltas. This should minimize both

669

# I/O and the time spend applying deltas.

670

count = 0

671

delta_parents = present_parents

672

while count < 25:

673

parent = delta_parents[0]

674

method = self._index.get_method(parent)

675

if method == 'fulltext':

676

break

677

delta_parents = self._index.get_parents(parent)

678

count = count + 1

679

if method == 'line-delta':

680

delta = False

681

682

lines = self.factory.make(lines, version_id)

683

if delta or (self.factory.annotated and len(present_parents) > 0):

684

# Merge annotations from parent texts if so is needed.

685

delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,

686

delta, self.factory.annotated)

687

688

if delta:

689

options.append('line-delta')

690

store_lines = self.factory.lower_line_delta(delta_hunks)

691

else:

692

options.append('fulltext')

693

store_lines = self.factory.lower_fulltext(lines)

694

695

where, size = self._data.add_record(version_id, digest, store_lines)

696

self._index.add_version(version_id, options, where, size, parents)

697

return lines

698

699

def check(self, progress_bar=None):

700

"""See VersionedFile.check()."""

701

702

def _clone_text(self, new_version_id, old_version_id, parents):

703

"""See VersionedFile.clone_text()."""

704

# FIXME RBC 20060228 make fast by only inserting an index with null

705

# delta.

706

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

707

708

def get_lines(self, version_id):

709

"""See VersionedFile.get_lines()."""

710

return self.get_line_list([version_id])[0]

711

712

def _get_record_map(self, version_ids):

713

"""Produce a dictionary of knit records.

714

715

The keys are version_ids, the values are tuples of (method, content,

716

digest, next).

717

method is the way the content should be applied.

718

content is a KnitContent object.

719

digest is the SHA1 digest of this version id after all steps are done

720

next is the build-parent of the version, i.e. the leftmost ancestor.

721

If the method is fulltext, next will be None.

722

"""

723

position_map = self._get_components_positions(version_ids)

724

# c = component_id, m = method, p = position, s = size, n = next

725

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

726

record_map = {}

727

for component_id, content, digest in \

728

self._data.read_records_iter(records):

729

method, position, size, next = position_map[component_id]

730

record_map[component_id] = method, content, digest, next

731

732

return record_map

733

734

def get_text(self, version_id):

735

"""See VersionedFile.get_text"""

736

return self.get_texts([version_id])[0]

737

738

def get_texts(self, version_ids):

739

return [''.join(l) for l in self.get_line_list(version_ids)]

740

741

def get_line_list(self, version_ids):

742

"""Return the texts of listed versions as a list of strings."""

743

text_map, content_map = self._get_content_maps(version_ids)

744

return [text_map[v] for v in version_ids]

745

746

def _get_content_maps(self, version_ids):

747

"""Produce maps of text and KnitContents

748

749

:return: (text_map, content_map) where text_map contains the texts for

750

the requested versions and content_map contains the KnitContents.

751

Both dicts take version_ids as their keys.

752

"""

753

for version_id in version_ids:

754

if not self.has_version(version_id):

755

raise RevisionNotPresent(version_id, self.filename)

756

record_map = self._get_record_map(version_ids)

757

758

text_map = {}

759

content_map = {}

760

final_content = {}

761

for version_id in version_ids:

762

components = []

763

cursor = version_id

764

while cursor is not None:

765

method, data, digest, next = record_map[cursor]

766

components.append((cursor, method, data, digest))

767

if cursor in content_map:

768

break

769

cursor = next

770

771

content = None

772

for component_id, method, data, digest in reversed(components):

773

if component_id in content_map:

774

content = content_map[component_id]

775

else:

776

version_idx = self._index.lookup(component_id)

777

if method == 'fulltext':

778

assert content is None

779

content = self.factory.parse_fulltext(data, version_idx)

780

elif method == 'line-delta':

781

delta = self.factory.parse_line_delta(data[:],

782

version_idx)

783

content = content.copy()

784

content._lines = self._apply_delta(content._lines,

785

delta)

786

content_map[component_id] = content

787

788

if 'no-eol' in self._index.get_options(version_id):

789

content = content.copy()

790

line = content._lines[-1][1].rstrip('\n')

791

content._lines[-1] = (content._lines[-1][0], line)

792

final_content[version_id] = content

793

794

# digest here is the digest from the last applied component.

795

text = content.text()

796

if sha_strings(text) != digest:

797

raise KnitCorrupt(self.filename,

798

'sha-1 does not match %s' % version_id)

799

800

text_map[version_id] = text

801

return text_map, final_content

802

803

def iter_lines_added_or_present_in_versions(self, version_ids=None,

804

pb=None):

805

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

806

if version_ids is None:

807

version_ids = self.versions()

808

if pb is None:

809

pb = progress.DummyProgress()

810

# we don't care about inclusions, the caller cares.

811

# but we need to setup a list of records to visit.

812

# we need version_id, position, length

813

version_id_records = []

814

requested_versions = list(version_ids)

815

# filter for available versions

816

for version_id in requested_versions:

817

if not self.has_version(version_id):

818

raise RevisionNotPresent(version_id, self.filename)

819

# get a in-component-order queue:

820

version_ids = []

821

for version_id in self.versions():

822

if version_id in requested_versions:

823

version_ids.append(version_id)

824

data_pos, length = self._index.get_position(version_id)

825

version_id_records.append((version_id, data_pos, length))

826

827

count = 0

828

total = len(version_id_records)

829

pb.update('Walking content.', count, total)

830

for version_id, data, sha_value in \

831

self._data.read_records_iter(version_id_records):

832

pb.update('Walking content.', count, total)

833

method = self._index.get_method(version_id)

834

version_idx = self._index.lookup(version_id)

835

assert method in ('fulltext', 'line-delta')

836

if method == 'fulltext':

837

content = self.factory.parse_fulltext(data, version_idx)

838

for line in content.text():

839

yield line

840

else:

841

delta = self.factory.parse_line_delta(data, version_idx)

842

for start, end, count, lines in delta:

843

for origin, line in lines:

844

yield line

845

count +=1

846

pb.update('Walking content.', total, total)

847

848

def num_versions(self):

849

"""See VersionedFile.num_versions()."""

850

return self._index.num_versions()

851

852

__len__ = num_versions

853

854

def annotate_iter(self, version_id):

855

"""See VersionedFile.annotate_iter."""

856

content = self._get_content(version_id)

857

for origin, text in content.annotate_iter():

858

yield origin, text

859

860

def get_parents(self, version_id):

861

"""See VersionedFile.get_parents."""

862

# perf notes:

863

# optimism counts!

864

# 52554 calls in 1264 872 internal down from 3674

865

try:

866

return self._index.get_parents(version_id)

867

except KeyError:

868

raise RevisionNotPresent(version_id, self.filename)

869

870

def get_parents_with_ghosts(self, version_id):

871

"""See VersionedFile.get_parents."""

872

try:

873

return self._index.get_parents_with_ghosts(version_id)

874

except KeyError:

875

raise RevisionNotPresent(version_id, self.filename)

876

877

def get_ancestry(self, versions):

878

"""See VersionedFile.get_ancestry."""

879

if isinstance(versions, basestring):

880

versions = [versions]

881

if not versions:

882

return []

883

self._check_versions_present(versions)

884

return self._index.get_ancestry(versions)

885

886

def get_ancestry_with_ghosts(self, versions):

887

"""See VersionedFile.get_ancestry_with_ghosts."""

888

if isinstance(versions, basestring):

889

versions = [versions]

890

if not versions:

891

return []

892

self._check_versions_present(versions)

893

return self._index.get_ancestry_with_ghosts(versions)

894

895

#@deprecated_method(zero_eight)

896

def walk(self, version_ids):

897

"""See VersionedFile.walk."""

898

# We take the short path here, and extract all relevant texts

899

# and put them in a weave and let that do all the work. Far

900

# from optimal, but is much simpler.

901

# FIXME RB 20060228 this really is inefficient!

902

from bzrlib.weave import Weave

903

904

w = Weave(self.filename)

905

ancestry = self.get_ancestry(version_ids)

906

sorted_graph = topo_sort(self._index.get_graph())

907

version_list = [vid for vid in sorted_graph if vid in ancestry]

908

909

for version_id in version_list:

910

lines = self.get_lines(version_id)

911

w.add_lines(version_id, self.get_parents(version_id), lines)

912

913

for lineno, insert_id, dset, line in w.walk(version_ids):

914

yield lineno, insert_id, dset, line

915

916

def plan_merge(self, ver_a, ver_b):

917

"""See VersionedFile.plan_merge."""

918

ancestors_b = set(self.get_ancestry(ver_b))

919

def status_a(revision, text):

920

if revision in ancestors_b:

921

return 'killed-b', text

922

else:

923

return 'new-a', text

924

925

ancestors_a = set(self.get_ancestry(ver_a))

926

def status_b(revision, text):

927

if revision in ancestors_a:

928

return 'killed-a', text

929

else:

930

return 'new-b', text

931

932

annotated_a = self.annotate(ver_a)

933

annotated_b = self.annotate(ver_b)

934

plain_a = [t for (a, t) in annotated_a]

935

plain_b = [t for (a, t) in annotated_b]

936

blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

937

a_cur = 0

938

b_cur = 0

939

for ai, bi, l in blocks:

940

# process all mismatched sections

941

# (last mismatched section is handled because blocks always

942

# includes a 0-length last block)

943

for revision, text in annotated_a[a_cur:ai]:

944

yield status_a(revision, text)

945

for revision, text in annotated_b[b_cur:bi]:

946

yield status_b(revision, text)

947

948

# and now the matched section

949

a_cur = ai + l

950

b_cur = bi + l

951

for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):

952

assert text_a == text_b

953

yield "unchanged", text_a

954

955

956

class _KnitComponentFile(object):

957

"""One of the files used to implement a knit database"""

958

959

def __init__(self, transport, filename, mode, file_mode=None,

960

create_parent_dir=False, dir_mode=None):

961

self._transport = transport

962

self._filename = filename

963

self._mode = mode

964

self._file_mode = file_mode

965

self._dir_mode = dir_mode

966

self._create_parent_dir = create_parent_dir

967

self._need_to_create = False

968

969

def check_header(self, fp):

970

line = fp.readline()

971

if line != self.HEADER:

972

raise KnitHeaderError(badline=line)

973

974

def commit(self):

975

"""Commit is a nop."""

976

977

def __repr__(self):

978

return '%s(%s)' % (self.__class__.__name__, self._filename)

979

980

981

class _KnitIndex(_KnitComponentFile):

982

"""Manages knit index file.

983

984

The index is already kept in memory and read on startup, to enable

985

fast lookups of revision information. The cursor of the index

986

file is always pointing to the end, making it easy to append

987

entries.

988

989

_cache is a cache for fast mapping from version id to a Index

990

object.

991

992

_history is a cache for fast mapping from indexes to version ids.

993

994

The index data format is dictionary compressed when it comes to

995

parent references; a index entry may only have parents that with a

996

lover index number. As a result, the index is topological sorted.

997

998

Duplicate entries may be written to the index for a single version id

999

if this is done then the latter one completely replaces the former:

1000

this allows updates to correct version and parent information.

1001

Note that the two entries may share the delta, and that successive

1002

annotations and references MUST point to the first entry.

1003

1004

The index file on disc contains a header, followed by one line per knit

1005

record. The same revision can be present in an index file more than once.

1006

The first occurrence gets assigned a sequence number starting from 0.

1007

1008

The format of a single line is

1009

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1010

REVISION_ID is a utf8-encoded revision id

1011

FLAGS is a comma separated list of flags about the record. Values include

1012

no-eol, line-delta, fulltext.

1013

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1014

that the the compressed data starts at.

1015

LENGTH is the ascii representation of the length of the data file.

1016

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1017

REVISION_ID.

1018

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1019

revision id already in the knit that is a parent of REVISION_ID.

1020

The ' :' marker is the end of record marker.

1021

1022

partial writes:

1023

when a write is interrupted to the index file, it will result in a line that

1024

does not end in ' :'. If the ' :' is not present at the end of a line, or at

1025

the end of the file, then the record that is missing it will be ignored by

1026

the parser.

1027

1028

When writing new records to the index file, the data is preceded by '\n'

1029

to ensure that records always start on new lines even if the last write was

1030

interrupted. As a result its normal for the last line in the index to be

1031

missing a trailing newline. One can be added with no harmful effects.

1032

"""

1033

1034

HEADER = "# bzr knit index 8\n"

1035

1036

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1037

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1038

1039

def _cache_version(self, version_id, options, pos, size, parents):

1040

"""Cache a version record in the history array and index cache.

1041

1042

This is inlined into __init__ for performance. KEEP IN SYNC.

1043

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1044

indexes).

1045

"""

1046

# only want the _history index to reference the 1st index entry

1047

# for version_id

1048

if version_id not in self._cache:

1049

index = len(self._history)

1050

self._history.append(version_id)

1051

else:

1052

index = self._cache[version_id][5]

1053

self._cache[version_id] = (version_id,

1054

options,

1055

pos,

1056

size,

1057

parents,

1058

index)

1059

1060

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1061

create_parent_dir=False, delay_create=False, dir_mode=None):

1062

_KnitComponentFile.__init__(self, transport, filename, mode,

1063

file_mode=file_mode,

1064

create_parent_dir=create_parent_dir,

1065

dir_mode=dir_mode)

1066

self._cache = {}

1067

# position in _history is the 'official' index for a revision

1068

# but the values may have come from a newer entry.

1069

# so - wc -l of a knit index is != the number of unique names

1070

# in the knit.

1071

self._history = []

1072

pb = bzrlib.ui.ui_factory.nested_progress_bar()

1073

try:

1074

count = 0

1075

total = 1

1076

try:

1077

pb.update('read knit index', count, total)

1078

fp = self._transport.get(self._filename)

1079

try:

1080

self.check_header(fp)

1081

# readlines reads the whole file at once:

1082

# bad for transports like http, good for local disk

1083

# we save 60 ms doing this one change (

1084

# from calling readline each time to calling

1085

# readlines once.

1086

# probably what we want for nice behaviour on

1087

# http is a incremental readlines that yields, or

1088

# a check for local vs non local indexes,

1089

for l in fp.readlines():

1090

rec = l.split()

1091

if len(rec) < 5 or rec[-1] != ':':

1092

# corrupt line.

1093

# FIXME: in the future we should determine if its a

1094

# short write - and ignore it

1095

# or a different failure, and raise. RBC 20060407

1096

continue

1097

count += 1

1098

total += 1

1099

#pb.update('read knit index', count, total)

1100

# See self._parse_parents

1101

parents = []

1102

for value in rec[4:-1]:

1103

if '.' == value[0]:

1104

# uncompressed reference

1105

parents.append(value[1:])

1106

else:

1107

# this is 15/4000ms faster than isinstance,

1108

# (in lsprof)

1109

# this function is called thousands of times a

1110

# second so small variations add up.

1111

assert value.__class__ is str

1112

parents.append(self._history[int(value)])

1113

# end self._parse_parents

1114

# self._cache_version(rec[0],

1115

# rec[1].split(','),

1116

# int(rec[2]),

1117

# int(rec[3]),

1118

# parents)

1119

# --- self._cache_version

1120

# only want the _history index to reference the 1st

1121

# index entry for version_id

1122

version_id = rec[0]

1123

if version_id not in self._cache:

1124

index = len(self._history)

1125

self._history.append(version_id)

1126

else:

1127

index = self._cache[version_id][5]

1128

self._cache[version_id] = (version_id,

1129

rec[1].split(','),

1130

int(rec[2]),

1131

int(rec[3]),

1132

parents,

1133

index)

1134

# --- self._cache_version

1135

finally:

1136

fp.close()

1137

except NoSuchFile, e:

1138

if mode != 'w' or not create:

1139

raise

1140

if delay_create:

1141

self._need_to_create = True

1142

else:

1143

self._transport.put_bytes_non_atomic(self._filename,

1144

self.HEADER, mode=self._file_mode)

1145

1146

finally:

1147

pb.update('read knit index', total, total)

1148

pb.finished()

1149

1150

def _parse_parents(self, compressed_parents):

1151

"""convert a list of string parent values into version ids.

1152

1153

ints are looked up in the index.

1154

.FOO values are ghosts and converted in to FOO.

1155

1156

NOTE: the function is retained here for clarity, and for possible

1157

use in partial index reads. However bulk processing now has

1158

it inlined in __init__ for inner-loop optimisation.

1159

"""

1160

result = []

1161

for value in compressed_parents:

1162

if value[-1] == '.':

1163

# uncompressed reference

1164

result.append(value[1:])

1165

else:

1166

# this is 15/4000ms faster than isinstance,

1167

# this function is called thousands of times a

1168

# second so small variations add up.

1169

assert value.__class__ is str

1170

result.append(self._history[int(value)])

1171

return result

1172

1173

def get_graph(self):

1174

graph = []

1175

for version_id, index in self._cache.iteritems():

1176

graph.append((version_id, index[4]))

1177

return graph

1178

1179

def get_ancestry(self, versions):

1180

"""See VersionedFile.get_ancestry."""

1181

# get a graph of all the mentioned versions:

1182

graph = {}

1183

pending = set(versions)

1184

while len(pending):

1185

version = pending.pop()

1186

parents = self._cache[version][4]

1187

# got the parents ok

1188

# trim ghosts

1189

parents = [parent for parent in parents if parent in self._cache]

1190

for parent in parents:

1191

# if not completed and not a ghost

1192

if parent not in graph:

1193

pending.add(parent)

1194

graph[version] = parents

1195

return topo_sort(graph.items())

1196

1197

def get_ancestry_with_ghosts(self, versions):

1198

"""See VersionedFile.get_ancestry_with_ghosts."""

1199

# get a graph of all the mentioned versions:

1200

graph = {}

1201

pending = set(versions)

1202

while len(pending):

1203

version = pending.pop()

1204

try:

1205

parents = self._cache[version][4]

1206

except KeyError:

1207

# ghost, fake it

1208

graph[version] = []

1209

pass

1210

else:

1211

# got the parents ok

1212

for parent in parents:

1213

if parent not in graph:

1214

pending.add(parent)

1215

graph[version] = parents

1216

return topo_sort(graph.items())

1217

1218

def num_versions(self):

1219

return len(self._history)

1220

1221

__len__ = num_versions

1222

1223

def get_versions(self):

1224

return self._history

1225

1226

def idx_to_name(self, idx):

1227

return self._history[idx]

1228

1229

def lookup(self, version_id):

1230

assert version_id in self._cache

1231

return self._cache[version_id][5]

1232

1233

def _version_list_to_index(self, versions):

1234

encode_utf8 = cache_utf8.encode

1235

result_list = []

1236

for version in versions:

1237

if version in self._cache:

1238

# -- inlined lookup() --

1239

result_list.append(str(self._cache[version][5]))

1240

# -- end lookup () --

1241

else:

1242

result_list.append('.' + encode_utf8(version))

1243

return ' '.join(result_list)

1244

1245

def add_version(self, version_id, options, pos, size, parents):

1246

"""Add a version record to the index."""

1247

self.add_versions(((version_id, options, pos, size, parents),))

1248

1249

def add_versions(self, versions):

1250

"""Add multiple versions to the index.

1251

1252

:param versions: a list of tuples:

1253

(version_id, options, pos, size, parents).

1254

"""

1255

lines = []

1256

encode_utf8 = cache_utf8.encode

1257

for version_id, options, pos, size, parents in versions:

1258

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1259

','.join(options),

1260

pos,

1261

size,

1262

self._version_list_to_index(parents))

1263

assert isinstance(line, str), \

1264

'content must be utf-8 encoded: %r' % (line,)

1265

lines.append(line)

1266

if not self._need_to_create:

1267

self._transport.append_bytes(self._filename, ''.join(lines))

1268

else:

1269

sio = StringIO()

1270

sio.write(self.HEADER)

1271

sio.writelines(lines)

1272

sio.seek(0)

1273

self._transport.put_file_non_atomic(self._filename, sio,

1274

create_parent_dir=self._create_parent_dir,

1275

mode=self._file_mode,

1276

dir_mode=self._dir_mode)

1277

self._need_to_create = False

1278

1279

# cache after writing, so that a failed write leads to missing cache

1280

# entries not extra ones. XXX TODO: RBC 20060502 in the event of a

1281

# failure, reload the index or flush it or some such, to prevent

1282

# writing records that did complete twice.

1283

for version_id, options, pos, size, parents in versions:

1284

self._cache_version(version_id, options, pos, size, parents)

1285

1286

def has_version(self, version_id):

1287

"""True if the version is in the index."""

1288

return (version_id in self._cache)

1289

1290

def get_position(self, version_id):

1291

"""Return data position and size of specified version."""

1292

return (self._cache[version_id][2], \

1293

self._cache[version_id][3])

1294

1295

def get_method(self, version_id):

1296

"""Return compression method of specified version."""

1297

options = self._cache[version_id][1]

1298

if 'fulltext' in options:

1299

return 'fulltext'

1300

else:

1301

assert 'line-delta' in options

1302

return 'line-delta'

1303

1304

def get_options(self, version_id):

1305

return self._cache[version_id][1]

1306

1307

def get_parents(self, version_id):

1308

"""Return parents of specified version ignoring ghosts."""

1309

return [parent for parent in self._cache[version_id][4]

1310

if parent in self._cache]

1311

1312

def get_parents_with_ghosts(self, version_id):

1313

"""Return parents of specified version with ghosts."""

1314

return self._cache[version_id][4]

1315

1316

def check_versions_present(self, version_ids):

1317

"""Check that all specified versions are present."""

1318

version_ids = set(version_ids)

1319

for version_id in list(version_ids):

1320

if version_id in self._cache:

1321

version_ids.remove(version_id)

1322

if version_ids:

1323

raise RevisionNotPresent(list(version_ids)[0], self.filename)

1324

1325

1326

class _KnitData(_KnitComponentFile):

1327

"""Contents of the knit data file"""

1328

1329

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1330

create_parent_dir=False, delay_create=False,

1331

dir_mode=None):

1332

_KnitComponentFile.__init__(self, transport, filename, mode,

1333

file_mode=file_mode,

1334

create_parent_dir=create_parent_dir,

1335

dir_mode=dir_mode)

1336

self._checked = False

1337

# TODO: jam 20060713 conceptually, this could spill to disk

1338

# if the cached size gets larger than a certain amount

1339

# but it complicates the model a bit, so for now just use

1340

# a simple dictionary

1341

self._cache = {}

1342

self._do_cache = False

1343

if create:

1344

if delay_create:

1345

self._need_to_create = create

1346

else:

1347

self._transport.put_bytes_non_atomic(self._filename, '',

1348

mode=self._file_mode)

1349

1350

def enable_cache(self):

1351

"""Enable caching of reads."""

1352

self._do_cache = True

1353

1354

def clear_cache(self):

1355

"""Clear the record cache."""

1356

self._do_cache = False

1357

self._cache = {}

1358

1359

def _open_file(self):

1360

try:

1361

return self._transport.get(self._filename)

1362

except NoSuchFile:

1363

pass

1364

return None

1365

1366

def _record_to_data(self, version_id, digest, lines):

1367

"""Convert version_id, digest, lines into a raw data block.

1368

1369

:return: (len, a StringIO instance with the raw data ready to read.)

1370

"""

1371

sio = StringIO()

1372

data_file = GzipFile(None, mode='wb', fileobj=sio)

1373

1374

version_id_utf8 = cache_utf8.encode(version_id)

1375

data_file.writelines(chain(

1376

["version %s %d %s\n" % (version_id_utf8,

1377

len(lines),

1378

digest)],

1379

lines,

1380

["end %s\n" % version_id_utf8]))

1381

data_file.close()

1382

length= sio.tell()

1383

1384

sio.seek(0)

1385

return length, sio

1386

1387

def add_raw_record(self, raw_data):

1388

"""Append a prepared record to the data file.

1389

1390

:return: the offset in the data file raw_data was written.

1391

"""

1392

assert isinstance(raw_data, str), 'data must be plain bytes'

1393

if not self._need_to_create:

1394

return self._transport.append_bytes(self._filename, raw_data)

1395

else:

1396

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1397

create_parent_dir=self._create_parent_dir,

1398

mode=self._file_mode,

1399

dir_mode=self._dir_mode)

1400

self._need_to_create = False

1401

return 0

1402

1403

def add_record(self, version_id, digest, lines):

1404

"""Write new text record to disk. Returns the position in the

1405

file where it was written."""

1406

size, sio = self._record_to_data(version_id, digest, lines)

1407

# write to disk

1408

if not self._need_to_create:

1409

start_pos = self._transport.append_file(self._filename, sio)

1410

else:

1411

self._transport.put_file_non_atomic(self._filename, sio,

1412

create_parent_dir=self._create_parent_dir,

1413

mode=self._file_mode,

1414

dir_mode=self._dir_mode)

1415

self._need_to_create = False

1416

start_pos = 0

1417

if self._do_cache:

1418

self._cache[version_id] = sio.getvalue()

1419

return start_pos, size

1420

1421

def _parse_record_header(self, version_id, raw_data):

1422

"""Parse a record header for consistency.

1423

1424

:return: the header and the decompressor stream.

1425

as (stream, header_record)

1426

"""

1427

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1428

rec = df.readline().split()

1429

if len(rec) != 4:

1430

raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')

1431

if cache_utf8.decode(rec[1]) != version_id:

1432

raise KnitCorrupt(self._filename,

1433

'unexpected version, wanted %r, got %r' % (

1434

version_id, rec[1]))

1435

return df, rec

1436

1437

def _parse_record(self, version_id, data):

1438

# profiling notes:

1439

# 4168 calls in 2880 217 internal

1440

# 4168 calls to _parse_record_header in 2121

1441

# 4168 calls to readlines in 330

1442

df, rec = self._parse_record_header(version_id, data)

1443

record_contents = df.readlines()

1444

l = record_contents.pop()

1445

assert len(record_contents) == int(rec[2])

1446

if l != 'end %s\n' % cache_utf8.encode(version_id):

1447

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

1448

% (l, version_id))

1449

df.close()

1450

return record_contents, rec[3]

1451

1452

def read_records_iter_raw(self, records):

1453

"""Read text records from data file and yield raw data.

1454

1455

This unpacks enough of the text record to validate the id is

1456

as expected but thats all.

1457

"""

1458

# setup an iterator of the external records:

1459

# uses readv so nice and fast we hope.

1460

if len(records):

1461

# grab the disk data needed.

1462

if self._cache:

1463

# Don't check _cache if it is empty

1464

needed_offsets = [(pos, size) for version_id, pos, size

1465

in records

1466

if version_id not in self._cache]

1467

else:

1468

needed_offsets = [(pos, size) for version_id, pos, size

1469

in records]

1470

1471

raw_records = self._transport.readv(self._filename, needed_offsets)

1472

1473

1474

for version_id, pos, size in records:

1475

if version_id in self._cache:

1476

# This data has already been validated

1477

data = self._cache[version_id]

1478

else:

1479

pos, data = raw_records.next()

1480

if self._do_cache:

1481

self._cache[version_id] = data

1482

1483

# validate the header

1484

df, rec = self._parse_record_header(version_id, data)

1485

df.close()

1486

yield version_id, data

1487

1488

def read_records_iter(self, records):

1489

"""Read text records from data file and yield result.

1490

1491

The result will be returned in whatever is the fastest to read.

1492

Not by the order requested. Also, multiple requests for the same

1493

record will only yield 1 response.

1494

:param records: A list of (version_id, pos, len) entries

1495

:return: Yields (version_id, contents, digest) in the order

1496

read, not the order requested

1497

"""

1498

if not records:

1499

return

1500

1501

if self._cache:

1502

# Skip records we have alread seen

1503

yielded_records = set()

1504

needed_records = set()

1505

for record in records:

1506

if record[0] in self._cache:

1507

if record[0] in yielded_records:

1508

continue

1509

yielded_records.add(record[0])

1510

data = self._cache[record[0]]

1511

content, digest = self._parse_record(record[0], data)

1512

yield (record[0], content, digest)

1513

else:

1514

needed_records.add(record)

1515

needed_records = sorted(needed_records, key=operator.itemgetter(1))

1516

else:

1517

needed_records = sorted(set(records), key=operator.itemgetter(1))

1518

1519

if not needed_records:

1520

return

1521

1522

# The transport optimizes the fetching as well

1523

# (ie, reads continuous ranges.)

1524

readv_response = self._transport.readv(self._filename,

1525

[(pos, size) for version_id, pos, size in needed_records])

1526

1527

for (version_id, pos, size), (pos, data) in \

1528

izip(iter(needed_records), readv_response):

1529

content, digest = self._parse_record(version_id, data)

1530

if self._do_cache:

1531

self._cache[version_id] = data

1532

yield version_id, content, digest

1533

1534

def read_records(self, records):

1535

"""Read records into a dictionary."""

1536

components = {}

1537

for record_id, content, digest in \

1538

self.read_records_iter(records):

1539

components[record_id] = (content, digest)

1540

return components

1541

1542

1543

class InterKnit(InterVersionedFile):

1544

"""Optimised code paths for knit to knit operations."""

1545

1546

_matching_file_from_factory = KnitVersionedFile

1547

_matching_file_to_factory = KnitVersionedFile

1548

1549

@staticmethod

1550

def is_compatible(source, target):

1551

"""Be compatible with knits. """

1552

try:

1553

return (isinstance(source, KnitVersionedFile) and

1554

isinstance(target, KnitVersionedFile))

1555

except AttributeError:

1556

return False

1557

1558

def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):

1559

"""See InterVersionedFile.join."""

1560

assert isinstance(self.source, KnitVersionedFile)

1561

assert isinstance(self.target, KnitVersionedFile)

1562

1563

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

1564

1565

if not version_ids:

1566

return 0

1567

1568

pb = bzrlib.ui.ui_factory.nested_progress_bar()

1569

try:

1570

version_ids = list(version_ids)

1571

if None in version_ids:

1572

version_ids.remove(None)

1573

1574

self.source_ancestry = set(self.source.get_ancestry(version_ids))

1575

this_versions = set(self.target._index.get_versions())

1576

needed_versions = self.source_ancestry - this_versions

1577

cross_check_versions = self.source_ancestry.intersection(this_versions)

1578

mismatched_versions = set()

1579

for version in cross_check_versions:

1580

# scan to include needed parents.

1581

n1 = set(self.target.get_parents_with_ghosts(version))

1582

n2 = set(self.source.get_parents_with_ghosts(version))

1583

if n1 != n2:

1584

# FIXME TEST this check for cycles being introduced works

1585

# the logic is we have a cycle if in our graph we are an

1586

# ancestor of any of the n2 revisions.

1587

for parent in n2:

1588

if parent in n1:

1589

# safe

1590

continue

1591

else:

1592

parent_ancestors = self.source.get_ancestry(parent)

1593

if version in parent_ancestors:

1594

raise errors.GraphCycleError([parent, version])

1595

# ensure this parent will be available later.

1596

new_parents = n2.difference(n1)

1597

needed_versions.update(new_parents.difference(this_versions))

1598

mismatched_versions.add(version)

1599

1600

if not needed_versions and not mismatched_versions:

1601

return 0

1602

full_list = topo_sort(self.source.get_graph())

1603

1604

version_list = [i for i in full_list if (not self.target.has_version(i)

1605

and i in needed_versions)]

1606

1607

# plan the join:

1608

copy_queue = []

1609

copy_queue_records = []

1610

copy_set = set()

1611

for version_id in version_list:

1612

options = self.source._index.get_options(version_id)

1613

parents = self.source._index.get_parents_with_ghosts(version_id)

1614

# check that its will be a consistent copy:

1615

for parent in parents:

1616

# if source has the parent, we must :

1617

# * already have it or

1618

# * have it scheduled already

1619

# otherwise we don't care

1620

assert (self.target.has_version(parent) or

1621

parent in copy_set or

1622

not self.source.has_version(parent))

1623

data_pos, data_size = self.source._index.get_position(version_id)

1624

copy_queue_records.append((version_id, data_pos, data_size))

1625

copy_queue.append((version_id, options, parents))

1626

copy_set.add(version_id)

1627

1628

# data suck the join:

1629

count = 0

1630

total = len(version_list)

1631

raw_datum = []

1632

raw_records = []

1633

for (version_id, raw_data), \

1634

(version_id2, options, parents) in \

1635

izip(self.source._data.read_records_iter_raw(copy_queue_records),

1636

copy_queue):

1637

assert version_id == version_id2, 'logic error, inconsistent results'

1638

count = count + 1

1639

pb.update("Joining knit", count, total)

1640

raw_records.append((version_id, options, parents, len(raw_data)))

1641

raw_datum.append(raw_data)

1642

self.target._add_raw_records(raw_records, ''.join(raw_datum))

1643

1644

for version in mismatched_versions:

1645

# FIXME RBC 20060309 is this needed?

1646

n1 = set(self.target.get_parents_with_ghosts(version))

1647

n2 = set(self.source.get_parents_with_ghosts(version))

1648

# write a combined record to our history preserving the current

1649

# parents as first in the list

1650

new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))

1651

self.target.fix_parents(version, new_parents)

1652

return count

1653

finally:

1654

pb.finished()

1655

1656

1657

InterVersionedFile.register_optimiser(InterKnit)

1658

1659

1660

class WeaveToKnit(InterVersionedFile):

1661

"""Optimised code paths for weave to knit operations."""

1662

1663

_matching_file_from_factory = bzrlib.weave.WeaveFile

1664

_matching_file_to_factory = KnitVersionedFile

1665

1666

@staticmethod

1667

def is_compatible(source, target):

1668

"""Be compatible with weaves to knits."""

1669

try:

1670

return (isinstance(source, bzrlib.weave.Weave) and

1671

isinstance(target, KnitVersionedFile))

1672

except AttributeError:

1673

return False

1674

1675

def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):

1676

"""See InterVersionedFile.join."""

1677

assert isinstance(self.source, bzrlib.weave.Weave)

1678

assert isinstance(self.target, KnitVersionedFile)

1679

1680

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

1681

1682

if not version_ids:

1683

return 0

1684

1685

pb = bzrlib.ui.ui_factory.nested_progress_bar()

1686

try:

1687

version_ids = list(version_ids)

1688

1689

self.source_ancestry = set(self.source.get_ancestry(version_ids))

1690

this_versions = set(self.target._index.get_versions())

1691

needed_versions = self.source_ancestry - this_versions

1692

cross_check_versions = self.source_ancestry.intersection(this_versions)

1693

mismatched_versions = set()

1694

for version in cross_check_versions:

1695

# scan to include needed parents.

1696

n1 = set(self.target.get_parents_with_ghosts(version))

1697

n2 = set(self.source.get_parents(version))

1698

# if all of n2's parents are in n1, then its fine.

1699

if n2.difference(n1):

1700

# FIXME TEST this check for cycles being introduced works

1701

# the logic is we have a cycle if in our graph we are an

1702

# ancestor of any of the n2 revisions.

1703

for parent in n2:

1704

if parent in n1:

1705

# safe

1706

continue

1707

else:

1708

parent_ancestors = self.source.get_ancestry(parent)

1709

if version in parent_ancestors:

1710

raise errors.GraphCycleError([parent, version])

1711

# ensure this parent will be available later.

1712

new_parents = n2.difference(n1)

1713

needed_versions.update(new_parents.difference(this_versions))

1714

mismatched_versions.add(version)

1715

1716

if not needed_versions and not mismatched_versions:

1717

return 0

1718

full_list = topo_sort(self.source.get_graph())

1719

1720

version_list = [i for i in full_list if (not self.target.has_version(i)

1721

and i in needed_versions)]

1722

1723

# do the join:

1724

count = 0

1725

total = len(version_list)

1726

for version_id in version_list:

1727

pb.update("Converting to knit", count, total)

1728

parents = self.source.get_parents(version_id)

1729

# check that its will be a consistent copy:

1730

for parent in parents:

1731

# if source has the parent, we must already have it

1732

assert (self.target.has_version(parent))

1733

self.target.add_lines(

1734

version_id, parents, self.source.get_lines(version_id))

1735

count = count + 1

1736

1737

for version in mismatched_versions:

1738

# FIXME RBC 20060309 is this needed?

1739

n1 = set(self.target.get_parents_with_ghosts(version))

1740

n2 = set(self.source.get_parents(version))

1741

# write a combined record to our history preserving the current

1742

# parents as first in the list

1743

new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))

1744

self.target.fix_parents(version, new_parents)

1745

return count

1746

finally:

1747

pb.finished()

1748

1749

1750

InterVersionedFile.register_optimiser(WeaveToKnit)

1751

1752

1753

class KnitSequenceMatcher(difflib.SequenceMatcher):

1754

"""Knit tuned sequence matcher.

1755

1756

This is based on profiling of difflib which indicated some improvements

1757

for our usage pattern.

1758

"""

1759

1760

def find_longest_match(self, alo, ahi, blo, bhi):

1761

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

1762

1763

If isjunk is not defined:

1764

1765

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

1766

alo <= i <= i+k <= ahi

1767

blo <= j <= j+k <= bhi

1768

and for all (i',j',k') meeting those conditions,

1769

k >= k'

1770

i <= i'

1771

and if i == i', j <= j'

1772

1773

In other words, of all maximal matching blocks, return one that

1774

starts earliest in a, and of all those maximal matching blocks that

1775

start earliest in a, return the one that starts earliest in b.

1776

1777

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

1778

>>> s.find_longest_match(0, 5, 0, 9)

1779

(0, 4, 5)

1780

1781

If isjunk is defined, first the longest matching block is

1782

determined as above, but with the additional restriction that no

1783

junk element appears in the block. Then that block is extended as

1784

far as possible by matching (only) junk elements on both sides. So

1785

the resulting block never matches on junk except as identical junk

1786

happens to be adjacent to an "interesting" match.

1787

1788

Here's the same example as before, but considering blanks to be

1789

junk. That prevents " abcd" from matching the " abcd" at the tail

1790

end of the second sequence directly. Instead only the "abcd" can

1791

match, and matches the leftmost "abcd" in the second sequence:

1792

1793

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

1794

>>> s.find_longest_match(0, 5, 0, 9)

1795

(1, 0, 4)

1796

1797

If no blocks match, return (alo, blo, 0).

1798

1799

>>> s = SequenceMatcher(None, "ab", "c")

1800

>>> s.find_longest_match(0, 2, 0, 1)

1801

(0, 0, 0)

1802

"""

1803

1804

# CAUTION: stripping common prefix or suffix would be incorrect.

1805

# E.g.,

1806

# ab

1807

# acab

1808

# Longest matching block is "ab", but if common prefix is

1809

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

1810

# strip, so ends up claiming that ab is changed to acab by

1811

# inserting "ca" in the middle. That's minimal but unintuitive:

1812

# "it's obvious" that someone inserted "ac" at the front.

1813

# Windiff ends up at the same place as diff, but by pairing up

1814

# the unique 'b's and then matching the first two 'a's.

1815

1816

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

1817

besti, bestj, bestsize = alo, blo, 0

1818

# find longest junk-free match

1819

# during an iteration of the loop, j2len[j] = length of longest

1820

# junk-free match ending with a[i-1] and b[j]

1821

j2len = {}

1822

# nothing = []

1823

b2jget = b2j.get

1824

for i in xrange(alo, ahi):

1825

# look at all instances of a[i] in b; note that because

1826

# b2j has no junk keys, the loop is skipped if a[i] is junk

1827

j2lenget = j2len.get

1828

newj2len = {}

1829

1830

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1831

# following improvement

1832

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1833

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1834

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

1835

# to

1836

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

1837

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

1838

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

1839

1840

try:

1841

js = b2j[a[i]]

1842

except KeyError:

1843

pass

1844

else:

1845

for j in js:

1846

# a[i] matches b[j]

1847

if j >= blo:

1848

if j >= bhi:

1849

break

1850

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

1851

if k > bestsize:

1852

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

1853

j2len = newj2len

1854

1855

# Extend the best by non-junk elements on each end. In particular,

1856

# "popular" non-junk elements aren't in b2j, which greatly speeds

1857

# the inner loop above, but also means "the best" match so far

1858

# doesn't contain any junk *or* popular non-junk elements.

1859

while besti > alo and bestj > blo and \

1860

not isbjunk(b[bestj-1]) and \

1861

a[besti-1] == b[bestj-1]:

1862

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1863

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1864

not isbjunk(b[bestj+bestsize]) and \

1865

a[besti+bestsize] == b[bestj+bestsize]:

1866

bestsize += 1

1867

1868

# Now that we have a wholly interesting match (albeit possibly

1869

# empty!), we may as well suck up the matching junk on each

1870

# side of it too. Can't think of a good reason not to, and it

1871

# saves post-processing the (possibly considerable) expense of

1872

# figuring out what to do with it. In the case of an empty

1873

# interesting match, this is clearly the right thing to do,

1874

# because no other kind of match is possible in the regions.

1875

while besti > alo and bestj > blo and \

1876

isbjunk(b[bestj-1]) and \

1877

a[besti-1] == b[bestj-1]:

1878

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1879

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1880

isbjunk(b[bestj+bestsize]) and \

1881

a[besti+bestsize] == b[bestj+bestsize]:

1882

bestsize = bestsize + 1

1883

1884

return besti, bestj, bestsize

1885

Older »