/brz/remove-bazaar : revision 2208

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2006-12-21 04:38:20 UTC
mfrom: (1551.9.25 Aaron's mergeable stuff)
Revision ID: pqm@pqm.ubuntu.com-20061221043820-0b56b176269f173a

unhide ls, support --kind flag

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

COPYING.txt

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr.ico

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

profile_imports.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

patiencediff,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.ui

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data
# files belong together?

101

102

# TODO: accommodate binaries, perhaps by storing a byte count

103

104

# TODO: function to check whole file

105

106

# TODO: atomically append data, then measure backwards from the cursor
# position after writing to work out where it was located.  we may need to
# bypass python file buffering.

109

110

DATA_SUFFIX = '.knit'

111

INDEX_SUFFIX = '.kndx'

112

113

114

class KnitContent(object):
    """The lines of one knit version, in the form deltas are applied to.

    The content is kept in ``self._lines`` as a sequence of
    (origin, text) pairs.
    """

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the (origin, text) pair of every content line."""
        return iter(self._lines)

    def annotate(self):
        """Collect the (origin, text) pairs into a new list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        :param new_lines: the target content; it must offer text() and
            _lines like a KnitContent does.
        :return: a generator of (old_start, old_end, new_length, new_pairs)
            tuples, one per non-'equal' opcode, where new_pairs is the
            matching slice of new_lines._lines.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            # unchanged regions are not part of a delta
            if tag != 'equal':
                # ofrom, oto, length, data
                yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """List form of line_delta_iter()."""
        return list(self.line_delta_iter(new_lines))

    def text(self):
        """Return only the text of each line, dropping the origins."""
        return [body for _origin, body in self._lines]

    def copy(self):
        """Return a new KnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return KnitContent(duplicate)

147

148

149

class _KnitFactory(object):
    """Shared base of the knit content factories."""

    def make(self, lines, version):
        """Build a KnitContent in which every line originates from version.

        :param lines: a sized sequence of text lines.
        :param version: the origin recorded against each line.
        """
        origins = [version] * len(lines)
        return KnitContent(zip(origins, lines))

155

156

157

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    In the serialized form handled here every content line is prefixed with
    the utf8 revision id of its origin and a single space.
    """

    annotated = True

    def parse_fulltext(self, content, version):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: iterable of serialized fulltext lines.
        :param version: not used by this factory; each line carries its own
            origin.
        :return: a KnitContent holding the (revid, plaintext) pairs.
        """
        decode_utf8 = cache_utf8.decode
        lines = []
        for line in content:
            origin, text = line.split(' ', 1)
            lines.append((decode_utf8(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version=None):
        """Iterate over the parsed hunks of a serialized line delta.

        :param lines: the serialized delta lines.
        :param version: optional and only passed through to
            parse_line_delta(), which ignores it for annotated deltas.  It is
            accepted so this method can be called the same way as
            KnitPlainFactory.parse_line_delta_iter().
        """
        # parse_line_delta() requires a version argument; calling it with
        # lines alone raised TypeError.
        return iter(self.parse_line_delta(lines, version))

    def parse_line_delta(self, lines, version):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version: not used; origins are read from the lines themselves.
        :return: a list of (start, end, count, contents) tuples.
        """
        decode_utf8 = cache_utf8.decode
        result = []
        lines = iter(lines)
        # the same iterator feeds both the header loop and the content reads,
        # so each header is followed by consuming exactly `count` lines.
        next_line = lines.next
        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = []
            remaining = count
            while remaining:
                origin, text = next_line().split(' ', 1)
                remaining -= 1
                contents.append((decode_utf8(origin), text))
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext.

        Returns a generator that strips the leading origin field from each
        serialized line.
        """
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            header = header.split(',')
            # third header field is the number of content lines that follow
            count = int(header[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        encode_utf8 = cache_utf8.encode
        return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.

        :param delta: iterable of (start, end, count, lines) hunks where
            lines is an iterable of (origin, text) pairs.
        :return: a list of strings: one 'start,end,count' header per hunk
            followed by that hunk's 'origin text' lines.
        """
        encode_utf8 = cache_utf8.encode
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(encode_utf8(origin) + ' ' + text
                       for origin, text in lines)
        return out

245

246

247

class KnitPlainFactory(_KnitFactory):
    """Factory producing Content objects from unannotated records."""

    annotated = False

    def parse_fulltext(self, content, version):
        """Parse a fulltext that carries no per-line annotation.

        This still does real work: the internal form is (versionid, line),
        so every line is paired with the one constant version id.
        """
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Yield (start, end, count, pairs) for each hunk in lines.

        lines must be an indexable sequence; every content line of a hunk is
        attributed to version.
        """
        offset = 0
        total = len(lines)
        while offset < total:
            start, end, count = [int(field)
                                 for field in lines[offset].split(',')]
            offset += 1
            body = lines[offset:offset + count]
            yield start, end, count, zip([version] * count, body)
            offset += count

    def parse_line_delta(self, lines, version):
        """List form of parse_line_delta_iter()."""
        return list(self.parse_line_delta_iter(lines, version))

    def get_fulltext_content(self, lines):
        """Return an iterator over the content lines of a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        The hunk headers are consumed and dropped; none of the positional
        information stored in the delta is returned.
        """
        line_iter = iter(lines)
        fetch = line_iter.next
        for header in line_iter:
            # third header field is the number of content lines that follow
            count = int(header.split(',')[2])
            for _ in xrange(count):
                yield fetch()

    def lower_fulltext(self, content):
        """Serialize a fulltext: just the text of each line."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta as header lines followed by their text lines."""
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            for origin, text in lines:
                out.append(text)
        return out

300

301

302

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: the transport the knit files are written through.
    :param relpath: path of the knit relative to transport, without the
        data/index suffix (KnitVersionedFile appends those itself).
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...).  The previous positional call passed
    # transport and relpath swapped and put the factory class in the
    # access_mode slot, which fails the access_mode assertion.  The factory
    # must also be an instance: its methods are called on self.factory.
    # NOTE(review): create=True is passed because __init__ documents that an
    # existing knit is required otherwise -- confirm against _KnitIndex.
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    k._data._open_file()

306

307

308

class KnitVersionedFile(VersionedFile):

309

"""Weave-like structure with faster random access.

310

311

A knit stores a number of texts and a summary of the relationships

312

between them. Texts are identified by a string version-id. Texts

313

are normally stored and retrieved as a series of lines, but can

314

also be passed as single strings.

315

316

Lines are stored with the trailing newline (if any) included, to

317

avoid special cases for files with no final newline. Lines are

318

composed of 8-bit characters, not unicode. The combination of

319

these approaches should mean any 'binary' file can be safely

320

stored and retrieved.

321

"""

322

323

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Construct a knit at location specified by relpath.

    :param relpath: path of the knit relative to transport; INDEX_SUFFIX
        and DATA_SUFFIX are appended to name the index and data files.
    :param transport: transport used for both files.
    :param file_mode: forwarded unchanged to the index and data objects.
    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: content factory instance; defaults to a new
        KnitAnnotateFactory.
    :param basis_knit: deprecated; passing it only triggers a
        DeprecationWarning.
    :param delta: stored as self.delta.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: forwarded unchanged to the index and data objects.
    """
    if deprecated_passed(basis_knit):
        # the message used to name a non-existent method "__()"
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    # upper bound on how many records _check_should_delta() walks back
    self._max_delta_chain = 200

    self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)
    # The index must be assigned before len(self) is evaluated below.
    # NOTE(review): len(self) presumably counts the versions already in the
    # index, so the data file is only created for an empty knit -- confirm.
    self._data = _KnitData(transport, relpath + DATA_SUFFIX,
        access_mode, create=create and not len(self), file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)

360

361

def __repr__(self):
    """Show the class name and the absolute location of the knit."""
    class_name = self.__class__.__name__
    location = self.transport.abspath(self.filename)
    return '%s(%s)' % (class_name, location)

364

365

def _check_should_delta(self, first_parents):
    """Decide whether the next record should be stored as a delta.

    Starting from first_parents[0], follow the first parent of each record
    for at most _max_delta_chain steps, adding up the sizes the index
    reports for the records passed.  The walk ends at the first record
    whose method is 'fulltext'.

    :param first_parents: parent list of the version about to be added;
        only its first entry is followed.
    :return: True if a fulltext was reached and its size is larger than the
        summed size of the records walked before it (store a delta); False
        if a fulltext should be stored instead, including when no fulltext
        was found within _max_delta_chain steps.
    """
    chain_size = 0
    candidates = first_parents
    for _step in xrange(self._max_delta_chain):
        ancestor = candidates[0]
        method = self._index.get_method(ancestor)
        pos, size = self._index.get_position(ancestor)
        if method == 'fulltext':
            # found the base of the chain: compare it with what the chain
            # leading up to it costs.
            return size > chain_size
        chain_size += size
        candidates = self._index.get_parents(ancestor)
    # We couldn't find a fulltext, so we must create a new one
    return False

393

394

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored directly as a 'line-delta' record when that is
    worthwhile; otherwise the request is handed to the base class
    implementation.

    :param version_id: id of the version being added.
    :param parents: parents of the version; all must already be present.
    :param delta_parent: the version the delta applies to, or None when the
        delta describes a complete text.
    :param sha1: digest recorded with the data record.
    :param noeol: when true the record is flagged 'no-eol'.
    :param delta: the line delta hunks.
    """
    self._check_add(version_id, []) # should we check the lines ?
    # Raises unless every parent is present, so there are no ghosts to
    # separate out afterwards.
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    where, size = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, where, size, parents)

448

449

def _add_raw_records(self, records, data):

450

"""Add all the records 'records' with data pre-joined in 'data'.

451

452

:param records: A list of tuples(version_id, options, parents, size).

453

:param data: The data for the records. When it is written, the records

454

are adjusted to have pos pointing into data by the sum of

455

the preceding records sizes.

456

"""

457

# write all the data

458

pos = self._data.add_raw_record(data)

459

offset = 0

460

index_entries = []

461

for (version_id, options, parents, size) in records:

462

index_entries.append((version_id, options, pos+offset,

463

size, parents))

464

if self._data._do_cache:

465

self._data._cache[version_id] = data[offset:offset+size]

466

offset += size

467

self._index.add_versions(index_entries)

468

469

def enable_cache(self):
    """Turn on caching in the data component of this knit."""
    data = self._data
    data.enable_cache()

472

473

def clear_cache(self):
    """Drop whatever the data component has cached (the index is untouched)."""
    data = self._data
    data.clear_cache()

476

477

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first copied to a temporary name, then the data file is
    copied, and only then is the index moved to its final name.
    """
    index_target = name + INDEX_SUFFIX
    staging_name = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(staging_name,
                                  self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staging_name, index_target)

491

492

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name on transport.

    The new knit shares this knit's factory and delta setting and is
    opened with create=True. The mode argument is accepted but not used
    here.
    """
    return KnitVersionedFile(name,
                             transport,
                             factory=self.factory,
                             delta=self.delta,
                             create=True)

495

496

def _fix_parents(self, version, new_parents):

497

"""Fix the parents list for version.

498

499

This is done by appending a new version to the index

500

with identical data except for the parents list.

501

the parents list must be a superset of the current

502

list.

503

"""

504

current_values = self._index._cache[version]

505

assert set(current_values[4]).difference(set(new_parents)) == set()

506

self._index.add_version(version,

507

current_values[1],

508

current_values[2],

509

current_values[3],

510

new_parents)

511

512

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: a tuple (parent, sha1, noeol, delta). parent is the first
        parent of version_id, or None when it has no parents; sha1 is the
        digest read with the data record; noeol tells whether the index
        flags the version 'no-eol'. For a stored line-delta, delta is the
        parsed record; for a stored fulltext it is computed by matching
        the text against the parent's text (or against an empty text when
        there is no parent).
    :raises RevisionNotPresent: if version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]

    index = self._index
    data_pos, data_size = index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    version_idx = index.lookup(version_id)
    noeol = 'no-eol' in index.get_options(version_id)

    if 'fulltext' != index.get_method(version_id):
        # Already stored as a delta: hand back the parsed record.
        delta = self.factory.parse_line_delta(data, version_idx)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_idx)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

539

540

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: a dict built from the (version_id, parents) pairs the index
        reports.
    """
    return dict(self._index.get_graph())

544

545

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    The digest is the one stored with the version's record in the knit.
    """
    records = self._get_record_map([version_id])
    _method, _content, digest, _next = records[version_id]
    return digest

550

551

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: the data file suffix followed by the index file suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

555

556

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version that some entry names as a parent although the
    index holds no entry for it.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    known = self._index._cache
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent == version_id and parent not in known:
                return True
    return False

569

570

def versions(self):
    """See VersionedFile.versions.

    The result comes straight from the index.
    """
    index = self._index
    return index.get_versions()

573

574

def has_version(self, version_id):
    """See VersionedFile.has_version.

    The question is answered by the index.
    """
    index = self._index
    return index.has_version(version_id)

577

578

__contains__ = has_version

579

580

def _merge_annotations(self, content, parents, parent_texts={},

581

delta=None, annotated=None):

582

"""Merge annotations for content. This is done by comparing

583

the annotations based on changed to the text.

584

"""

585

if annotated:

586

delta_seq = None

587

for parent_id in parents:

588

merge_content = self._get_content(parent_id, parent_texts)

589

seq = patiencediff.PatienceSequenceMatcher(

590

None, merge_content.text(), content.text())

591

if delta_seq is None:

592

# setup a delta seq to reuse.

593

delta_seq = seq

594

for i, j, n in seq.get_matching_blocks():

595

if n == 0:

596

continue

597

# this appears to copy (origin, text) pairs across to the new

598

# content for any line that matches the last-checked parent.

599

# FIXME: save the sequence control data for delta compression

600

# against the most relevant parent rather than rediffing.

601

content._lines[j:j+n] = merge_content._lines[i:i+n]

602

if delta:

603

if not annotated:

604

reference_content = self._get_content(parents[0], parent_texts)

605

new_texts = content.text()

606

old_texts = reference_content.text()

607

delta_seq = patiencediff.PatienceSequenceMatcher(

608

None, old_texts, new_texts)

609

return self._make_line_delta(delta_seq, content)

610

611

def _make_line_delta(self, delta_seq, new_content):

612

"""Generate a line delta from delta_seq and new_content."""

613

diff_hunks = []

614

for op in delta_seq.get_opcodes():

615

if op[0] == 'equal':

616

continue

617

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

618

return diff_hunks

619

620

def _get_components_positions(self, version_ids):

621

"""Produce a map of position data for the components of versions.

622

623

This data is intended to be used for retrieving the knit records.

624

625

A dict of version_id to (method, data_pos, data_size, next) is

626

returned.

627

method is the way referenced data should be applied.

628

data_pos is the position of the data in the knit.

629

data_size is the size of the data in the knit.

630

next is the build-parent of the version, or None for fulltexts.

631

"""

632

component_data = {}

633

for version_id in version_ids:

634

cursor = version_id

635

636

while cursor is not None and cursor not in component_data:

637

method = self._index.get_method(cursor)

638

if method == 'fulltext':

639

next = None

640

else:

641

next = self.get_parents(cursor)[0]

642

data_pos, data_size = self._index.get_position(cursor)

643

component_data[cursor] = (method, data_pos, data_size, next)

644

cursor = next

645

return component_data

646

647

def _get_content(self, version_id, parent_texts={}):

648

"""Returns a content object that makes up the specified

649

version."""

650

if not self.has_version(version_id):

651

raise RevisionNotPresent(version_id, self.filename)

652

653

cached_version = parent_texts.get(version_id, None)

654

if cached_version is not None:

655

return cached_version

656

657

text_map, contents_map = self._get_content_maps([version_id])

658

return contents_map[version_id]

659

660

def _check_versions_present(self, version_ids):

661

"""Check that all specified versions are present."""

662

version_ids = set(version_ids)

663

for r in list(version_ids):

664

if self._index.has_version(r):

665

version_ids.remove(r)

666

if version_ids:

667

raise RevisionNotPresent(list(version_ids)[0], self.filename)

668

669

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

670

"""See VersionedFile.add_lines_with_ghosts()."""

671

self._check_add(version_id, lines)

672

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

673

674

def _add_lines(self, version_id, parents, lines, parent_texts):

675

"""See VersionedFile.add_lines."""

676

self._check_add(version_id, lines)

677

self._check_versions_present(parents)

678

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

679

680

def _check_add(self, version_id, lines):
    """Check that version_id and lines are safe to add.

    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    filename = self.filename
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, filename)
    if self.has_version(version_id):
        raise RevisionAlreadyPresent(version_id, filename)
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

690

691

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param lines: the text as a list of lines; a missing final newline is
        added in place and recorded with the 'no-eol' option.
    :param parent_texts: optional dict of version_id to content objects
        for the parents; None means none are supplied.
    :return: the content object made from lines by the factory.
    """
    # Earlier lsprof runs of this method showed most of the time going to
    # _merge_annotations, add_record and lower_fulltext.
    if parent_texts is None:
        parent_texts = {}
    # Parents the knit does not hold are ghosts; they are still written to
    # the index below but cannot serve as a delta basis.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]
    has_present_parents = len(present_parents) > 0

    if delta and not has_present_parents:
        delta = False

    digest = sha_strings(lines)
    options = []
    if lines and lines[-1][-1] != '\n':
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if has_present_parents and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and has_present_parents):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return lines

753

754

def check(self, progress_bar=None):
    """See VersionedFile.check().

    Nothing is checked here; progress_bar is accepted and ignored.
    """
    return None

756

757

def _clone_text(self, new_version_id, old_version_id, parents):

758

"""See VersionedFile.clone_text()."""

759

# FIXME RBC 20060228 make fast by only inserting an index with null

760

# delta.

761

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

762

763

def get_lines(self, version_id):
    """See VersionedFile.get_lines()."""
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

766

767

def _get_record_map(self, version_ids):

768

"""Produce a dictionary of knit records.

769

770

The keys are version_ids, the values are tuples of (method, content,

771

digest, next).

772

method is the way the content should be applied.

773

content is a KnitContent object.

774

digest is the SHA1 digest of this version id after all steps are done

775

next is the build-parent of the version, i.e. the leftmost ancestor.

776

If the method is fulltext, next will be None.

777

"""

778

position_map = self._get_components_positions(version_ids)

779

# c = component_id, m = method, p = position, s = size, n = next

780

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

781

record_map = {}

782

for component_id, content, digest in \

783

self._data.read_records_iter(records):

784

method, position, size, next = position_map[component_id]

785

record_map[component_id] = method, content, digest, next

786

787

return record_map

788

789

def get_text(self, version_id):
    """See VersionedFile.get_text"""
    texts = self.get_texts([version_id])
    return texts[0]

792

793

def get_texts(self, version_ids):
    """Return the text of each listed version as one joined string."""
    texts = []
    for lines in self.get_line_list(version_ids):
        texts.append(''.join(lines))
    return texts

795

796

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    The result has one entry per requested version, in request order.
    """
    text_map, _content_map = self._get_content_maps(version_ids)
    line_list = []
    for version_id in version_ids:
        line_list.append(text_map[version_id])
    return line_list

800

801

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    Each requested version is rebuilt by walking its build chain back to
    a fulltext (or to a component that was already rebuilt) and applying
    the line deltas in order. Rebuilt components are shared between the
    requested versions.

    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
        Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    :raises KnitCorrupt: if a rebuilt text does not match the digest
        stored with its record.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    index = self._index
    text_map = {}
    built = {}
    final_content = {}
    for version_id in version_ids:
        # Walk from the requested version towards its fulltext, stopping
        # early at a component that has been rebuilt before.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built:
                break
            cursor = build_parent

        # Replay the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in built:
                content = built[component_id]
                continue
            version_idx = index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_idx)
                # Copy first: the parent's content stays in 'built'.
                content = content.copy()
                content._lines = self._apply_delta(content._lines, delta)
            built[component_id] = content

        if 'no-eol' in index.get_options(version_id):
            # The stored text carries a newline the real text lacks; strip
            # it on a copy so the shared entry in 'built' is untouched.
            content = content.copy()
            last_line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], last_line)
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

856

857

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    Yields the lines held in the stored record of every requested
    version, taking the records in the order the knit lists its versions.

    :param version_ids: versions to visit; None means all versions.
    :param pb: progress bar to report to; None means a dummy one.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

    index = self._index
    # get a in-component-order queue of (version_id, position, length):
    version_id_records = []
    for version_id in self.versions():
        if version_id not in requested_versions:
            continue
        data_pos, length = index.get_position(version_id)
        version_id_records.append((version_id, data_pos, length))

    total = len(version_id_records)
    records = self._data.read_records_iter(version_id_records)
    for done, (version_id, data, sha_value) in enumerate(records):
        pb.update('Walking content.', done, total)
        method = index.get_method(version_id)
        # The result is not needed; the call is kept as in the original.
        index.lookup(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        for line in line_iterator:
            yield line

    pb.update('Walking content.', total, total)

895

896

def num_versions(self):
    """See VersionedFile.num_versions().

    The count is the one the index reports.
    """
    index = self._index
    return index.num_versions()

899

900

__len__ = num_versions

901

902

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields the (origin, text) pairs of the content built for version_id.
    """
    annotated = self._get_content(version_id).annotate_iter()
    for origin, text in annotated:
        yield origin, text

907

908

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index raises KeyError for
        version_id.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

917

918

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index raises KeyError for
        version_id.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

924

925

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    :param versions: a single version id or a list of them.
    :return: [] when no versions are given, otherwise whatever the index
        computes as their ancestry.
    :raises RevisionNotPresent: if a requested version is not present.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)
    return []

933

934

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: a single version id or a list of them.
    :return: [] when no versions are given, otherwise whatever the index
        computes as their ancestry including ghosts.
    :raises RevisionNotPresent: if a requested version is not present.
    """
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        self._check_versions_present(versions)
        return self._index.get_ancestry_with_ghosts(versions)
    return []

942

943

#@deprecated_method(zero_eight)

944

def walk(self, version_ids):
    """See VersionedFile.walk.

    Every version in the ancestry of version_ids is copied into a
    temporary Weave, parents first, and the weave's own walk() produces
    the (lineno, insert_id, dset, line) tuples that are yielded.
    """
    # We take the short path here, and extract all relevant texts
    # and put them in a weave and let that do all the work.  Far
    # from optimal, but is much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    w = Weave(self.filename)
    # A set makes the membership test below constant time instead of a
    # scan of the ancestry list for every version in the graph.
    ancestry = set(self.get_ancestry(version_ids))
    sorted_graph = topo_sort(self._index.get_graph())
    version_list = [vid for vid in sorted_graph if vid in ancestry]

    for version_id in version_list:
        lines = self.get_lines(version_id)
        w.add_lines(version_id, self.get_parents(version_id), lines)

    for lineno, insert_id, dset, line in w.walk(version_ids):
        yield lineno, insert_id, dset, line

963

964

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    The annotated texts of both versions are matched line by line and
    (state, text) pairs are yielded:

    - 'unchanged' for lines the two texts share;
    - for a line only in ver_a: 'killed-b' if the revision annotated on
      it is in the ancestry of ver_b, otherwise 'new-a';
    - for a line only in ver_b: 'killed-a' if the revision annotated on
      it is in the ancestry of ver_a, otherwise 'new-b'.
    """
    ancestors_b = set(self.get_ancestry(ver_b))
    ancestors_a = set(self.get_ancestry(ver_a))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [text for (_origin, text) in annotated_a]
    plain_b = [text for (_origin, text) in annotated_b]
    matcher = KnitSequenceMatcher(None, plain_a, plain_b)

    a_cur = 0
    b_cur = 0
    for a_start, b_start, length in matcher.get_matching_blocks():
        # process all mismatched sections
        # (last mismatched section is handled because blocks always
        # includes a 0-length last block)
        for revision, text in annotated_a[a_cur:a_start]:
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:b_start]:
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # and now the matched section
        a_cur = a_start + length
        b_cur = b_start + length
        for text_a, text_b in zip(plain_a[a_start:a_cur],
                                  plain_b[b_start:b_cur]):
            assert text_a == text_b
            yield "unchanged", text_a

1002

1003

1004

class _KnitComponentFile(object):

1005

"""One of the files used to implement a knit database"""

1006

1007

def __init__(self, transport, filename, mode, file_mode=None,

1008

create_parent_dir=False, dir_mode=None):

1009

self._transport = transport

1010

self._filename = filename

1011

self._mode = mode

1012

self._file_mode = file_mode

1013

self._dir_mode = dir_mode

1014

self._create_parent_dir = create_parent_dir

1015

self._need_to_create = False

1016

1017

def check_header(self, fp):

1018

line = fp.readline()

1019

if line == '':

1020

# An empty file can actually be treated as though the file doesn't

1021

# exist yet.

1022

raise errors.NoSuchFile(self._transport.base + self._filename)

1023

if line != self.HEADER:

1024

raise KnitHeaderError(badline=line,

1025

filename=self._transport.abspath(self._filename))

1026

1027

def commit(self):

1028

"""Commit is a nop."""

1029

1030

def __repr__(self):

1031

return '%s(%s)' % (self.__class__.__name__, self._filename)

1032

1033

1034

class _KnitIndex(_KnitComponentFile):

1035

"""Manages knit index file.

1036

1037

The index is already kept in memory and read on startup, to enable

1038

fast lookups of revision information. The cursor of the index

1039

file is always pointing to the end, making it easy to append

1040

entries.

1041

1042

_cache is a cache for fast mapping from version id to a Index

1043

object.

1044

1045

_history is a cache for fast mapping from indexes to version ids.

1046

1047

The index data format is dictionary compressed when it comes to

1048

parent references; an index entry may only have parents with a

1049

lower index number. As a result, the index is topologically sorted.

1050

1051

Duplicate entries may be written to the index for a single version id

1052

if this is done then the latter one completely replaces the former:

1053

this allows updates to correct version and parent information.

1054

Note that the two entries may share the delta, and that successive

1055

annotations and references MUST point to the first entry.

1056

1057

The index file on disc contains a header, followed by one line per knit

1058

record. The same revision can be present in an index file more than once.

1059

The first occurrence gets assigned a sequence number starting from 0.

1060

1061

The format of a single line is

1062

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1063

REVISION_ID is a utf8-encoded revision id

1064

FLAGS is a comma separated list of flags about the record. Values include

1065

no-eol, line-delta, fulltext.

1066

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1067

that the compressed data starts at.

1068

LENGTH is the ascii representation of the length of the data file.

1069

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1070

REVISION_ID.

1071

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1072

revision id already in the knit that is a parent of REVISION_ID.

1073

The ' :' marker is the end of record marker.

1074

1075

partial writes:

1076

when a write is interrupted to the index file, it will result in a line that

1077

does not end in ' :'. If the ' :' is not present at the end of a line, or at

1078

the end of the file, then the record that is missing it will be ignored by

1079

the parser.

1080

1081

When writing new records to the index file, the data is preceded by '\n'

1082

to ensure that records always start on new lines even if the last write was

1083

interrupted. As a result its normal for the last line in the index to be

1084

missing a trailing newline. One can be added with no harmful effects.

1085

"""

1086

1087

HEADER = "# bzr knit index 8\n"

1088

1089

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1090

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1091

1092

def _cache_version(self, version_id, options, pos, size, parents):

1093

"""Cache a version record in the history array and index cache.

1094

1095

This is inlined into __init__ for performance. KEEP IN SYNC.

1096

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1097

indexes).

1098

"""

1099

# only want the _history index to reference the 1st index entry

1100

# for version_id

1101

if version_id not in self._cache:

1102

index = len(self._history)

1103

self._history.append(version_id)

1104

else:

1105

index = self._cache[version_id][5]

1106

self._cache[version_id] = (version_id,

1107

options,

1108

pos,

1109

size,

1110

parents,

1111

index)

1112

1113

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Read the whole index file into memory.

    Every complete line is parsed into _cache and _history. If the file
    does not exist and mode is 'w' with create set, either the header is
    written straight away or, with delay_create, _need_to_create is set;
    in any other case the NoSuchFile error propagates.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._cache = {}
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    decode_utf8 = cache_utf8.decode
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        count = 0
        total = 1
        try:
            pb.update('read knit index', count, total)
            fp = self._transport.get(self._filename)
            try:
                self.check_header(fp)
                cache = self._cache
                history = self._history
                # readlines reads the whole file at once:
                # bad for transports like http, good for local disk
                # we save 60 ms doing this one change (
                # from calling readline each time to calling
                # readlines once.
                # probably what we want for nice behaviour on
                # http is a incremental readlines that yields, or
                # a check for local vs non local indexes,
                for line in fp.readlines():
                    rec = line.split()
                    if len(rec) < 5 or rec[-1] != ':':
                        # corrupt line.
                        # FIXME: in the future we should determine if its a
                        # short write - and ignore it
                        # or a different failure, and raise. RBC 20060407
                        continue
                    count += 1
                    total += 1
                    #pb.update('read knit index', count, total)
                    # --- inlined copy of self._parse_parents ---
                    parents = []
                    for value in rec[4:-1]:
                        if '.' == value[0]:
                            # uncompressed reference
                            parents.append(decode_utf8(value[1:]))
                        else:
                            # this is 15/4000ms faster than isinstance,
                            # (in lsprof)
                            # this function is called thousands of times a
                            # second so small variations add up.
                            assert value.__class__ is str
                            parents.append(history[int(value)])
                    # --- end of self._parse_parents ---
                    # --- inlined copy of self._cache_version ---
                    # only want the _history index to reference the 1st
                    # index entry for version_id
                    version_id = decode_utf8(rec[0])
                    if version_id in cache:
                        index = cache[version_id][5]
                    else:
                        index = len(history)
                        history.append(version_id)
                    cache[version_id] = (version_id,
                                         rec[1].split(','),
                                         int(rec[2]),
                                         int(rec[3]),
                                         parents,
                                         index)
                    # --- end of self._cache_version ---
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            if delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(self._filename,
                    self.HEADER, mode=self._file_mode)

    finally:
        pb.update('read knit index', total, total)
        pb.finished()

1203

1204

def _parse_parents(self, compressed_parents):

1205

"""convert a list of string parent values into version ids.

1206

1207

ints are looked up in the index.

1208

.FOO values are ghosts and converted in to FOO.

1209

1210

NOTE: the function is retained here for clarity, and for possible

1211

use in partial index reads. However bulk processing now has

1212

it inlined in __init__ for inner-loop optimisation.

1213

"""

1214

result = []

1215

for value in compressed_parents:

1216

if value[-1] == '.':

1217

# uncompressed reference

1218

result.append(cache_utf8.decode_utf8(value[1:]))

1219

else:

1220

# this is 15/4000ms faster than isinstance,

1221

# this function is called thousands of times a

1222

# second so small variations add up.

1223

assert value.__class__ is str

1224

result.append(self._history[int(value)])

1225

return result

1226

1227

def get_graph(self):
    """Return a list of (version_id, parents) pairs, one per cached version."""
    return [(version_id, record[4])
            for version_id, record in self._cache.iteritems()]

1232

1233

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry."""
    # Walk back from the requested versions.  For each version visited
    # keep only the parents that are present in the index (ghosts are
    # trimmed), then hand the resulting graph to topo_sort.
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        present = [name for name in self._cache[current][4]
                   if name in self._cache]
        # queue every parent that has not been completed yet
        todo.update(name for name in present if name not in ancestry)
        ancestry[current] = present
    return topo_sort(ancestry.items())

1250

1251

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts."""
    # Same walk as get_ancestry, except that versions missing from the
    # index are kept in the graph as parentless nodes.
    ancestry = {}
    todo = set(versions)
    while todo:
        current = todo.pop()
        try:
            parents = self._cache[current][4]
        except KeyError:
            # ghost, fake it
            ancestry[current] = []
            continue
        # got the parents ok
        todo.update(name for name in parents if name not in ancestry)
        ancestry[current] = parents
    return topo_sort(ancestry.items())

1271

1272

def num_versions(self):
    """Return the number of entries in the index history."""
    history = self._history
    return len(history)

# len(index) gives the same answer as index.num_versions()
__len__ = num_versions

1276

1277

def get_versions(self):
    """Return the history list of version ids.

    This is the index's own list object, not a copy.
    """
    history = self._history
    return history

1279

1280

def idx_to_name(self, idx):
    """Return the version id stored at position idx of the history."""
    history = self._history
    return history[idx]

1282

1283

def lookup(self, version_id):
    """Return the history position recorded for version_id.

    The position is slot 5 of the cached index entry.  version_id must
    already be present in the index.
    """
    assert version_id in self._cache
    entry = self._cache[version_id]
    return entry[5]

1286

1287

def _version_list_to_index(self, versions):
    """Render version ids as the space separated parents field.

    Versions present in the index are written as their history position;
    anything else is written as '.' followed by the utf-8 encoded id.
    """
    encode = cache_utf8.encode
    known = self._cache
    fields = []
    for name in versions:
        if name not in known:
            fields.append('.' + encode(name))
            continue
        # -- inlined lookup() --
        fields.append(str(known[name][5]))
        # -- end lookup () --
    return ' '.join(fields)

1298

1299

def add_version(self, version_id, options, pos, size, parents):
    """Add a version record to the index.

    This is add_versions called with a one element tuple.
    """
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1302

1303

def add_versions(self, versions):
    """Add multiple versions to the index.

    Each version is added to the in-memory index via _cache_version and
    one line per version is written through the transport.  If anything
    fails, the in-memory history and cache are put back as they were and
    the error is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
    """
    encode_utf8 = cache_utf8.encode
    # snapshot the in-memory state so that a failure can be rolled back
    saved_history = self._history[:]
    saved_cache = self._cache.copy()
    new_lines = []

    try:
        for version_id, options, pos, size, parents in versions:
            fields = (encode_utf8(version_id),
                      ','.join(options),
                      pos,
                      size,
                      self._version_list_to_index(parents))
            line = "\n%s %s %s %s %s :" % fields
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            new_lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if self._need_to_create:
            # creation was delayed: write the header and the new lines
            # in a single put
            buf = StringIO()
            buf.write(self.HEADER)
            buf.writelines(new_lines)
            buf.seek(0)
            self._transport.put_file_non_atomic(self._filename, buf,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(new_lines))
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = saved_history
        self._cache = saved_cache
        raise

1342

1343

def has_version(self, version_id):
    """True if the version is in the index."""
    known = self._cache
    return version_id in known

1346

1347

def get_position(self, version_id):
    """Return data position and size of specified version.

    :return: a (pos, size) tuple taken from slots 2 and 3 of the cached
        index entry.
    """
    entry = self._cache[version_id]
    return entry[2], entry[3]

1351

1352

def get_method(self, version_id):
    """Return compression method of specified version.

    :return: 'fulltext' if that option is recorded for the version,
        otherwise 'line-delta'.
    """
    options = self._cache[version_id][1]
    if 'fulltext' not in options:
        assert 'line-delta' in options
        return 'line-delta'
    return 'fulltext'

1360

1361

def get_options(self, version_id):
    """Return the options list recorded for the specified version."""
    entry = self._cache[version_id]
    return entry[1]

1363

1364

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    A parent counts as a ghost when it is not itself in the index.
    """
    known = self._cache
    return [name for name in known[version_id][4] if name in known]

1368

1369

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    This is the parents list held in the cached entry, not a copy.
    """
    entry = self._cache[version_id]
    return entry[4]

1372

1373

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :param version_ids: an iterable of version ids.
    :raises RevisionNotPresent: for the first id, in iteration order,
        that is not in the index.
    """
    for version_id in version_ids:
        if version_id not in self._cache:
            # The rest of this file names the attribute _filename; the
            # previous spelling here was self.filename.
            raise RevisionNotPresent(version_id, self._filename)

1381

1382

1383

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file"""

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False,
                 dir_mode=None):
        """Set up the data file, creating it on the transport if asked.

        With create and delay_create both set, nothing is written now;
        the file is put on the first add_record or add_raw_record call.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        #       but it complicates the model a bit, so for now just use
        #       a simple dictionary
        self._cache = {}
        self._do_cache = False
        if not create:
            return
        if delay_create:
            self._need_to_create = create
            return
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def enable_cache(self):
        """Enable caching of reads."""
        self._do_cache = True

    def clear_cache(self):
        """Clear the record cache."""
        self._do_cache = False
        self._cache = {}

    def _open_file(self):
        """Return the data file from the transport, or None if missing."""
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            return None

    def _record_to_data(self, version_id, digest, lines):
        """Convert version_id, digest, lines into a raw data block.

        The block is a gzip stream holding a 'version' header line, the
        content lines and an 'end' trailer line.

        :return: (len, a StringIO instance with the raw data ready to read.)
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)

        version_id_utf8 = cache_utf8.encode(version_id)
        header = "version %s %d %s\n" % (version_id_utf8,
                                         len(lines),
                                         digest)
        trailer = "end %s\n" % version_id_utf8
        data_file.writelines(chain([header], lines, [trailer]))
        data_file.close()
        length = sio.tell()

        sio.seek(0)
        return length, sio

    def add_raw_record(self, raw_data):
        """Append a prepared record to the data file.

        :return: the offset in the data file raw_data was written.
        """
        assert isinstance(raw_data, str), 'data must be plain bytes'
        if self._need_to_create:
            # first write to a delayed-create file: the record starts at 0
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            return 0
        return self._transport.append_bytes(self._filename, raw_data)

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.  Returns the position in the
        file where it was written, and the size of the record."""
        size, sio = self._record_to_data(version_id, digest, lines)
        # write to disk
        if self._need_to_create:
            self._transport.put_file_non_atomic(self._filename, sio,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            start_pos = 0
        else:
            start_pos = self._transport.append_file(self._filename, sio)
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return start_pos, size

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream.
                 as (stream, header_record)
        :raises KnitCorrupt: if the header does not have four fields or
            names a different version.
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
        if cache_utf8.decode(rec[1]) != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r' % (
                                version_id, rec[1]))
        return df, rec

    def _parse_record(self, version_id, data):
        """Unpack one raw record.

        :return: (content lines, digest), the digest being the last
            field of the record header.
        """
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df, rec = self._parse_record_header(version_id, data)
        record_contents = df.readlines()
        # the last line read is the 'end' trailer, not content
        trailer = record_contents.pop()
        assert len(record_contents) == int(rec[2])
        if trailer != 'end %s\n' % cache_utf8.encode(version_id):
            raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                        % (trailer, version_id))
        df.close()
        return record_contents, rec[3]

    def read_records_iter_raw(self, records):
        """Read text records from data file and yield raw data.

        This unpacks enough of the text record to validate the id is
        as expected but that's all.

        :param records: a list of (version_id, pos, size) entries.
        :return: yields (version_id, raw data) in the order requested.
        """
        # setup an iterator of the external records:
        # uses readv so nice and fast we hope.
        if len(records):
            # grab the disk data needed.
            if self._cache:
                # Don't check _cache if it is empty
                needed_offsets = [(pos, size)
                                  for version_id, pos, size in records
                                  if version_id not in self._cache]
            else:
                needed_offsets = [(pos, size)
                                  for version_id, pos, size in records]

            raw_records = self._transport.readv(self._filename, needed_offsets)

        for version_id, pos, size in records:
            if version_id in self._cache:
                # This data has already been validated
                data = self._cache[version_id]
            else:
                pos, data = raw_records.next()
                if self._do_cache:
                    self._cache[version_id] = data

                # validate the header
                df, rec = self._parse_record_header(version_id, data)
                df.close()
            yield version_id, data

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        The result will be returned in whatever is the fastest to read.
        Not by the order requested. Also, multiple requests for the same
        record will only yield 1 response.
        :param records: A list of (version_id, pos, len) entries
        :return: Yields (version_id, contents, digest) in the order
                 read, not the order requested
        """
        if not records:
            return

        by_offset = operator.itemgetter(1)
        if self._cache:
            # Skip records we have already seen
            yielded_records = set()
            needed_records = set()
            for record in records:
                name = record[0]
                if name not in self._cache:
                    needed_records.add(record)
                    continue
                if name in yielded_records:
                    continue
                yielded_records.add(name)
                data = self._cache[name]
                content, digest = self._parse_record(name, data)
                yield (name, content, digest)
            needed_records = sorted(needed_records, key=by_offset)
        else:
            needed_records = sorted(set(records), key=by_offset)

        if not needed_records:
            return

        # The transport optimizes the fetching as well
        # (ie, reads continuous ranges.)
        readv_response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in needed_records])

        for (version_id, pos, size), (pos, data) in \
                izip(iter(needed_records), readv_response):
            content, digest = self._parse_record(version_id, data)
            if self._do_cache:
                self._cache[version_id] = data
            yield version_id, content, digest

    def read_records(self, records):
        """Read records into a dictionary.

        :return: a dict mapping version id to (content, digest).
        """
        return dict((record_id, (content, digest))
                    for record_id, content, digest
                    in self.read_records_iter(records))

1598

1599

1600

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.  """
        try:
            both_knits = (isinstance(source, KnitVersionedFile) and
                          isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False
        return both_knits

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Raw records are copied from the source data file without being
        unpacked.  Returns the number of records copied.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # note: the pb argument is replaced by a nested progress bar
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 == n2:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent in n1:
                        # safe
                        continue
                    parent_ancestors = self.source.get_ancestry(parent)
                    if version in parent_ancestors:
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [name for name in full_list
                            if (not self.target.has_version(name)
                                and name in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                data_pos, data_size = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_iter = self.source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), (version_id2, options, parents) in \
                    izip(raw_iter, copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                extra_parents = list(n2.difference(n1))
                new_parents = (self.target.get_parents_with_ghosts(version)
                               + extra_parents)
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)

1715

1716

1717

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits."""
        try:
            weave_to_knit = (isinstance(source, bzrlib.weave.Weave) and
                             isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False
        return weave_to_knit

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Each needed version is added to the target with add_lines.
        Returns the number of versions added.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        # note: the pb argument is replaced by a nested progress bar
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if not n2.difference(n1):
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent in n1:
                        # safe
                        continue
                    parent_ancestors = self.source.get_ancestry(parent)
                    if version in parent_ancestors:
                        raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [name for name in full_list
                            if (not self.target.has_version(name)
                                and name in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                extra_parents = list(n2.difference(n1))
                new_parents = (self.target.get_parents_with_ghosts(version)
                               + extra_parents)
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

1808

1809

1810

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, prev_lens[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        prev_lens = {}
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            prev_len_of = prev_lens.get
            cur_lens = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced the
            # following improvement
            #     704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            #  +76519  0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            #     704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            #  +211400 0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            #  +76519  0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j < blo:
                        continue
                    if j >= bhi:
                        break
                    k = cur_lens[j] = 1 + prev_len_of(-1 + j, 0)
                    if k > bestsize:
                        besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            prev_lens = cur_lens

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        return besti, bestj, bestsize

1942

Older »