    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines."""
        new_texts = new_lines.text()
        old_texts = self.text()
        s = KnitSequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in s.get_opcodes():
            if tag == 'equal':
                continue
            # ofrom, oto, length, data
            yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        return list(self.line_delta_iter(new_lines))
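
    # Illustrative sketch (not from the original source; the revision id is
    # invented): for an old content whose text() is ['a\n', 'b\n'] and a new
    # content whose _lines are [('rev-new', 'a\n'), ('rev-new', 'c\n')],
    # line_delta() yields one replace region:
    #
    #     [(1, 2, 1, [('rev-new', 'c\n')])]
    #
    # old lines [1:2) are replaced by a single line carrying its origin revid.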

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])
        """
        result = []
        lines = iter(lines)
        next = lines.next

        cache = {}
        def cache_and_return(line):
            origin, text = line.split(' ', 1)
            return cache.setdefault(origin, origin), text

        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))
        return result
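
    # Hedged example of the wire form parsed above (revision id invented):
    # the two lines '1,2,1\n' and 'rev-new c\n' decode to
    # [(1, 2, 1, [('rev-new', 'c\n')])]: the header carries start, end and
    # count, and each following line is 'origin text'.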

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                origin, text = next().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out
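
    # Hedged sketch (invented revision id): lower_line_delta() inverts
    # parse_line_delta(), so [(1, 2, 1, [('rev-new', 'c\n')])] serializes
    # back to ['1,2,1\n', 'rev-new c\n'].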


class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - it's just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        cur = 0
        num_lines = len(lines)
        while cur < num_lines:
            header = lines[cur]
            cur += 1
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version_id] * c, lines[cur:cur+c])
            cur += c

    def parse_line_delta(self, lines, version_id):
        return list(self.parse_line_delta_iter(lines, version_id))
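
    # Hedged sketch (invented revision id): a plain delta stores no per-line
    # origins, so parse_line_delta(['1,2,1\n', 'c\n'], 'rev-new') returns
    # [(1, 2, 1, [('rev-new', 'c\n')])], pairing every content line with the
    # constant version_id passed in.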

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                yield next()

    def lower_fulltext(self, content):
        return content.text()

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _check_should_delta(self, first_parents):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext. It searches for _max_delta_chain parents. When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        full text.
        """
        delta_size = 0
        fulltext_size = None
        delta_parents = first_parents
        for count in xrange(self._max_delta_chain):
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            pos, size = self._index.get_position(parent)
            if method == 'fulltext':
                fulltext_size = size
                break
            delta_size += size
            delta_parents = self._index.get_parents(parent)
        else:
            # We couldn't find a fulltext, so we must create a new one
            return False

        return fulltext_size > delta_size
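
    # Hedged numeric sketch (invented sizes): with two 150-byte deltas leading
    # back to a 250-byte fulltext, delta_size is 300 when the fulltext is
    # found, so fulltext_size > delta_size is False and the caller stores a
    # fresh fulltext instead of extending the chain.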

    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, []) # should we check the lines ?

            text_map[version_id] = text
        return text_map, final_content

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        else:
            version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        if pb is None:
            pb = progress.DummyProgress()
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        requested_versions = set(version_ids)
        # filter for available versions
        for version_id in requested_versions:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        # get an in-component-order queue:
        for version_id in self.versions():
            if version_id in requested_versions:
                data_pos, length = self._index.get_position(version_id)
                version_id_records.append((version_id, data_pos, length))

        total = len(version_id_records)
        for version_idx, (version_id, data, sha_value) in \
            enumerate(self._data.read_records_iter(version_id_records)):
            pb.update('Walking content.', version_idx, total)
            method = self._index.get_method(version_id)
            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                line_iterator = self.factory.get_fulltext_content(data)
            else:
                line_iterator = self.factory.get_linedelta_content(data)
            for line in line_iterator:
                yield line

        pb.update('Walking content.', total, total)

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                self._load_data(fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def _load_data(self, fp):
        cache = self._cache
        history = self._history

        self.check_header(fp)
        # readlines reads the whole file at once:
        # bad for transports like http, good for local disk
        # we save 60 ms doing this one change (
        # from calling readline each time to calling
        # readlines once).
        # probably what we want for nice behaviour on
        # http is an incremental readlines that yields, or
        # a check for local vs non local indexes,
        history_top = len(history) - 1
        for line in fp.readlines():
            rec = line.split()
            if len(rec) < 5 or rec[-1] != ':':
                # corrupt line.
                # FIXME: in the future we should determine if it's a
                # short write - and ignore it
                # or a different failure, and raise. RBC 20060407
                continue

            parents = []
            for value in rec[4:-1]:
                if value[0] == '.':
                    # uncompressed reference
                    parent_id = value[1:]
                else:
                    parent_id = history[int(value)]
                parents.append(parent_id)

            version_id, options, pos, size = rec[:4]

            # See self._cache_version
            # only want the _history index to reference the 1st
            # index entry for version_id
            if version_id not in cache:
                history_top += 1
                index = history_top
                history.append(version_id)
            else:
                index = cache[version_id][5]
            cache[version_id] = (version_id,
                                 options.split(','),
                                 int(pos),
                                 int(size),
                                 parents,
                                 index)
            # end self._cache_version

    def get_graph(self):
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed and not a ghost
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())
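
    # Hedged sketch (invented revision ids): if 'rev-b' names a parent 'rev-a'
    # that is absent from the index, get_ancestry(['rev-b']) trims the ghost
    # and returns ['rev-b'], while get_ancestry_with_ghosts(['rev-b']) fakes
    # an empty entry for it and returns ['rev-a', 'rev-b'].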

    def add_versions(self, versions):
        """Add multiple versions to the index.

        versions is a list of tuples:
        (version_id, options, pos, size, parents).
        """
        lines = []
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, pos, size, parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise
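
    # Hedged usage sketch (invented values): add_versions([('rev-2',
    # ['line-delta'], 423, 108, ['rev-1'])]) appends one index line in the
    # format shown in _load_data; the try/except above leaves self._cache and
    # self._history untouched if the transport write fails.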

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        return entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version."""

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream,
                 as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        try:
            rec = self._check_header(version_id, df.readline())
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename,
                              'unexpected number of elements in record header')
        if rec[1] != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec
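
    # Hedged sketch of the header validated above (digest shortened, values
    # invented): a data record opens with a line like
    #
    #     version rev-2 2 f1d2d2f9ab...
    #
    # where rec[1] is the version id, rec[2] the content line count and
    # rec[3] the sha1 digest, assuming the 'version <id> <count> <digest>'
    # layout used by the knit data file.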

    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))

        try:
            record_contents = df.readlines()
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        header = record_contents.pop(0)
        rec = self._check_header(version_id, header)

        last_line = record_contents.pop()
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._filename,
                              'incorrect number of lines %s != %s'
                              ' for version {%s}'
                              % (len(record_contents), int(rec[2]),
                                 version_id))
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))
        df.close()
        return record_contents, rec[3]
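
    # Hedged sketch of a whole decompressed record (invented values):
    #
    #     version rev-2 2 f1d2d2f9ab...
    #     rev-1 a
    #     rev-2 c
    #     end rev-2
    #
    # the header and the 'end <version id>' trailer bracket exactly rec[2]
    # content lines, which _parse_record returns along with the digest.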