/brz/remove-bazaar : revision 1908.3.21

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2006-10-06 02:04:17 UTC
mfrom: (1908.10.1 bench_usecases.merge2)
mto: This revision was merged to the branch mainline in revision 2068.
Revision ID: mbp@sourcefrog.net-20061006020417-4949ca86f4417a4d

merge additional fix from cfbolz

files added:
COPYING.txt

bzr.ico

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/cache_utf8.py

bzrlib/cmd_version_info.py

bzrlib/ignores.py

bzrlib/inspect_for_copy.py

bzrlib/lazy_import.py

bzrlib/memorytree.py

bzrlib/mutabletree.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/revisiontree.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/response.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/treebuilder.py

bzrlib/urlutils.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/xml6.py

doc/README.1st

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/using_aliases.txt

doc/version_info.txt

profile_imports.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
bzrlib/util/configobj/validate.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

notes

files renamed:
bzrlib/tests/test_annotate.py => bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/test_revprops.py => bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/test_basis_inventory.py => bzrlib/tests/workingtree_implementations/test_basis_inventory.py

tutorial.txt => doc/tutorial.txt

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/elementtree/ElementTree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/newinventory.py

generate_docs.py

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

import bzrlib.errors as errors

from bzrlib import (

cache_utf8,

errors,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import *

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accomodate binaries, perhaps by storing a byte count

100

# TODO: accommodate binaries, perhaps by storing a byte count

101

102

# TODO: function to check whole file

103

119

128

"""Generate line-based delta from this content to new_lines."""

120

129

new_texts = [text for origin, text in new_lines._lines]

121

130

old_texts = [text for origin, text in self._lines]

122

s = SequenceMatcher(None, old_texts, new_texts)

131

s = KnitSequenceMatcher(None, old_texts, new_texts)

123

132

for op in s.get_opcodes():

124

133

if op[0] == 'equal':

125

134

continue

132

141

def text(self):

133

142

return [text for origin, text in self._lines]

134

143

144

def copy(self):

145

return KnitContent(self._lines[:])

146

135

147

136

148

class _KnitFactory(object):

137

149

"""Base factory for creating content objects."""

154

166

internal representation is of the format:

155

167

(revid, plaintext)

156

168

"""

169

decode_utf8 = cache_utf8.decode

157

170

lines = []

158

171

for line in content:

159

172

origin, text = line.split(' ', 1)

160

lines.append((origin.decode('utf-8'), text))

173

lines.append((decode_utf8(origin), text))

161

174

return KnitContent(lines)

162

175

163

176

def parse_line_delta_iter(self, lines):

171

184

intstart intend intcount

172

185

1..count lines:

173

186

revid(utf8) newline\n

174

internal represnetation is

187

internal representation is

175

188

(start, end, count, [1..count tuples (revid, newline)])

176

189

"""

190

decode_utf8 = cache_utf8.decode

177

191

result = []

178

192

lines = iter(lines)

179

193

next = lines.next

185

199

while remaining:

186

200

origin, text = next().split(' ', 1)

187

201

remaining -= 1

188

contents.append((origin.decode('utf-8'), text))

202

contents.append((decode_utf8(origin), text))

189

203

result.append((start, end, count, contents))

190

204

return result

191

205

194

208

195

209

see parse_fulltext which this inverts.

196

210

"""

197

return ['%s %s' % (o.encode('utf-8'), t) for o, t in content._lines]

211

encode_utf8 = cache_utf8.encode

212

return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

198

213

199

214

def lower_line_delta(self, delta):

200

215

"""convert a delta into a serializable form.

201

216

202

217

See parse_line_delta which this inverts.

203

218

"""

219

encode_utf8 = cache_utf8.encode

204

220

out = []

205

221

for start, end, c, lines in delta:

206

222

out.append('%d,%d,%d\n' % (start, end, c))

207

for origin, text in lines:

208

out.append('%s %s' % (origin.encode('utf-8'), text))

223

out.extend(encode_utf8(origin) + ' ' + text

224

for origin, text in lines)

209

225

return out

210

226

211

227

264

280

stored and retrieved.

265

281

"""

266

282

267

def __init__(self, relpath, transport, file_mode=None, access_mode=None, factory=None,

268

basis_knit=None, delta=True, create=False):

283

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

284

factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,

285

create=False, create_parent_dir=False, delay_create=False,

286

dir_mode=None):

269

287

"""Construct a knit at location specified by relpath.

270

288

271

289

:param create: If not True, only open an existing knit.

290

:param create_parent_dir: If True, create the parent directory if

291

creating the file fails. (This is used for stores with

292

hash-prefixes that may not exist yet)

293

:param delay_create: The calling code is aware that the knit won't

294

actually be created until the first data is stored.

272

295

"""

296

if deprecated_passed(basis_knit):

297

warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"

298

" deprecated as of bzr 0.9.",

299

DeprecationWarning, stacklevel=2)

273

300

if access_mode is None:

274

301

access_mode = 'w'

275

302

super(KnitVersionedFile, self).__init__(access_mode)

276

303

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

277

assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \

278

type(basis_knit)

279

280

304

self.transport = transport

281

305

self.filename = relpath

282

self.basis_knit = basis_knit

283

306

self.factory = factory or KnitAnnotateFactory()

284

307

self.writable = (access_mode == 'w')

285

308

self.delta = delta

286

309

287

310

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

288

access_mode, create=create)

311

access_mode, create=create, file_mode=file_mode,

312

create_parent_dir=create_parent_dir, delay_create=delay_create,

313

dir_mode=dir_mode)

289

314

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

290

access_mode, create=not len(self.versions()))

315

access_mode, create=create and not len(self), file_mode=file_mode,

316

create_parent_dir=create_parent_dir, delay_create=delay_create,

317

dir_mode=dir_mode)

291

318

319

def __repr__(self):

320

return '%s(%s)' % (self.__class__.__name__,

321

self.transport.abspath(self.filename))

322

292

323

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

293

324

"""See VersionedFile._add_delta()."""

294

325

self._check_add(version_id, []) # should we check the lines ?

351

382

where, size = self._data.add_record(version_id, digest, store_lines)

352

383

self._index.add_version(version_id, options, where, size, parents)

353

384

385

def _add_raw_records(self, records, data):

386

"""Add all the records 'records' with data pre-joined in 'data'.

387

388

:param records: A list of tuples(version_id, options, parents, size).

389

:param data: The data for the records. When it is written, the records

390

are adjusted to have pos pointing into data by the sum of

391

the preceding records sizes.

392

"""

393

# write all the data

394

pos = self._data.add_raw_record(data)

395

offset = 0

396

index_entries = []

397

for (version_id, options, parents, size) in records:

398

index_entries.append((version_id, options, pos+offset,

399

size, parents))

400

if self._data._do_cache:

401

self._data._cache[version_id] = data[offset:offset+size]

402

offset += size

403

self._index.add_versions(index_entries)

404

405

def enable_cache(self):

406

"""Start caching data for this knit"""

407

self._data.enable_cache()

408

354

409

def clear_cache(self):

355

410

"""Clear the data cache only."""

356

411

self._data.clear_cache()

359

414

"""See VersionedFile.copy_to()."""

360

415

# copy the current index to a temp index to avoid racing with local

361

416

# writes

362

transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename))

417

transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',

418

self.transport.get(self._index._filename))

363

419

# copy the data file

364

transport.put(name + DATA_SUFFIX, self._data._open_file())

365

# rename the copied index into place

366

transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

420

f = self._data._open_file()

421

try:

422

transport.put_file(name + DATA_SUFFIX, f)

423

finally:

424

f.close()

425

# move the copied index into place

426

transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

367

427

368

428

def create_empty(self, name, transport, mode=None):

369

return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)

429

return KnitVersionedFile(name, transport, factory=self.factory,

430

delta=self.delta, create=True)

370

431

371

432

def _fix_parents(self, version, new_parents):

372

433

"""Fix the parents list for version.

406

467

else:

407

468

old_texts = []

408

469

new_texts = new_content.text()

409

delta_seq = SequenceMatcher(None, old_texts, new_texts)

470

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

410

471

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

411

472

else:

412

473

delta = self.factory.parse_line_delta(data, version_idx)

417

478

graph_items = self._index.get_graph()

418

479

return dict(graph_items)

419

480

481

def get_sha1(self, version_id):

482

"""See VersionedFile.get_sha1()."""

483

record_map = self._get_record_map([version_id])

484

method, content, digest, next = record_map[version_id]

485

return digest

486

420

487

@staticmethod

421

488

def get_suffixes():

422

489

"""See VersionedFile.get_suffixes()."""

455

522

delta_seq = None

456

523

for parent_id in parents:

457

524

merge_content = self._get_content(parent_id, parent_texts)

458

seq = SequenceMatcher(None, merge_content.text(), content.text())

525

seq = KnitSequenceMatcher(None, merge_content.text(), content.text())

459

526

if delta_seq is None:

460

527

# setup a delta seq to reuse.

461

528

delta_seq = seq

472

539

reference_content = self._get_content(parents[0], parent_texts)

473

540

new_texts = content.text()

474

541

old_texts = reference_content.text()

475

delta_seq = SequenceMatcher(None, old_texts, new_texts)

542

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

476

543

return self._make_line_delta(delta_seq, content)

477

544

478

545

def _make_line_delta(self, delta_seq, new_content):

484

551

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

485

552

return diff_hunks

486

553

487

def _get_components(self, version_id):

488

"""Return a list of (version_id, method, data) tuples that

489

makes up version specified by version_id of the knit.

490

491

The components should be applied in the order of the returned

492

list.

493

494

The basis knit will be used to the largest extent possible

495

since it is assumed that accesses to it is faster.

554

def _get_components_positions(self, version_ids):

555

"""Produce a map of position data for the components of versions.

556

557

This data is intended to be used for retrieving the knit records.

558

559

A dict of version_id to (method, data_pos, data_size, next) is

560

returned.

561

method is the way referenced data should be applied.

562

data_pos is the position of the data in the knit.

563

data_size is the size of the data in the knit.

564

next is the build-parent of the version, or None for fulltexts.

496

565

"""

497

#profile notes:

498

# 4168 calls in 14912, 2289 internal

499

# 4168 in 9711 to read_records

500

# 52554 in 1250 to get_parents

501

# 170166 in 865 to list.append

502

503

# needed_revisions holds a list of (method, version_id) of

504

# versions that is needed to be fetched to construct the final

505

# version of the file.

506

507

# basis_revisions is a list of versions that needs to be

508

# fetched but exists in the basis knit.

509

510

basis = self.basis_knit

511

needed_versions = []

512

basis_versions = []

513

cursor = version_id

514

515

while 1:

516

picked_knit = self

517

if basis and basis._index.has_version(cursor):

518

picked_knit = basis

519

basis_versions.append(cursor)

520

method = picked_knit._index.get_method(cursor)

521

needed_versions.append((method, cursor))

522

if method == 'fulltext':

523

break

524

cursor = picked_knit.get_parents(cursor)[0]

525

526

components = {}

527

if basis_versions:

528

records = []

529

for comp_id in basis_versions:

530

data_pos, data_size = basis._index.get_data_position(comp_id)

531

records.append((piece_id, data_pos, data_size))

532

components.update(basis._data.read_records(records))

533

534

records = []

535

for comp_id in [vid for method, vid in needed_versions

536

if vid not in basis_versions]:

537

data_pos, data_size = self._index.get_position(comp_id)

538

records.append((comp_id, data_pos, data_size))

539

components.update(self._data.read_records(records))

540

541

# get_data_records returns a mapping with the version id as

542

# index and the value as data. The order the components need

543

# to be applied is held by needed_versions (reversed).

544

out = []

545

for method, comp_id in reversed(needed_versions):

546

out.append((comp_id, method, components[comp_id]))

547

548

return out

549

566

component_data = {}

567

for version_id in version_ids:

568

cursor = version_id

569

570

while cursor is not None and cursor not in component_data:

571

method = self._index.get_method(cursor)

572

if method == 'fulltext':

573

next = None

574

else:

575

next = self.get_parents(cursor)[0]

576

data_pos, data_size = self._index.get_position(cursor)

577

component_data[cursor] = (method, data_pos, data_size, next)

578

cursor = next

579

return component_data

580

550

581

def _get_content(self, version_id, parent_texts={}):

551

582

"""Returns a content object that makes up the specified

552

583

version."""

557

588

if cached_version is not None:

558

589

return cached_version

559

590

560

if self.basis_knit and version_id in self.basis_knit:

561

return self.basis_knit._get_content(version_id)

562

563

content = None

564

components = self._get_components(version_id)

565

for component_id, method, (data, digest) in components:

566

version_idx = self._index.lookup(component_id)

567

if method == 'fulltext':

568

assert content is None

569

content = self.factory.parse_fulltext(data, version_idx)

570

elif method == 'line-delta':

571

delta = self.factory.parse_line_delta(data, version_idx)

572

content._lines = self._apply_delta(content._lines, delta)

573

574

if 'no-eol' in self._index.get_options(version_id):

575

line = content._lines[-1][1].rstrip('\n')

576

content._lines[-1] = (content._lines[-1][0], line)

577

578

if sha_strings(content.text()) != digest:

579

import pdb;pdb.set_trace()

580

raise KnitCorrupt(self.filename, 'sha-1 does not match %s' % version_id)

581

582

return content

591

text_map, contents_map = self._get_content_maps([version_id])

592

return contents_map[version_id]

583

593

584

594

def _check_versions_present(self, version_ids):

585

595

"""Check that all specified versions are present."""

606

616

assert self.writable, "knit is not opened for write"

607

617

### FIXME escape. RBC 20060228

608

618

if contains_whitespace(version_id):

609

raise InvalidRevisionId(version_id)

619

raise InvalidRevisionId(version_id, self.filename)

610

620

if self.has_version(version_id):

611

621

raise RevisionAlreadyPresent(version_id, self.filename)

612

613

if False or __debug__:

614

for l in lines:

615

assert '\n' not in l[:-1]

622

self._check_lines_not_unicode(lines)

623

self._check_lines_are_lines(lines)

616

624

617

625

def _add(self, version_id, lines, parents, delta, parent_texts):

618

626

"""Add a set of lines on top of version specified by parents.

692

700

693

701

def _clone_text(self, new_version_id, old_version_id, parents):

694

702

"""See VersionedFile.clone_text()."""

695

# FIXME RBC 20060228 make fast by only inserting an index with null delta.

703

# FIXME RBC 20060228 make fast by only inserting an index with null

704

# delta.

696

705

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

697

706

698

707

def get_lines(self, version_id):

699

708

"""See VersionedFile.get_lines()."""

700

return self._get_content(version_id).text()

701

702

def iter_lines_added_or_present_in_versions(self, version_ids=None):

709

return self.get_line_list([version_id])[0]

710

711

def _get_record_map(self, version_ids):

712

"""Produce a dictionary of knit records.

713

714

The keys are version_ids, the values are tuples of (method, content,

715

digest, next).

716

method is the way the content should be applied.

717

content is a KnitContent object.

718

digest is the SHA1 digest of this version id after all steps are done

719

next is the build-parent of the version, i.e. the leftmost ancestor.

720

If the method is fulltext, next will be None.

721

"""

722

position_map = self._get_components_positions(version_ids)

723

# c = component_id, m = method, p = position, s = size, n = next

724

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

725

record_map = {}

726

for component_id, content, digest in \

727

self._data.read_records_iter(records):

728

method, position, size, next = position_map[component_id]

729

record_map[component_id] = method, content, digest, next

730

731

return record_map

732

733

def get_text(self, version_id):

734

"""See VersionedFile.get_text"""

735

return self.get_texts([version_id])[0]

736

737

def get_texts(self, version_ids):

738

return [''.join(l) for l in self.get_line_list(version_ids)]

739

740

def get_line_list(self, version_ids):

741

"""Return the texts of listed versions as a list of strings."""

742

text_map, content_map = self._get_content_maps(version_ids)

743

return [text_map[v] for v in version_ids]

744

745

def _get_content_maps(self, version_ids):

746

"""Produce maps of text and KnitContents

747

748

:return: (text_map, content_map) where text_map contains the texts for

749

the requested versions and content_map contains the KnitContents.

750

Both dicts take version_ids as their keys.

751

"""

752

for version_id in version_ids:

753

if not self.has_version(version_id):

754

raise RevisionNotPresent(version_id, self.filename)

755

record_map = self._get_record_map(version_ids)

756

757

text_map = {}

758

content_map = {}

759

final_content = {}

760

for version_id in version_ids:

761

components = []

762

cursor = version_id

763

while cursor is not None:

764

method, data, digest, next = record_map[cursor]

765

components.append((cursor, method, data, digest))

766

if cursor in content_map:

767

break

768

cursor = next

769

770

content = None

771

for component_id, method, data, digest in reversed(components):

772

if component_id in content_map:

773

content = content_map[component_id]

774

else:

775

version_idx = self._index.lookup(component_id)

776

if method == 'fulltext':

777

assert content is None

778

content = self.factory.parse_fulltext(data, version_idx)

779

elif method == 'line-delta':

780

delta = self.factory.parse_line_delta(data[:],

781

version_idx)

782

content = content.copy()

783

content._lines = self._apply_delta(content._lines,

784

delta)

785

content_map[component_id] = content

786

787

if 'no-eol' in self._index.get_options(version_id):

788

content = content.copy()

789

line = content._lines[-1][1].rstrip('\n')

790

content._lines[-1] = (content._lines[-1][0], line)

791

final_content[version_id] = content

792

793

# digest here is the digest from the last applied component.

794

text = content.text()

795

if sha_strings(text) != digest:

796

raise KnitCorrupt(self.filename,

797

'sha-1 does not match %s' % version_id)

798

799

text_map[version_id] = text

800

return text_map, final_content

801

802

def iter_lines_added_or_present_in_versions(self, version_ids=None,

803

pb=None):

703

804

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

704

805

if version_ids is None:

705

806

version_ids = self.versions()

706

# we dont care about inclusions, the caller cares.

807

if pb is None:

808

pb = progress.DummyProgress()

809

# we don't care about inclusions, the caller cares.

707

810

# but we need to setup a list of records to visit.

708

811

# we need version_id, position, length

709

812

version_id_records = []

720

823

data_pos, length = self._index.get_position(version_id)

721

824

version_id_records.append((version_id, data_pos, length))

722

825

723

pb = bzrlib.ui.ui_factory.nested_progress_bar()

724

826

count = 0

725

827

total = len(version_id_records)

726

try:

828

pb.update('Walking content.', count, total)

829

for version_id, data, sha_value in \

830

self._data.read_records_iter(version_id_records):

727

831

pb.update('Walking content.', count, total)

728

for version_id, data, sha_value in \

729

self._data.read_records_iter(version_id_records):

730

pb.update('Walking content.', count, total)

731

method = self._index.get_method(version_id)

732

version_idx = self._index.lookup(version_id)

733

assert method in ('fulltext', 'line-delta')

734

if method == 'fulltext':

735

content = self.factory.parse_fulltext(data, version_idx)

736

for line in content.text():

832

method = self._index.get_method(version_id)

833

version_idx = self._index.lookup(version_id)

834

assert method in ('fulltext', 'line-delta')

835

if method == 'fulltext':

836

content = self.factory.parse_fulltext(data, version_idx)

837

for line in content.text():

838

yield line

839

else:

840

delta = self.factory.parse_line_delta(data, version_idx)

841

for start, end, count, lines in delta:

842

for origin, line in lines:

737

843

yield line

738

else:

739

delta = self.factory.parse_line_delta(data, version_idx)

740

for start, end, count, lines in delta:

741

for origin, line in lines:

742

yield line

743

count +=1

744

pb.update('Walking content.', total, total)

745

pb.finished()

746

except:

747

pb.update('Walking content.', total, total)

748

pb.finished()

749

raise

844

count +=1

845

pb.update('Walking content.', total, total)

750

846

751

847

def num_versions(self):

752

848

"""See VersionedFile.num_versions()."""

816

912

for lineno, insert_id, dset, line in w.walk(version_ids):

817

913

yield lineno, insert_id, dset, line

818

914

915

def plan_merge(self, ver_a, ver_b):

916

"""See VersionedFile.plan_merge."""

917

ancestors_b = set(self.get_ancestry(ver_b))

918

def status_a(revision, text):

919

if revision in ancestors_b:

920

return 'killed-b', text

921

else:

922

return 'new-a', text

923

924

ancestors_a = set(self.get_ancestry(ver_a))

925

def status_b(revision, text):

926

if revision in ancestors_a:

927

return 'killed-a', text

928

else:

929

return 'new-b', text

930

931

annotated_a = self.annotate(ver_a)

932

annotated_b = self.annotate(ver_b)

933

plain_a = [t for (a, t) in annotated_a]

934

plain_b = [t for (a, t) in annotated_b]

935

blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

936

a_cur = 0

937

b_cur = 0

938

for ai, bi, l in blocks:

939

# process all mismatched sections

940

# (last mismatched section is handled because blocks always

941

# includes a 0-length last block)

942

for revision, text in annotated_a[a_cur:ai]:

943

yield status_a(revision, text)

944

for revision, text in annotated_b[b_cur:bi]:

945

yield status_b(revision, text)

946

947

# and now the matched section

948

a_cur = ai + l

949

b_cur = bi + l

950

for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):

951

assert text_a == text_b

952

yield "unchanged", text_a

953

819

954

820

955

class _KnitComponentFile(object):

821

956

"""One of the files used to implement a knit database"""

822

957

823

def __init__(self, transport, filename, mode):

958

def __init__(self, transport, filename, mode, file_mode=None,

959

create_parent_dir=False, dir_mode=None):

824

960

self._transport = transport

825

961

self._filename = filename

826

962

self._mode = mode

827

828

def write_header(self):

829

if self._transport.append(self._filename, StringIO(self.HEADER)):

830

raise KnitCorrupt(self._filename, 'misaligned after writing header')

963

self._file_mode = file_mode

964

self._dir_mode = dir_mode

965

self._create_parent_dir = create_parent_dir

966

self._need_to_create = False

831

967

832

968

def check_header(self, fp):

833

969

line = fp.readline()

866

1002

867

1003

The index file on disc contains a header, followed by one line per knit

868

1004

record. The same revision can be present in an index file more than once.

869

The first occurence gets assigned a sequence number starting from 0.

1005

The first occurrence gets assigned a sequence number starting from 0.

870

1006

871

1007

The format of a single line is

872

1008

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

888

1024

the end of the file, then the record that is missing it will be ignored by

889

1025

the parser.

890

1026

891

When writing new records to the index file, the data is preceeded by '\n'

1027

When writing new records to the index file, the data is preceded by '\n'

892

1028

to ensure that records always start on new lines even if the last write was

893

1029

interrupted. As a result it's normal for the last line in the index to be

894

1030

missing a trailing newline. One can be added with no harmful effects.

895

1031

"""

896

1032

897

HEADER = "# bzr knit index 7\n"

1033

HEADER = "# bzr knit index 8\n"

898

1034

899

1035

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

900

1036

# __slots__ = ['_cache', '_history', '_transport', '_filename']

920

1056

parents,

921

1057

index)

922

1058

923

def __init__(self, transport, filename, mode, create=False):

924

_KnitComponentFile.__init__(self, transport, filename, mode)

1059

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1060

create_parent_dir=False, delay_create=False, dir_mode=None):

1061

_KnitComponentFile.__init__(self, transport, filename, mode,

1062

file_mode=file_mode,

1063

create_parent_dir=create_parent_dir,

1064

dir_mode=dir_mode)

925

1065

self._cache = {}

926

1066

# position in _history is the 'official' index for a revision

927

1067

# but the values may have come from a newer entry.

928

# so - wc -l of a knit index is != the number of uniqe names

929

# in the weave.

1068

# so - wc -l of a knit index is != the number of unique names

1069

# in the knit.

930

1070

self._history = []

931

1071

pb = bzrlib.ui.ui_factory.nested_progress_bar()

932

1072

try:

935

1075

try:

936

1076

pb.update('read knit index', count, total)

937

1077

fp = self._transport.get(self._filename)

938

self.check_header(fp)

939

# readlines reads the whole file at once:

940

# bad for transports like http, good for local disk

941

# we save 60 ms doing this one change (

942

# from calling readline each time to calling

943

# readlines once.

944

# probably what we want for nice behaviour on

945

# http is a incremental readlines that yields, or

946

# a check for local vs non local indexes,

947

for l in fp.readlines():

948

rec = l.split()

949

if len(rec) < 5 or rec[-1] != ':':

950

# corrupt line.

951

# FIXME: in the future we should determine if its a

952

# short write - and ignore it

953

# or a different failure, and raise. RBC 20060407

954

continue

955

count += 1

956

total += 1

957

#pb.update('read knit index', count, total)

958

# See self._parse_parents

959

parents = []

960

for value in rec[4:-1]:

961

if '.' == value[0]:

962

# uncompressed reference

963

parents.append(value[1:])

1078

try:

1079

self.check_header(fp)

1080

# readlines reads the whole file at once:

1081

# bad for transports like http, good for local disk

1082

# we save 60 ms doing this one change (

1083

# from calling readline each time to calling

1084

# readlines once.

1085

# probably what we want for nice behaviour on

1086

# http is an incremental readlines that yields, or

1087

# a check for local vs non local indexes,

1088

for l in fp.readlines():

1089

rec = l.split()

1090

if len(rec) < 5 or rec[-1] != ':':

1091

# corrupt line.

1092

# FIXME: in the future we should determine if it's a

1093

# short write - and ignore it

1094

# or a different failure, and raise. RBC 20060407

1095

continue

1096

count += 1

1097

total += 1

1098

#pb.update('read knit index', count, total)

1099

# See self._parse_parents

1100

parents = []

1101

for value in rec[4:-1]:

1102

if '.' == value[0]:

1103

# uncompressed reference

1104

parents.append(value[1:])

1105

else:

1106

# this is 15/4000ms faster than isinstance,

1107

# (in lsprof)

1108

# this function is called thousands of times a

1109

# second so small variations add up.

1110

assert value.__class__ is str

1111

parents.append(self._history[int(value)])

1112

# end self._parse_parents

1113

# self._cache_version(rec[0],

1114

# rec[1].split(','),

1115

# int(rec[2]),

1116

# int(rec[3]),

1117

# parents)

1118

# --- self._cache_version

1119

# only want the _history index to reference the 1st

1120

# index entry for version_id

1121

version_id = rec[0]

1122

if version_id not in self._cache:

1123

index = len(self._history)

1124

self._history.append(version_id)

964

1125

else:

965

# this is 15/4000ms faster than isinstance,

966

# (in lsprof)

967

# this function is called thousands of times a

968

# second so small variations add up.

969

assert value.__class__ is str

970

parents.append(self._history[int(value)])

971

# end self._parse_parents

972

# self._cache_version(rec[0],

973

# rec[1].split(','),

974

# int(rec[2]),

975

# int(rec[3]),

976

# parents)

977

# --- self._cache_version

978

# only want the _history index to reference the 1st

979

# index entry for version_id

980

version_id = rec[0]

981

if version_id not in self._cache:

982

index = len(self._history)

983

self._history.append(version_id)

984

else:

985

index = self._cache[version_id][5]

986

self._cache[version_id] = (version_id,

987

rec[1].split(','),

988

int(rec[2]),

989

int(rec[3]),

990

parents,

991

index)

992

# --- self._cache_version

1126

index = self._cache[version_id][5]

1127

self._cache[version_id] = (version_id,

1128

rec[1].split(','),

1129

int(rec[2]),

1130

int(rec[3]),

1131

parents,

1132

index)

1133

# --- self._cache_version

1134

finally:

1135

fp.close()

993

1136

except NoSuchFile, e:

994

1137

if mode != 'w' or not create:

995

1138

raise

996

self.write_header()

1139

if delay_create:

1140

self._need_to_create = True

1141

else:

1142

self._transport.put_bytes_non_atomic(self._filename,

1143

self.HEADER, mode=self._file_mode)

1144

997

1145

finally:

998

1146

pb.update('read knit index', total, total)

999

1147

pb.finished()

1082

1230

return self._cache[version_id][5]

1083

1231

1084

1232

def _version_list_to_index(self, versions):

1233

encode_utf8 = cache_utf8.encode

1085

1234

result_list = []

1086

1235

for version in versions:

1087

1236

if version in self._cache:

1089

1238

result_list.append(str(self._cache[version][5]))

1090

1239

# -- end lookup () --

1091

1240

else:

1092

result_list.append('.' + version.encode('utf-8'))

1241

result_list.append('.' + encode_utf8(version))

1093

1242

return ' '.join(result_list)

1094

1243

1095

1244

def add_version(self, version_id, options, pos, size, parents):

1096

1245

"""Add a version record to the index."""

1097

self._cache_version(version_id, options, pos, size, parents)

1098

1099

content = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),

1100

','.join(options),

1101

pos,

1102

size,

1103

self._version_list_to_index(parents))

1104

assert isinstance(content, str), 'content must be utf-8 encoded'

1105

self._transport.append(self._filename, StringIO(content))

1106

1246

self.add_versions(((version_id, options, pos, size, parents),))

1247

1248

def add_versions(self, versions):

1249

"""Add multiple versions to the index.

1250

1251

:param versions: a list of tuples:

1252

(version_id, options, pos, size, parents).

1253

"""

1254

lines = []

1255

encode_utf8 = cache_utf8.encode

1256

for version_id, options, pos, size, parents in versions:

1257

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1258

','.join(options),

1259

pos,

1260

size,

1261

self._version_list_to_index(parents))

1262

assert isinstance(line, str), \

1263

'content must be utf-8 encoded: %r' % (line,)

1264

lines.append(line)

1265

if not self._need_to_create:

1266

self._transport.append_bytes(self._filename, ''.join(lines))

1267

else:

1268

sio = StringIO()

1269

sio.write(self.HEADER)

1270

sio.writelines(lines)

1271

sio.seek(0)

1272

self._transport.put_file_non_atomic(self._filename, sio,

1273

create_parent_dir=self._create_parent_dir,

1274

mode=self._file_mode,

1275

dir_mode=self._dir_mode)

1276

self._need_to_create = False

1277

1278

# cache after writing, so that a failed write leads to missing cache

1279

# entries not extra ones. XXX TODO: RBC 20060502 in the event of a

1280

# failure, reload the index or flush it or some such, to prevent

1281

# writing records that did complete twice.

1282

for version_id, options, pos, size, parents in versions:

1283

self._cache_version(version_id, options, pos, size, parents)

1284

1107

1285

def has_version(self, version_id):

1108

1286

"""True if the version is in the index."""

1109

return self._cache.has_key(version_id)

1287

return (version_id in self._cache)

1110

1288

1111

1289

def get_position(self, version_id):

1112

1290

"""Return data position and size of specified version."""

1131

1309

if parent in self._cache]

1132

1310

1133

1311

def get_parents_with_ghosts(self, version_id):

1134

"""Return parents of specified version wth ghosts."""

1312

"""Return parents of specified version with ghosts."""

1135

1313

return self._cache[version_id][4]

1136

1314

1137

1315

def check_versions_present(self, version_ids):

1147

1325

class _KnitData(_KnitComponentFile):

1148

1326

"""Contents of the knit data file"""

1149

1327

1150

HEADER = "# bzr knit data 7\n"

1151

1152

def __init__(self, transport, filename, mode, create=False):

1153

_KnitComponentFile.__init__(self, transport, filename, mode)

1154

self._file = None

1328

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1329

create_parent_dir=False, delay_create=False,

1330

dir_mode=None):

1331

_KnitComponentFile.__init__(self, transport, filename, mode,

1332

file_mode=file_mode,

1333

create_parent_dir=create_parent_dir,

1334

dir_mode=dir_mode)

1155

1335

self._checked = False

1336

# TODO: jam 20060713 conceptually, this could spill to disk

1337

# if the cached size gets larger than a certain amount

1338

# but it complicates the model a bit, so for now just use

1339

# a simple dictionary

1340

self._cache = {}

1341

self._do_cache = False

1156

1342

if create:

1157

self._transport.put(self._filename, StringIO(''))

1158

self._records = {}

1343

if delay_create:

1344

self._need_to_create = create

1345

else:

1346

self._transport.put_bytes_non_atomic(self._filename, '',

1347

mode=self._file_mode)

1348

1349

def enable_cache(self):

1350

"""Enable caching of reads."""

1351

self._do_cache = True

1159

1352

1160

1353

def clear_cache(self):

1161

1354

"""Clear the record cache."""

1162

self._records = {}

1355

self._do_cache = False

1356

self._cache = {}

1163

1357

1164

1358

def _open_file(self):

1165

if self._file is None:

1166

try:

1167

self._file = self._transport.get(self._filename)

1168

except NoSuchFile:

1169

pass

1170

return self._file

1359

try:

1360

return self._transport.get(self._filename)

1361

except NoSuchFile:

1362

pass

1363

return None

1171

1364

1172

1365

def _record_to_data(self, version_id, digest, lines):

1173

1366

"""Convert version_id, digest, lines into a raw data block.

1176

1369

"""

1177

1370

sio = StringIO()

1178

1371

data_file = GzipFile(None, mode='wb', fileobj=sio)

1372

1373

version_id_utf8 = cache_utf8.encode(version_id)

1179

1374

data_file.writelines(chain(

1180

["version %s %d %s\n" % (version_id.encode('utf-8'),

1375

["version %s %d %s\n" % (version_id_utf8,

1181

1376

len(lines),

1182

1377

digest)],

1183

1378

lines,

1184

["end %s\n" % version_id.encode('utf-8')]))

1379

["end %s\n" % version_id_utf8]))

1185

1380

data_file.close()

1186

1381

length= sio.tell()

1187

1382

1189

1384

return length, sio

1190

1385

1191

1386

def add_raw_record(self, raw_data):

1192

"""Append a prepared record to the data file."""

1387

"""Append a prepared record to the data file.

1388

1389

:return: the offset in the data file raw_data was written.

1390

"""

1193

1391

assert isinstance(raw_data, str), 'data must be plain bytes'

1194

start_pos = self._transport.append(self._filename, StringIO(raw_data))

1195

return start_pos, len(raw_data)

1392

if not self._need_to_create:

1393

return self._transport.append_bytes(self._filename, raw_data)

1394

else:

1395

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1396

create_parent_dir=self._create_parent_dir,

1397

mode=self._file_mode,

1398

dir_mode=self._dir_mode)

1399

self._need_to_create = False

1400

return 0

1196

1401

1197

1402

def add_record(self, version_id, digest, lines):

1198

1403

"""Write new text record to disk. Returns the position in the

1199

1404

file where it was written."""

1200

1405

size, sio = self._record_to_data(version_id, digest, lines)

1201

# cache

1202

self._records[version_id] = (digest, lines)

1203

1406

# write to disk

1204

start_pos = self._transport.append(self._filename, sio)

1407

if not self._need_to_create:

1408

start_pos = self._transport.append_file(self._filename, sio)

1409

else:

1410

self._transport.put_file_non_atomic(self._filename, sio,

1411

create_parent_dir=self._create_parent_dir,

1412

mode=self._file_mode,

1413

dir_mode=self._dir_mode)

1414

self._need_to_create = False

1415

start_pos = 0

1416

if self._do_cache:

1417

self._cache[version_id] = sio.getvalue()

1205

1418

return start_pos, size

1206

1419

1207

1420

def _parse_record_header(self, version_id, raw_data):

1214

1427

rec = df.readline().split()

1215

1428

if len(rec) != 4:

1216

1429

raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')

1217

if rec[1].decode('utf-8')!= version_id:

1430

if cache_utf8.decode(rec[1]) != version_id:

1218

1431

raise KnitCorrupt(self._filename,

1219

1432

'unexpected version, wanted %r, got %r' % (

1220

1433

version_id, rec[1]))

1229

1442

record_contents = df.readlines()

1230

1443

l = record_contents.pop()

1231

1444

assert len(record_contents) == int(rec[2])

1232

if l.decode('utf-8') != 'end %s\n' % version_id:

1445

if l != 'end %s\n' % cache_utf8.encode(version_id):

1233

1446

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

1234

1447

% (l, version_id))

1235

1448

df.close()

1240

1453

1241

1454

This unpacks enough of the text record to validate the id is

1242

1455

as expected but that's all.

1243

1244

It will actively recompress currently cached records on the

1245

basis that that is cheaper than I/O activity.

1246

1456

"""

1247

needed_records = []

1248

for version_id, pos, size in records:

1249

if version_id not in self._records:

1250

needed_records.append((version_id, pos, size))

1251

1252

1457

# setup an iterator of the external records:

1253

1458

# uses readv so nice and fast we hope.

1254

if len(needed_records):

1459

if len(records):

1255

1460

# grab the disk data needed.

1256

raw_records = self._transport.readv(self._filename,

1257

[(pos, size) for version_id, pos, size in needed_records])

1461

if self._cache:

1462

# Don't check _cache if it is empty

1463

needed_offsets = [(pos, size) for version_id, pos, size

1464

in records

1465

if version_id not in self._cache]

1466

else:

1467

needed_offsets = [(pos, size) for version_id, pos, size

1468

in records]

1469

1470

raw_records = self._transport.readv(self._filename, needed_offsets)

1471

1258

1472

1259

1473

for version_id, pos, size in records:

1260

if version_id in self._records:

1261

# compress a new version

1262

size, sio = self._record_to_data(version_id,

1263

self._records[version_id][0],

1264

self._records[version_id][1])

1265

yield version_id, sio.getvalue()

1474

if version_id in self._cache:

1475

# This data has already been validated

1476

data = self._cache[version_id]

1266

1477

else:

1267

1478

pos, data = raw_records.next()

1479

if self._do_cache:

1480

self._cache[version_id] = data

1481

1268

1482

# validate the header

1269

1483

df, rec = self._parse_record_header(version_id, data)

1270

1484

df.close()

1271

yield version_id, data

1272

1485

yield version_id, data

1273

1486

1274

1487

def read_records_iter(self, records):

1275

1488

"""Read text records from data file and yield result.

1276

1489

1277

Each passed record is a tuple of (version_id, pos, len) and

1278

will be read in the given order. Yields (version_id,

1279

contents, digest).

1490

The results are yielded in whatever order is fastest to read,

1491

not in the order requested. Also, multiple requests for the same

1492

record will only yield 1 response.

1493

:param records: A list of (version_id, pos, len) entries

1494

:return: Yields (version_id, contents, digest) in the order

1495

read, not the order requested

1280

1496

"""

1281

# profiling notes:

1282

# 60890 calls for 4168 extractions in 5045, 683 internal.

1283

# 4168 calls to readv in 1411

1284

# 4168 calls to parse_record in 2880

1285

1286

needed_records = []

1287

for version_id, pos, size in records:

1288

if version_id not in self._records:

1289

needed_records.append((version_id, pos, size))

1290

1291

if len(needed_records):

1292

# We take it that the transport optimizes the fetching as good

1293

# as possible (ie, reads continous ranges.)

1294

response = self._transport.readv(self._filename,

1295

[(pos, size) for version_id, pos, size in needed_records])

1296

1297

for (record_id, pos, size), (pos, data) in izip(iter(needed_records), response):

1298

content, digest = self._parse_record(record_id, data)

1299

self._records[record_id] = (digest, content)

1300

1301

for version_id, pos, size in records:

1302

yield version_id, list(self._records[version_id][1]), self._records[version_id][0]

1497

if not records:

1498

return

1499

1500

if self._cache:

1501

# Skip records we have already seen

1502

yielded_records = set()

1503

needed_records = set()

1504

for record in records:

1505

if record[0] in self._cache:

1506

if record[0] in yielded_records:

1507

continue

1508

yielded_records.add(record[0])

1509

data = self._cache[record[0]]

1510

content, digest = self._parse_record(record[0], data)

1511

yield (record[0], content, digest)

1512

else:

1513

needed_records.add(record)

1514

needed_records = sorted(needed_records, key=operator.itemgetter(1))

1515

else:

1516

needed_records = sorted(set(records), key=operator.itemgetter(1))

1517

1518

if not needed_records:

1519

return

1520

1521

# The transport optimizes the fetching as well

1522

# (ie, reads continuous ranges.)

1523

readv_response = self._transport.readv(self._filename,

1524

[(pos, size) for version_id, pos, size in needed_records])

1525

1526

for (version_id, pos, size), (pos, data) in \

1527

izip(iter(needed_records), readv_response):

1528

content, digest = self._parse_record(version_id, data)

1529

if self._do_cache:

1530

self._cache[version_id] = data

1531

yield version_id, content, digest

1303

1532

1304

1533

def read_records(self, records):

1305

1534

"""Read records into a dictionary."""

1306

1535

components = {}

1307

for record_id, content, digest in self.read_records_iter(records):

1536

for record_id, content, digest in \

1537

self.read_records_iter(records):

1308

1538

components[record_id] = (content, digest)

1309

1539

return components

1310

1540

1312

1542

class InterKnit(InterVersionedFile):

1313

1543

"""Optimised code paths for knit to knit operations."""

1314

1544

1315

_matching_file_factory = KnitVersionedFile

1545

_matching_file_from_factory = KnitVersionedFile

1546

_matching_file_to_factory = KnitVersionedFile

1316

1547

1317

1548

@staticmethod

1318

1549

def is_compatible(source, target):

1328

1559

assert isinstance(self.source, KnitVersionedFile)

1329

1560

assert isinstance(self.target, KnitVersionedFile)

1330

1561

1331

if version_ids is None:

1332

version_ids = self.source.versions()

1333

else:

1334

if not ignore_missing:

1335

self.source._check_versions_present(version_ids)

1336

else:

1337

version_ids = set(self.source.versions()).intersection(

1338

set(version_ids))

1562

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

1339

1563

1340

1564

if not version_ids:

1341

1565

return 0

1372

1596

needed_versions.update(new_parents.difference(this_versions))

1373

1597

mismatched_versions.add(version)

1374

1598

1375

if not needed_versions and not cross_check_versions:

1599

if not needed_versions and not mismatched_versions:

1376

1600

return 0

1377

1601

full_list = topo_sort(self.source.get_graph())

1378

1602

1391

1615

# if source has the parent, we must :

1392

1616

# * already have it or

1393

1617

# * have it scheduled already

1394

# otherwise we dont care

1618

# otherwise we don't care

1395

1619

assert (self.target.has_version(parent) or

1396

1620

parent in copy_set or

1397

1621

not self.source.has_version(parent))

1403

1627

# data suck the join:

1404

1628

count = 0

1405

1629

total = len(version_list)

1406

# we want the raw gzip for bulk copying, but the record validated

1407

# just enough to be sure its the right one.

1408

# TODO: consider writev or write combining to reduce

1409

# death of a thousand cuts feeling.

1630

raw_datum = []

1631

raw_records = []

1410

1632

for (version_id, raw_data), \

1411

1633

(version_id2, options, parents) in \

1412

1634

izip(self.source._data.read_records_iter_raw(copy_queue_records),

1414

1636

assert version_id == version_id2, 'logic error, inconsistent results'

1415

1637

count = count + 1

1416

1638

pb.update("Joining knit", count, total)

1417

pos, size = self.target._data.add_raw_record(raw_data)

1418

self.target._index.add_version(version_id, options, pos, size, parents)

1639

raw_records.append((version_id, options, parents, len(raw_data)))

1640

raw_datum.append(raw_data)

1641

self.target._add_raw_records(raw_records, ''.join(raw_datum))

1419

1642

1420

1643

for version in mismatched_versions:

1421

1644

# FIXME RBC 20060309 is this needed?

1433

1656

InterVersionedFile.register_optimiser(InterKnit)

1434

1657

1435

1658

1436

class SequenceMatcher(difflib.SequenceMatcher):

1659

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations.

    Registered as an inter-versionedfile optimiser for the case where the
    source is a weave and the target is a knit.
    """

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :param source: the candidate source versioned file.
        :param target: the candidate target versioned file.
        :return: True only when source is a Weave and target is a
            KnitVersionedFile; False otherwise (including when the type
            checks raise AttributeError).
        """
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        :param pb: accepted for interface compatibility; it is replaced by a
            nested progress bar obtained from the ui factory.
        :param msg: accepted for interface compatibility; not used here.
        :param version_ids: versions to join, resolved through
            self._get_source_version_ids.
        :param ignore_missing: passed to self._get_source_version_ids.
        :return: the number of versions added to the target.
        :raises errors.GraphCycleError: if adopting a source parent would
            make a version its own ancestor.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            # versions present on both sides need their parents compared.
            shared_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                extra_parents = n2.difference(n1)
                if not extra_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent not in n1:
                        parent_ancestors = self.source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                needed_versions.update(
                    extra_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(self.source.get_graph())

            # keep topological order, restricted to what the target lacks.
            version_list = []
            for candidate in full_list:
                if self.target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, lines)
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the
                # current parents as first in the list
                current_parents = self.target.get_parents_with_ghosts(version)
                new_parents = current_parents + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1747

1748

1749

InterVersionedFile.register_optimiser(WeaveToKnit)

1750

1751

1752

class KnitSequenceMatcher(difflib.SequenceMatcher):

1437

1753

"""Knit tuned sequence matcher.

1438

1754

1439

1755

This is based on profiling of difflib which indicated some improvements

1510

1826

j2lenget = j2len.get

1511

1827

newj2len = {}

1512

1828

1513

# changing b2j.get(a[i], nothing) to a try:Keyerror pair produced the

1829

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1514

1830

# following improvement

1515

1831

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1516

1832

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

Older »