/brz/remove-bazaar : revision 1594.2.7

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2006-03-08 07:03:47 UTC
mto: (1596.2.3 integration) (1594.3.1 versioned-file-performance)
mto: This revision was merged to the branch mainline in revision 1601.
Revision ID: robertc@robertcollins.net-20060308070347-df89b46b3b5cf86a

Add versionedfile.fix_parents api for correcting data post hoc.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py.moved

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

generate_docs.py

notes

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

import difflib

from difflib import SequenceMatcher

from gzip import GzipFile

import os

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor

# position after writing to work out where it was located. we may need to

# bypass python file buffering.

DATA_SUFFIX = '.knit'

INDEX_SUFFIX = '.kndx'

100

class KnitContent(object):
    """Annotated lines of one knit version, editable by applying deltas.

    The content is held in ``_lines`` as a sequence of (origin, text) pairs.
    """

    def __init__(self, lines):
        # The sequence is stored as given (not copied); apply_delta edits
        # it in place.
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the content as (origin, text) pairs."""
        for pair in self._lines:
            origin, text = pair
            yield origin, text

    def annotate(self):
        """Return the content as a list of (origin, text) tuples."""
        return [entry for entry in self.annotate_iter()]

    def apply_delta(self, delta):
        """Apply delta, a sequence of (start, end, count, lines) hunks.

        Hunk positions refer to the content as it was before any hunk was
        applied, so a running shift tracks how far earlier hunks have
        moved the following lines.
        """
        shift = 0
        for start, end, count, replacement in delta:
            lo = shift + start
            hi = shift + end
            self._lines[lo:hi] = replacement
            shift = shift + (start - end) + count

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        Each hunk is (start, end, count, lines): the range [start, end)
        of this content is replaced by the ``count`` annotated lines
        taken from new_lines.  Unchanged regions produce no hunk.
        """
        target = new_lines._lines
        target_texts = [text for _origin, text in target]
        own_texts = [text for _origin, text in self._lines]
        matcher = difflib.SequenceMatcher(None, own_texts, target_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag != 'equal':
                yield (i1, i2, j2 - j1, target[j1:j2])

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return only the text of each line, dropping the origins."""
        texts = []
        for _origin, text in self._lines:
            texts.append(text)
        return texts

137

138

139

class _KnitFactory(object):
    """Common base of the factories that build knit content objects."""

    def make(self, lines, version):
        """Return a KnitContent in which every line has origin ``version``."""
        origins = [version] * len(lines)
        annotated_lines = zip(origins, lines)
        return KnitContent(annotated_lines)

145

146

147

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    In the serialised form handled here every content line is written as
    ``"<origin> <text>"`` where origin is an integer, and every delta hunk
    starts with a ``"start,end,count\\n"`` header line.
    """

    annotated = True

    def parse_fulltext(self, content, version):
        """Parse serialised fulltext lines into a KnitContent.

        :param content: Iterable of ``"<origin> <text>"`` lines.
        :param version: Unused; origins are read from the lines themselves.
        """
        lines = []
        for line in content:
            origin, text = line.split(' ', 1)
            lines.append((int(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines, version=None):
        """Yield (start, end, count, contents) hunks parsed from lines.

        :param lines: List of serialised delta lines.  The list is read by
            index and is NOT modified (earlier code consumed it with
            repeated pop(0), which was quadratic in the record size).
        :param version: Unused; accepted so that the signature is
            call-compatible with KnitPlainFactory.parse_line_delta_iter.
        :raises IndexError: If a hunk header announces more lines than
            remain in ``lines``.
        """
        pos = 0
        while pos < len(lines):
            start, end, c = [int(n) for n in lines[pos].split(',')]
            pos += 1
            contents = []
            for _ in range(c):
                # Indexing (rather than slicing) keeps the IndexError on a
                # truncated hunk.
                origin, text = lines[pos].split(' ', 1)
                pos += 1
                contents.append((int(origin), text))
            yield start, end, c, contents

    def parse_line_delta(self, lines, version):
        """Return the hunks of parse_line_delta_iter(lines) as a list."""
        return list(self.parse_line_delta_iter(lines))

    def lower_fulltext(self, content):
        """Serialise content as a list of ``"<origin> <text>"`` lines."""
        return ['%d %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """Serialise delta hunks: a header line, then the annotated lines."""
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(['%d %s' % (origin, text) for origin, text in lines])
        return out

182

183

184

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects.

    In the serialised form handled here content lines are stored without
    any origin prefix; every delta hunk starts with a
    ``"start,end,count\\n"`` header line.
    """

    annotated = False

    def parse_fulltext(self, content, version):
        """Build a KnitContent whose lines all have origin ``version``."""
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Yield (start, end, count, contents) hunks parsed from lines.

        Every parsed line is attributed to ``version``.

        :param lines: List of serialised delta lines.  The list is read
            through a cursor and is NOT modified (earlier code consumed it
            with pop(0) / del, which was quadratic in the record size).
        :param version: Origin recorded for every line of every hunk.
        """
        pos = 0
        total = len(lines)
        while pos < total:
            start, end, c = [int(n) for n in lines[pos].split(',')]
            pos += 1
            # A hunk that is cut short yields the lines that are present.
            hunk = lines[pos:pos + c] if c > 0 else []
            pos += len(hunk)
            yield start, end, c, [(version, text) for text in hunk]

    def parse_line_delta(self, lines, version):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        """Serialise content as its plain text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta hunks: a header line, then the plain lines."""
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out

211

212

213

def make_empty_knit(transport, relpath):
    """Construct a empty knit at the specified location.

    :param transport: Transport on which the knit files are created.
    :param relpath: Base name of the knit, relative to the transport.
    """
    # KnitVersionedFile takes (relpath, transport, ...) and expects a
    # factory *instance*; the arguments used to be passed positionally in
    # the wrong order, which tripped the access_mode assertion.
    # create=True because the knit is not expected to exist yet.
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    k._data._open_file()

217

218

219

class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=None, delta=True, create=False):
        """Construct a knit at location specified by relpath.

        :param relpath: Base name of the knit; INDEX_SUFFIX and DATA_SUFFIX
            are appended to it to name the index and data files.
        :param transport: Transport handed to the index and data objects.
        :param file_mode: Accepted but not used by this constructor.
        :param access_mode: 'r' or 'w'; None is treated as 'w'.
        :param factory: Content factory instance; a KnitAnnotateFactory is
            created when a false value is given.
        :param basis_knit: Optional KnitVersionedFile that is consulted
            first when extracting texts.
        :param delta: Whether new texts may be stored as line-deltas.
        :param create: If not True, only open an existing knit.
        """
        if access_mode is None:
            access_mode = 'w'
        assert access_mode in ('r', 'w'), \
            "invalid mode specified %r" % access_mode
        assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
            type(basis_knit)

        self.transport = transport
        self.filename = relpath
        self.basis_knit = basis_knit
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
                                 access_mode, create=create)
        # The data file is created only when the index holds no versions.
        self._data = _KnitData(transport, relpath + DATA_SUFFIX,
                               access_mode, create=not len(self.versions()))

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # copy the current index to a temp index to avoid racing with local
        # writes
        transport.put(name + INDEX_SUFFIX + '.tmp',
                      self.transport.get(self._index._filename))
        # copy the data file
        transport.put(name + DATA_SUFFIX, self._data._open_file())
        # rename the copied index into place
        transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        """Return a new knit at name using this knit's factory and delta flag.

        ``mode`` is accepted but not used.
        """
        return KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)

    def fix_parents(self, version, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        the parents list must be a superset of the current
        list.
        """
        # Cached index entries are (version_id, options, pos, size, parents).
        current_values = self._index._cache[version]
        assert set(current_values[4]).difference(set(new_parents)) == set()
        self._index.add_version(version,
                                current_values[1],
                                current_values[2],
                                current_values[3],
                                new_parents)

    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents):
        """Merge annotations for content.

        For each parent in turn, every block of lines whose text matches
        the parent's text takes over the parent's (origin, text) entries.
        ``content`` is modified in place.
        """
        for parent_id in parents:
            merge_content = self._get_content(parent_id)
            seq = SequenceMatcher(None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                content._lines[j:j+n] = merge_content._lines[i:i+n]

    def _get_components(self, version_id):
        """Return a list of (version_id, method, data) tuples that
        makes up version specified by version_id of the knit.

        The components should be applied in the order of the returned
        list.

        The basis knit will be used to the largest extent possible
        since it is assumed that accesses to it is faster.
        """
        # needed_versions holds a list of (method, version_id) of
        # versions that is needed to be fetched to construct the final
        # version of the file.

        # basis_versions is a list of versions that needs to be
        # fetched but exists in the basis knit.

        basis = self.basis_knit
        needed_versions = []
        basis_versions = []
        cursor = version_id

        # Walk back along first parents until a fulltext is reached.
        while True:
            picked_knit = self
            if basis and basis._index.has_version(cursor):
                picked_knit = basis
                basis_versions.append(cursor)
            method = picked_knit._index.get_method(cursor)
            needed_versions.append((method, cursor))
            if method == 'fulltext':
                break
            cursor = picked_knit.get_parents(cursor)[0]

        components = {}
        if basis_versions:
            records = []
            for comp_id in basis_versions:
                # Fixed: this used the undefined name 'piece_id' and called
                # get_data_position(); every other call site in this class
                # uses get_position() for the (position, size) pair.
                data_pos, data_size = basis._index.get_position(comp_id)
                records.append((comp_id, data_pos, data_size))
            components.update(basis._data.read_records(records))

        records = []
        for comp_id in [vid for method, vid in needed_versions
                        if vid not in basis_versions]:
            data_pos, data_size = self._index.get_position(comp_id)
            records.append((comp_id, data_pos, data_size))
        components.update(self._data.read_records(records))

        # components is used as a mapping from version id to record data.
        # The order the components need to be applied is held by
        # needed_versions (reversed).
        out = []
        for method, comp_id in reversed(needed_versions):
            out.append((comp_id, method, components[comp_id]))

        return out

    def _get_content(self, version_id):
        """Returns a content object that makes up the specified
        version.

        :raises RevisionNotPresent: If version_id is not in this knit.
        :raises KnitCorrupt: If the sha-1 of the rebuilt text differs from
            the digest stored with the last component applied.
        """
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        if self.basis_knit and version_id in self.basis_knit:
            return self.basis_knit._get_content(version_id)

        content = None
        components = self._get_components(version_id)
        for component_id, method, (data, digest) in components:
            version_idx = self._index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_idx)
                content.apply_delta(delta)

        # _add() appends a newline to a final line that lacks one and
        # records 'no-eol'; strip it again before checking the digest.
        if 'no-eol' in self._index.get_options(version_id):
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)

        if sha_strings(content.text()) != digest:
            raise KnitCorrupt(self.filename, 'sha-1 does not match')

        return content

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: For one of the missing versions.
        """
        version_ids = set(version_ids)
        for r in list(version_ids):
            if self._index.has_version(r):
                version_ids.remove(r)
        if version_ids:
            raise RevisionNotPresent(list(version_ids)[0], self.filename)

    def add_lines(self, version_id, parents, lines):
        """See VersionedFile.add_lines."""
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)

        if __debug__:
            # A newline may only appear as the last character of a line.
            for l in lines:
                assert '\n' not in l[:-1]

        self._check_versions_present(parents)
        # _add() may modify the last line, so hand it a copy.
        return self._add(version_id, lines[:], parents, self.delta)

    def _add(self, version_id, lines, parents, delta):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        the first parent.
        """
        if delta and not parents:
            delta = False

        # The digest is taken before any newline is appended below.
        digest = sha_strings(lines)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'

        lines = self.factory.make(lines, len(self._index))
        if self.factory.annotated and len(parents) > 0:
            # Merge annotations from parent texts if so is needed.
            self._merge_annotations(lines, parents)

        if parents and delta:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spend applying deltas.
            count = 0
            delta_parents = parents
            while count < 25:
                parent = delta_parents[0]
                method = self._index.get_method(parent)
                if method == 'fulltext':
                    break
                delta_parents = self._index.get_parents(parent)
                count = count + 1
            # No fulltext within the limit: store this text in full.
            if method == 'line-delta':
                delta = False

        if delta:
            options.append('line-delta')
            content = self._get_content(parents[0])
            delta_hunks = content.line_delta(lines)
            store_lines = self.factory.lower_line_delta(delta_hunks)
        else:
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)

    def check(self, progress_bar=None):
        """See VersionedFile.check()."""

    def clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null delta.
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self._get_content(version_id).text()

    def iter_lines_added_or_present_in_versions(self, version_ids=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        # we dont care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
            data_pos, length = self._index.get_position(version_id)
            version_id_records.append((version_id, data_pos, length))
        for version_id, data, sha_value in \
                self._data.read_records_iter(version_id_records):
            method = self._index.get_method(version_id)
            version_idx = self._index.lookup(version_id)
            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                content = self.factory.parse_fulltext(data, version_idx)
                for line in content.text():
                    yield line
            else:
                delta = self.factory.parse_line_delta(data, version_idx)
                for start, end, count, lines in delta:
                    for origin, line in lines:
                        yield line

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

    __len__ = num_versions

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():
            yield self._index.idx_to_name(origin), text

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        self._check_versions_present([version_id])
        return list(self._index.get_parents(version_id))

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)

    def _reannotate_line_delta(self, other, lines, new_version_id,
                               new_version_idx):
        """Re-annotate line-delta and return new delta.

        Origins in ``lines`` are index numbers of ``other``; each is
        mapped to its version id and then to this knit's index number
        (new_version_idx for new_version_id itself).
        """
        # NOTE(review): parse_line_delta_iter is called with one argument,
        # but KnitPlainFactory.parse_line_delta_iter also requires a
        # version -- confirm this is only reached with an annotating factory.
        new_delta = []
        for start, end, count, contents \
                in self.factory.parse_line_delta_iter(lines):
            new_lines = []
            for origin, line in contents:
                old_version_id = other._index.idx_to_name(origin)
                if old_version_id == new_version_id:
                    idx = new_version_idx
                else:
                    idx = self._index.lookup(old_version_id)
                new_lines.append((idx, line))
            new_delta.append((start, end, count, new_lines))

        return self.factory.lower_line_delta(new_delta)

    def _reannotate_fulltext(self, other, lines, new_version_id,
                             new_version_idx):
        """Re-annotate fulltext and return new version.

        Origins are translated from ``other``'s index numbers to this
        knit's, in the same way as _reannotate_line_delta does.
        """
        content = self.factory.parse_fulltext(lines, new_version_idx)
        new_lines = []
        for origin, line in content.annotate_iter():
            old_version_id = other._index.idx_to_name(origin)
            if old_version_id == new_version_id:
                idx = new_version_idx
            else:
                idx = self._index.lookup(old_version_id)
            new_lines.append((idx, line))

        return self.factory.lower_fulltext(KnitContent(new_lines))

    #@deprecated_method(zero_eight)
    def walk(self, version_ids):
        """See VersionedFile.walk."""
        # We take the short path here, and extract all relevant texts
        # and put them in a weave and let that do all the work.  Far
        # from optimal, but is much simpler.
        # FIXME RB 20060228 this really is inefficient!
        from bzrlib.weave import Weave

        w = Weave(self.filename)
        ancestry = self.get_ancestry(version_ids)
        sorted_graph = topo_sort(self._index.get_graph())
        version_list = [vid for vid in sorted_graph if vid in ancestry]

        for version_id in version_list:
            lines = self.get_lines(version_id)
            w.add_lines(version_id, self.get_parents(version_id), lines)

        for lineno, insert_id, dset, line in w.walk(version_ids):
            yield lineno, insert_id, dset, line

592

593

594

class _KnitComponentFile(object):
    """Common base for the files that together implement a knit database.

    write_header() and check_header() read a ``HEADER`` attribute that is
    not defined on this class; it has to be supplied by the subclass.
    """

    def __init__(self, transport, filename, mode):
        """Remember the transport, the file name and the access mode."""
        self._transport, self._filename, self._mode = (
            transport, filename, mode)

    def write_header(self):
        """Append HEADER to the file.

        Raises KnitCorrupt when the transport reports that the file was
        not empty before the append.
        """
        previous_length = self._transport.append(self._filename,
                                                 StringIO(self.HEADER))
        if previous_length == 0:
            return
        raise KnitCorrupt(self._filename, 'misaligned after writing header')

    def check_header(self, fp):
        """Read len(HEADER) characters from fp and compare them to HEADER.

        Raises KnitHeaderError carrying the text that was read when it
        does not match.
        """
        found = fp.read(len(self.HEADER))
        if found == self.HEADER:
            return
        raise KnitHeaderError(badline=found)

    def commit(self):
        """Commit is a nop."""
        return None

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

617

618

619

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache maps a version id to its index record, stored as a
    (version_id, options, pos, size, parents) tuple.

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.
    """

    HEADER = "# bzr knit index 7\n"

    def _cache_version(self, version_id, options, pos, size, parents):
        """Store (or replace) the in-memory record for version_id.

        A version id keeps the history slot it got the first time it was
        seen, even when a later record replaces its cached values.
        """
        # Within this class _cache and _history are only extended here,
        # together, so the dict tells us whether the id already owns a
        # history slot without scanning the whole list.
        first_seen = version_id not in self._cache
        self._cache[version_id] = (version_id, options, pos, size, parents)
        if first_seen:
            self._history.append(version_id)

    def _iter_index(self, fp):
        """Yield each remaining line of fp split on whitespace."""
        for line in fp.read().splitlines(False):
            yield line.split()

    def __init__(self, transport, filename, mode, create=False):
        """Load the index from the transport, or create it.

        When the file does not exist, a header is written only if mode is
        'w' and create is true; otherwise NoSuchFile propagates.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the weave.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            self.check_header(fp)
            for rec in self._iter_index(fp):
                # Fields: version id, comma separated options, position,
                # size, then parent references given as history indexes.
                self._cache_version(
                    rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),
                    [self._history[int(i)] for i in rec[4:]])
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            self.write_header()

    def get_graph(self):
        """Return a list of (version_id, parents) pairs for every version."""
        return [(version_id, record[4])
                for version_id, record in self._cache.iteritems()]

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            parents = self._cache[version][4]
            for parent in parents:
                if parent not in graph:
                    pending.add(parent)
            graph[version] = parents
        return topo_sort(graph.items())

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Return the list of version ids in index order.

        This is the internal list itself, not a copy.
        """
        return self._history

    def idx_to_name(self, idx):
        """Return the version id stored at history position idx."""
        return self._history[idx]

    def lookup(self, version_id):
        """Return the history position of version_id."""
        assert version_id in self._cache
        return self._history.index(version_id)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        self._cache_version(version_id, options, pos, size, parents)

        # Parents are written as their history indexes, not their ids.
        content = "%s %s %s %s %s\n" % (version_id,
                                        ','.join(options),
                                        pos,
                                        size,
                                        ' '.join([str(self.lookup(vid)) for
                                                  vid in parents]))
        self._transport.append(self._filename, StringIO(content))

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        record = self._cache[version_id]
        return (record[2], record[3])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            assert 'line-delta' in options
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options list recorded for the specified version."""
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        Raises RevisionNotPresent naming one of the missing versions.
        """
        missing = [version_id for version_id in set(version_ids)
                   if version_id not in self._cache]
        if missing:
            # The file name lives in _filename; this class has no
            # 'filename' attribute.
            raise RevisionNotPresent(missing[0], self._filename)

755

756

757

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file"""

    HEADER = "# bzr knit data 7\n"

    def __init__(self, transport, filename, mode, create=False):
        """Set up access to the data file; write an empty file if create."""
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._file = None
        self._checked = False
        if create:
            self._transport.put(self._filename, StringIO(''))

    def _open_file(self):
        """Return the cached file object, fetching it on first use.

        Returns None when the transport raises NoSuchFile.
        """
        if self._file is None:
            try:
                self._file = self._transport.get(self._filename)
            except NoSuchFile:
                # Deliberately ignored: the caller gets None back.
                pass
        return self._file

    def add_record(self, version_id, digest, lines):
        """Write a new text record to the data file.

        The record is gzip compressed and consists of a
        "version ID LINECOUNT DIGEST" line, the given lines, and an
        "end ID" line followed by one empty line.

        :return: (start_pos, size): the value returned by the transport's
            append() and the length of the compressed record.
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)
        data_file.write("version %s %d %s\n"
                        % (version_id, len(lines), digest))
        data_file.writelines(lines)
        # The end marker is followed by an extra blank line.
        data_file.write("end %s\n\n" % version_id)
        data_file.close()

        content = sio.getvalue()
        start_pos = self._transport.append(self._filename, StringIO(content))
        return start_pos, len(content)

    def _parse_record(self, version_id, data):
        """Decompress one record and return (lines, digest).

        Raises KnitCorrupt when the header line does not have four
        fields, names a different version, or the end marker is wrong.
        """
        df = GzipFile(mode='rb', fileobj=StringIO(data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of records')
        if rec[1] != version_id:
            # Report against _filename like the other checks; there is no
            # 'file' attribute on this object.
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r' % version_id)
        record_contents = self._read_record_contents(df, int(rec[2]))
        end_line = df.readline()
        if end_line != 'end %s\n' % version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (end_line, version_id))
        return record_contents, rec[3]

    def _read_record_contents(self, df, record_lines):
        """Read and return n lines from datafile."""
        return [df.readline() for _ in range(record_lines)]

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        Each passed record is a tuple of (version_id, pos, len) and
        will be read in the given order.  Yields (version_id,
        contents, digest).

        records is iterated twice, so it must be a re-iterable sequence
        such as a list.
        """
        # We take it that the transport optimizes the fetching as good
        # as possible (ie, reads continuous ranges.)
        response = self._transport.readv(
            self._filename,
            [(pos, size) for version_id, pos, size in records])

        for (record_id, pos, size), (read_pos, data) in zip(records,
                                                            response):
            content, digest = self._parse_record(record_id, data)
            yield record_id, content, digest

    def read_records(self, records):
        """Read records into a dictionary."""
        components = {}
        for record_id, content, digest in self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components

854

855

856

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.

        True only when both source and target are KnitVersionedFile
        instances.
        """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the records the target lacks from the source and then
        corrects the parents of versions both sides have but disagree on.

        :param pb: progress bar; a DummyProgress is used when None.
        :param msg: not used by this implementation.
        :param version_ids: versions to join (with their ancestry); all
            source versions when None.
        :param ignore_missing: when true, requested versions absent from
            the source are dropped instead of being checked for.
        :return: the number of records copied into the target.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        if version_ids is None:
            version_ids = self.source.versions()
        else:
            if not ignore_missing:
                self.source._check_versions_present(version_ids)
            else:
                version_ids = set(self.source.versions()).intersection(
                    set(version_ids))

        if not version_ids:
            return 0

        if pb is None:
            from bzrlib.progress import DummyProgress
            pb = DummyProgress()

        version_ids = list(version_ids)
        if None in version_ids:
            version_ids.remove(None)

        self.source_ancestry = set(self.source.get_ancestry(version_ids))
        this_versions = set(self.target._index.get_versions())
        needed_versions = self.source_ancestry - this_versions
        cross_check_versions = self.source_ancestry.intersection(this_versions)
        mismatched_versions = set()
        for version in cross_check_versions:
            # scan to include needed parents.
            n1 = set(self.target.get_parents(version))
            n2 = set(self.source.get_parents(version))
            if n1 != n2:
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent in n1:
                        # safe
                        continue
                    else:
                        parent_ancestors = self.source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

        # Nothing to copy and no parents to repair: stop before sorting
        # the whole source graph.  Versions that are merely shared, with
        # matching parents, need no work.
        if not needed_versions and not mismatched_versions:
            return 0
        full_list = topo_sort(self.source._index.get_graph())

        version_list = [i for i in full_list if (not self.target.has_version(i)
                        and i in needed_versions)]

        records = []
        for version_id in version_list:
            data_pos, data_size = self.source._index.get_position(version_id)
            records.append((version_id, data_pos, data_size))

        count = 0
        for version_id, lines, digest \
                in self.source._data.read_records_iter(records):
            options = self.source._index.get_options(version_id)
            parents = self.source._index.get_parents(version_id)

            # version_list is in topological order, so every parent must
            # already have been added to the target.
            for parent in parents:
                assert self.target.has_version(parent)

            if self.target.factory.annotated:
                # FIXME jrydberg: it should be possible to skip
                # re-annotating components if we know that we are
                # going to pull all revisions in the same order.
                new_version_id = version_id
                new_version_idx = self.target._index.num_versions()
                if 'fulltext' in options:
                    lines = self.target._reannotate_fulltext(
                        self.source, lines, new_version_id, new_version_idx)
                elif 'line-delta' in options:
                    lines = self.target._reannotate_line_delta(
                        self.source, lines, new_version_id, new_version_idx)

            count += 1
            pb.update("Joining knit", count, len(version_list))

            pos, size = self.target._data.add_record(version_id, digest, lines)
            self.target._index.add_version(version_id, options, pos, size,
                                           parents)

        for version in mismatched_versions:
            n1 = set(self.target.get_parents(version))
            n2 = set(self.source.get_parents(version))
            # write a combined record to our history preserving the current
            # parents as first in the list
            new_parents = (self.target.get_parents(version)
                           + list(n2.difference(n1)))
            self.target.fix_parents(version, new_parents)
        pb.clear()
        return count

970

971

972

# Register InterKnit with InterVersionedFile as an optimiser; its
# is_compatible() accepts only pairs where both sides are knits.
InterVersionedFile.register_optimiser(InterKnit)

Older »