/brz/remove-bazaar : revision 1594.2.6

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2006-03-08 04:59:20 UTC
mto: (1596.2.3 integration) (1594.3.1 versioned-file-performance)
mto: This revision was merged to the branch mainline in revision 1601.
Revision ID: robertc@robertcollins.net-20060308045920-ecdaf527c1095d76

Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py.moved

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

generate_docs.py

notes

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

import difflib

from difflib import SequenceMatcher

from gzip import GzipFile

import os

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor

# position after writing to work out where it was located. we may need to

# bypass python file buffering.

DATA_SUFFIX = '.knit'

INDEX_SUFFIX = '.kndx'

100

class KnitContent(object):
    """Annotated text of one knit version, editable by applying line deltas.

    The text is held in ``_lines`` as a list of ``(origin, text)`` pairs,
    one pair per line.
    """

    def __init__(self, lines):
        # The list is kept by reference (no copy); apply_delta() edits it
        # in place.
        self._lines = lines

    def annotate_iter(self):
        """Iterate over the content, yielding (origin, text) for each line."""
        for pair in self._lines:
            origin, text = pair
            yield origin, text

    def annotate(self):
        """Return every (origin, text) pair of the content as a list."""
        return [pair for pair in self.annotate_iter()]

    def apply_delta(self, delta):
        """Rewrite this content in place according to delta.

        delta is an iterable of ``(start, end, count, lines)`` hunks: the
        slice ``[start:end]`` (in pre-delta line numbers) is replaced by
        ``lines``.
        """
        # Earlier hunks change the list length, so later hunk positions
        # are shifted by the net number of lines added so far.
        shift = 0
        for start, end, count, lines in delta:
            self._lines[shift + start:shift + end] = lines
            shift += count - (end - start)

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        new_lines is another content object.  Each hunk is
        ``(start, end, count, lines)``: ``start``/``end`` index this
        content, ``lines`` are the replacement (origin, text) pairs taken
        from new_lines and ``count`` is their number.  Unchanged regions
        produce no hunk.
        """
        old_texts = [text for origin, text in self._lines]
        new_texts = [text for origin, text in new_lines._lines]
        matcher = difflib.SequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag != 'equal':
                yield (i1, i2, j2 - j1, new_lines._lines[j1:j2])

    def line_delta(self, new_lines):
        """Return the hunks of line_delta_iter(new_lines) as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return only the text of every line, dropping the origins."""
        return [text for origin, text in self._lines]

137

138

139

class _KnitFactory(object):
    """Common base of the factories that build knit content objects."""

    def make(self, lines, version):
        """Return a KnitContent in which every line is attributed to version.

        :param lines: the text lines of the version.
        :param version: the origin value paired with each line.
        """
        return KnitContent([(version, text) for text in lines])

145

146

147

class KnitAnnotateFactory(_KnitFactory):
    """Factory for content whose stored lines carry their own origin.

    A stored line has the form ``'<origin> <text>'`` where origin is a
    decimal integer.
    """

    annotated = True

    def parse_fulltext(self, content, version):
        """Turn stored annotated lines into a KnitContent.

        version is not used here: every stored line names its own origin.
        """
        fields = (raw.split(' ', 1) for raw in content)
        return KnitContent([(int(origin), text) for origin, text in fields])

    def parse_line_delta_iter(self, lines):
        """Yield (start, end, count, contents) hunks from a stored delta.

        Each hunk is a ``'start,end,count'`` header line followed by
        ``count`` annotated lines.  The ``lines`` list is consumed
        (emptied) as it is parsed.
        """
        while lines:
            start, end, count = [int(n) for n in lines.pop(0).split(',')]
            hunk = []
            for _ in range(count):
                origin, text = lines.pop(0).split(' ', 1)
                hunk.append((int(origin), text))
            yield start, end, count, hunk

    def parse_line_delta(self, lines, version):
        """Return the hunks of a stored delta as a list.

        version is not used; it is accepted so the signature matches
        KnitPlainFactory.parse_line_delta.
        """
        return [hunk for hunk in self.parse_line_delta_iter(lines)]

    def lower_fulltext(self, content):
        """Serialise a content object as ``'<origin> <text>'`` lines."""
        return ['%d %s' % (origin, text) for origin, text in content._lines]

    def lower_line_delta(self, delta):
        """Serialise hunks as header lines followed by annotated lines."""
        out = []
        for start, end, count, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, count))
            out.extend(['%d %s' % (origin, text) for origin, text in lines])
        return out

182

183

184

class KnitPlainFactory(_KnitFactory):
    """Factory for content stored as bare text lines, without origins."""

    annotated = False

    def parse_fulltext(self, content, version):
        """Build a KnitContent attributing every stored line to version."""
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Yield (start, end, count, contents) hunks from a stored delta.

        Each hunk is a ``'start,end,count'`` header line followed by
        ``count`` text lines, all of which are attributed to version.
        The ``lines`` list is consumed as parsing proceeds.
        """
        while lines:
            start, end, count = [int(n) for n in lines.pop(0).split(',')]
            hunk = [(version, text) for text in lines[:count]]
            yield start, end, count, hunk
            # The hunk's lines are dropped only once the consumer asks for
            # the next hunk.
            del lines[:count]

    def parse_line_delta(self, lines, version):
        """Return the hunks of a stored delta as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version)]

    def lower_fulltext(self, content):
        """Serialise a content object as its plain text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise hunks as header lines followed by plain text lines."""
        out = []
        for start, end, count, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, count))
            for origin, text in lines:
                out.append(text)
        return out

211

212

213

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: transport through which the knit files are accessed.
    :param relpath: path of the knit on transport, without the data/index
        suffix (KnitVersionedFile appends those itself).
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, ...) and uses
    # the factory as an instance.  The previous call swapped the first two
    # arguments and let 'w' and the factory class fall into the file_mode
    # and access_mode slots, which trips the access_mode assertion.
    # Keywords keep each value in the right slot.  create=True is needed
    # because __init__ documents that otherwise only an existing knit is
    # opened.
    k = KnitVersionedFile(relpath, transport, access_mode='w',
                          factory=KnitPlainFactory(), create=True)
    # NOTE(review): presumably opening the data file is what makes it
    # exist on the transport; _KnitData is not visible here -- confirm.
    k._data._open_file()

217

218

219

class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

    def __init__(self, relpath, transport, file_mode=None, access_mode=None, factory=None,
                 basis_knit=None, delta=True, create=False):
        """Construct a knit at location specified by relpath.

        :param relpath: path of the knit on transport, without suffix; the
            index and data suffixes are appended here.
        :param transport: transport used to reach the index and data files.
        :param file_mode: accepted but not used by this constructor.
        :param access_mode: 'r' or 'w'; None means 'w'.
        :param factory: content factory instance; defaults to a new
            KnitAnnotateFactory.
        :param basis_knit: optional KnitVersionedFile consulted first when
            reading.
        :param delta: whether new texts may be stored as line deltas.
        :param create: If not True, only open an existing knit.
        """
        if access_mode is None:
            access_mode = 'w'
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
            type(basis_knit)

        self.transport = transport
        self.filename = relpath
        self.basis_knit = basis_knit
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create)
        # The data file is flagged for creation whenever the index lists no
        # versions yet.
        self._data = _KnitData(transport, relpath + DATA_SUFFIX,
            access_mode, create=not len(self.versions()))

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # copy the current index to a temp index to avoid racing with local
        # writes
        transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename))
        # copy the data file
        transport.put(name + DATA_SUFFIX, self._data._open_file())
        # rename the copied index into place
        transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        """Return a new knit at name using this knit's factory and delta setting.

        mode is accepted but not forwarded.
        """
        return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)

    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text.

        For every parent, each block of lines whose text matches the parent
        has its (origin, text) pairs in content replaced, in place, by the
        parent's pairs.
        """
        for parent_id in parents:
            merge_content = self._get_content(parent_id)
            seq = SequenceMatcher(None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                content._lines[j:j+n] = merge_content._lines[i:i+n]

    def _get_components(self, version_id):
        """Return a list of (version_id, method, data) tuples that
        makes up version specified by version_id of the knit.

        The components should be applied in the order of the returned
        list.

        The basis knit will be used to the largest extent possible
        since it is assumed that accesses to it is faster.
        """
        # needed_versions holds a list of (method, version_id) of
        # versions that is needed to be fetched to construct the final
        # version of the file.

        # basis_versions is a list of versions that needs to be
        # fetched but exists in the basis knit.

        basis = self.basis_knit
        needed_versions = []
        basis_versions = []
        cursor = version_id

        # Walk back along first parents until a fulltext is reached.
        while 1:
            picked_knit = self
            if basis and basis._index.has_version(cursor):
                picked_knit = basis
                basis_versions.append(cursor)
            method = picked_knit._index.get_method(cursor)
            needed_versions.append((method, cursor))
            if method == 'fulltext':
                break
            cursor = picked_knit.get_parents(cursor)[0]

        components = {}
        if basis_versions:
            records = []
            for comp_id in basis_versions:
                # Uses get_position(), the accessor every other call site
                # in this class uses (this line previously called
                # get_data_position()).
                # NOTE(review): _KnitIndex is not fully visible here;
                # confirm get_position() is the right accessor.
                data_pos, data_size = basis._index.get_position(comp_id)
                # Was `piece_id`, an undefined name (NameError).
                records.append((comp_id, data_pos, data_size))
            components.update(basis._data.read_records(records))

        records = []
        for comp_id in [vid for method, vid in needed_versions
                        if vid not in basis_versions]:
            data_pos, data_size = self._index.get_position(comp_id)
            records.append((comp_id, data_pos, data_size))
        components.update(self._data.read_records(records))

        # get_data_records returns a mapping with the version id as
        # index and the value as data.  The order the components need
        # to be applied is held by needed_versions (reversed).
        out = []
        for method, comp_id in reversed(needed_versions):
            out.append((comp_id, method, components[comp_id]))

        return out

    def _get_content(self, version_id):
        """Returns a content object that makes up the specified
        version.

        :raises RevisionNotPresent: if version_id is not in this knit.
        :raises KnitCorrupt: if the rebuilt text does not match the digest
            stored with the last component applied.
        """
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        if self.basis_knit and version_id in self.basis_knit:
            return self.basis_knit._get_content(version_id)

        content = None
        components = self._get_components(version_id)
        # Components arrive fulltext first, followed by the deltas to apply
        # on top of it.
        for component_id, method, (data, digest) in components:
            version_idx = self._index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_idx)
                content.apply_delta(delta)

        # _add() appends a newline to a final line that lacked one and
        # records 'no-eol'; undo that here.
        if 'no-eol' in self._index.get_options(version_id):
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)

        if sha_strings(content.text()) != digest:
            raise KnitCorrupt(self.filename, 'sha-1 does not match')

        return content

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: naming one of the missing versions.
        """
        version_ids = set(version_ids)
        for r in list(version_ids):
            if self._index.has_version(r):
                version_ids.remove(r)
        if version_ids:
            raise RevisionNotPresent(list(version_ids)[0], self.filename)

    def add_lines(self, version_id, parents, lines):
        """See VersionedFile.add_lines."""
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)

        if False or __debug__:
            for l in lines:
                assert '\n' not in l[:-1]

        self._check_versions_present(parents)
        # _add() may modify the last line, so hand it a copy.
        return self._add(version_id, lines[:], parents, self.delta)

    def _add(self, version_id, lines, parents, delta):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        the first parent.
        """
        if delta and not parents:
            delta = False

        # The digest covers the text exactly as supplied, before any
        # newline is appended below.
        digest = sha_strings(lines)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'

        lines = self.factory.make(lines, len(self._index))
        if self.factory.annotated and len(parents) > 0:
            # Merge annotations from parent texts if so is needed.
            self._merge_annotations(lines, parents)

        if parents and delta:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spend applying deltas.
            count = 0
            delta_parents = parents
            while count < 25:
                parent = delta_parents[0]
                method = self._index.get_method(parent)
                if method == 'fulltext':
                    break
                delta_parents = self._index.get_parents(parent)
                count = count + 1
            # No fulltext within 25 steps: store this version as fulltext.
            if method == 'line-delta':
                delta = False

        if delta:
            options.append('line-delta')
            content = self._get_content(parents[0])
            delta_hunks = content.line_delta(lines)
            store_lines = self.factory.lower_line_delta(delta_hunks)
        else:
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)

    def check(self, progress_bar=None):
        """See VersionedFile.check()."""

    def clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null delta.
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self._get_content(version_id).text()

    def iter_lines_added_or_present_in_versions(self, version_ids=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        # we dont care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
            data_pos, length = self._index.get_position(version_id)
            version_id_records.append((version_id, data_pos, length))
        for version_id, data, sha_value in \
            self._data.read_records_iter(version_id_records):
            method = self._index.get_method(version_id)
            version_idx = self._index.lookup(version_id)
            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                content = self.factory.parse_fulltext(data, version_idx)
                for line in content.text():
                    yield line
            else:
                # For a delta only the lines stored in its hunks are
                # yielded.
                delta = self.factory.parse_line_delta(data, version_idx)
                for start, end, count, lines in delta:
                    for origin, line in lines:
                        yield line

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

    __len__ = num_versions

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():
            # Origins are index numbers; map them back to version ids.
            yield self._index.idx_to_name(origin), text

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        self._check_versions_present([version_id])
        return list(self._index.get_parents(version_id))

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)

    def _reannotate_line_delta(self, other, lines, new_version_id,
                               new_version_idx):
        """Re-annotate line-delta and return new delta.

        Origins in lines are index numbers of other; each is mapped to its
        version id and then to this knit's index number (or to
        new_version_idx when it is new_version_id itself).
        """
        # NOTE(review): parse_line_delta_iter is called with one argument,
        # which matches KnitAnnotateFactory but not KnitPlainFactory's
        # (lines, version) signature -- confirm this is only reached with
        # an annotating factory.
        new_delta = []
        for start, end, count, contents \
                in self.factory.parse_line_delta_iter(lines):
            new_lines = []
            for origin, line in contents:
                old_version_id = other._index.idx_to_name(origin)
                if old_version_id == new_version_id:
                    idx = new_version_idx
                else:
                    idx = self._index.lookup(old_version_id)
                new_lines.append((idx, line))
            new_delta.append((start, end, count, new_lines))

        return self.factory.lower_line_delta(new_delta)

    def _reannotate_fulltext(self, other, lines, new_version_id,
                             new_version_idx):
        """Re-annotate fulltext and return new version.

        Origins are translated from other's index numbers to this knit's,
        in the same way as _reannotate_line_delta.
        """
        content = self.factory.parse_fulltext(lines, new_version_idx)
        new_lines = []
        for origin, line in content.annotate_iter():
            old_version_id = other._index.idx_to_name(origin)
            if old_version_id == new_version_id:
                idx = new_version_idx
            else:
                idx = self._index.lookup(old_version_id)
            new_lines.append((idx, line))

        return self.factory.lower_fulltext(KnitContent(new_lines))

    #@deprecated_method(zero_eight)
    def walk(self, version_ids):
        """See VersionedFile.walk."""
        # We take the short path here, and extract all relevant texts
        # and put them in a weave and let that do all the work.  Far
        # from optimal, but is much simpler.
        # FIXME RB 20060228 this really is inefficient!
        from bzrlib.weave import Weave

        w = Weave(self.filename)
        ancestry = self.get_ancestry(version_ids)
        sorted_graph = topo_sort(self._index.get_graph())
        version_list = [vid for vid in sorted_graph if vid in ancestry]

        for version_id in version_list:
            lines = self.get_lines(version_id)
            w.add_lines(version_id, self.get_parents(version_id), lines)

        for lineno, insert_id, dset, line in w.walk(version_ids):
            yield lineno, insert_id, dset, line

576

577

578

class _KnitComponentFile(object):
    """Base class for one of the files that make up a knit database.

    The class itself defines no ``HEADER``; write_header() and
    check_header() read ``self.HEADER``, so subclasses have to supply it.
    """

    def __init__(self, transport, filename, mode):
        """Remember the transport, file name and mode for later use."""
        self._transport, self._filename, self._mode = transport, filename, mode

    def write_header(self):
        """Append HEADER to the file through the transport.

        Raises KnitCorrupt when the value returned by the transport's
        append() is not 0.
        """
        previous_length = self._transport.append(self._filename,
                                                 StringIO(self.HEADER))
        if previous_length == 0:
            return
        raise KnitCorrupt(self._filename, 'misaligned after writing header')

    def check_header(self, fp):
        """Read len(HEADER) characters from fp and verify they equal HEADER.

        Raises KnitHeaderError carrying the text that was read instead.
        """
        expected = self.HEADER
        found = fp.read(len(expected))
        if found == expected:
            return
        raise KnitHeaderError(badline=found)

    def commit(self):
        """Commit is a nop."""

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, self._filename)

601

602

603

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is kept in memory and read on startup, to enable fast
    lookups of revision information.  New entries are appended to the
    end of the index file.

    _cache maps a version id to its index entry, the tuple
    (version_id, options, pos, size, parents).

    _history is a list mapping index numbers to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.
    """

    HEADER = "# bzr knit index 7\n"

    def _cache_version(self, version_id, options, pos, size, parents):
        """Record an index entry in memory.

        A version id seen for the first time is appended to _history; a
        repeated id keeps its original position and only has its cached
        values replaced.
        """
        # Within this class _cache and _history are only ever updated here,
        # so the dict can answer "seen before?" without scanning the list.
        if version_id not in self._cache:
            self._history.append(version_id)
        self._cache[version_id] = (version_id, options, pos, size, parents)

    def _iter_index(self, fp):
        """Yield each remaining line of fp, split on whitespace."""
        for line in fp.read().splitlines(False):
            yield line.split()

    def __init__(self, transport, filename, mode, create=False):
        """Load the index from the transport, or start a new one.

        When the file does not exist, a header is written if mode is 'w'
        and create is true; otherwise NoSuchFile propagates.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the weave.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            self.check_header(fp)
            for rec in self._iter_index(fp):
                # Fields: version id, comma separated options, position,
                # size, then parent references as indexes into _history.
                self._cache_version(rec[0], rec[1].split(','),
                                    int(rec[2]), int(rec[3]),
                                    [self._history[int(i)] for i in rec[4:]])
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            self.write_header()

    def get_graph(self):
        """Return a list of (version_id, parents) pairs, one per cached version."""
        return [(version_id, entry[4])
                for version_id, entry in self._cache.iteritems()]

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            parents = self._cache[version][4]
            for parent in parents:
                if parent not in graph:
                    pending.add(parent)
            graph[version] = parents
        return topo_sort(graph.items())

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Return the list of version ids; this is the internal list, not a copy."""
        return self._history

    def idx_to_name(self, idx):
        """Return the version id stored at position idx of the history."""
        return self._history[idx]

    def lookup(self, version_id):
        """Return the position of version_id in the history."""
        assert version_id in self._cache
        return self._history.index(version_id)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        self._cache_version(version_id, options, pos, size, parents)

        # Parents are written as their index numbers, not their ids.
        parent_refs = ' '.join([str(self.lookup(vid)) for vid in parents])
        content = "%s %s %s %s %s\n" % (version_id,
                                        ','.join(options),
                                        pos,
                                        size,
                                        parent_refs)
        self._transport.append(self._filename, StringIO(content))

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        return (entry[2], entry[3])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            assert 'line-delta' in options
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options list recorded for the specified version."""
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        Raises RevisionNotPresent naming one of the absent version ids.
        """
        missing = set(version_ids).difference(self._cache)
        if missing:
            # The file name lives in _filename; there is no 'filename'
            # attribute on index objects.
            raise RevisionNotPresent(list(missing)[0], self._filename)

739

740

741

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file"""

    HEADER = "# bzr knit data 7\n"

    def __init__(self, transport, filename, mode, create=False):
        """Set up access to the data file; with create, put an empty file."""
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._file = None
        self._checked = False
        if create:
            self._transport.put(self._filename, StringIO(''))

    def _open_file(self):
        """Return the data file object, fetching it on first use.

        A missing file is tolerated: NoSuchFile is swallowed and None is
        returned in that case.
        """
        if self._file is None:
            try:
                self._file = self._transport.get(self._filename)
            except NoSuchFile:
                pass
        return self._file

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        The record is gzip-compressed on its own and appended to the data
        file.  Returns a (start_pos, size) tuple: the value returned by the
        transport's append() and the length of the compressed record.
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)
        print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)
        data_file.writelines(lines)
        # print adds its own newline, so the end marker is followed by an
        # empty line in the stored record.
        print >>data_file, "end %s\n" % version_id
        data_file.close()

        content = sio.getvalue()
        start_pos = self._transport.append(self._filename, StringIO(content))
        return start_pos, len(content)

    def _parse_record(self, version_id, data):
        """Decompress one record and return (content_lines, digest).

        Raises KnitCorrupt when the header line does not have four fields,
        names a different version, or the end marker does not match.
        """
        df = GzipFile(mode='rb', fileobj=StringIO(data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of records')
        if rec[1] != version_id:
            # Report against _filename like the other checks here; this
            # object has no 'file' attribute.
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r' % version_id)
        lines = int(rec[2])
        record_contents = self._read_record_contents(df, lines)
        l = df.readline()
        if l != 'end %s\n' % version_id:
            raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                              % (l, version_id))
        return record_contents, rec[3]

    def _read_record_contents(self, df, record_lines):
        """Read and return n lines from datafile."""
        return [df.readline() for i in range(record_lines)]

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        Each passed record is a tuple of (version_id, pos, len) and
        will be read in the given order.  Yields (version_id,
        contents, digest).
        """
        # We take it that the transport optimizes the fetching as good
        # as possible (ie, reads continuous ranges.)
        response = self._transport.readv(self._filename,
            [(pos, size) for version_id, pos, size in records])

        # Responses are paired with the requests by position in the list.
        for (record_id, pos, size), (_read_pos, data) in zip(records, response):
            content, digest = self._parse_record(record_id, data)
            yield record_id, content, digest

    def read_records(self, records):
        """Read records into a dictionary."""
        components = {}
        for record_id, content, digest in self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components

838

839

840

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True only when both source and target are knits."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the records the target lacks from the source, then rewrites
        the index entry of every shared version whose parent sets differ.
        Returns the number of records copied (0 when there is nothing to do).
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)
        source = self.source
        target = self.target

        if version_ids is None:
            version_ids = source.versions()
        elif ignore_missing:
            version_ids = set(source.versions()).intersection(
                set(version_ids))
        else:
            source._check_versions_present(version_ids)

        if not version_ids:
            return 0

        if pb is None:
            from bzrlib.progress import DummyProgress
            pb = DummyProgress()

        version_ids = list(version_ids)
        if None in version_ids:
            version_ids.remove(None)

        self.source_ancestry = set(source.get_ancestry(version_ids))
        target_versions = set(target._index.get_versions())
        needed_versions = self.source_ancestry - target_versions
        cross_check_versions = self.source_ancestry.intersection(
            target_versions)
        mismatched_versions = set()
        for version in cross_check_versions:
            # scan to include needed parents.
            target_parents = set(target.get_parents(version))
            source_parents = set(source.get_parents(version))
            if target_parents == source_parents:
                continue
            # FIXME TEST this check for cycles being introduced works
            # the logic is we have a cycle if in our graph we are an
            # ancestor of any of the source-side parents.
            for parent in source_parents:
                if parent in target_parents:
                    # safe
                    continue
                if version in source.get_ancestry(parent):
                    raise errors.GraphCycleError([parent, version])
            # ensure the extra parents will be available later.
            extra_parents = source_parents.difference(target_parents)
            needed_versions.update(extra_parents.difference(target_versions))
            mismatched_versions.add(version)

        if not (needed_versions or cross_check_versions):
            return 0

        full_list = topo_sort(source._index.get_graph())
        version_list = [vid for vid in full_list
                        if not target.has_version(vid)
                        and vid in needed_versions]

        records = []
        for version_id in version_list:
            data_pos, data_size = source._index.get_position(version_id)
            records.append((version_id, data_pos, data_size))

        count = 0
        total = len(version_list)
        for version_id, lines, digest in \
                source._data.read_records_iter(records):
            options = source._index.get_options(version_id)
            parents = source._index.get_parents(version_id)

            for parent in parents:
                assert target.has_version(parent)

            if target.factory.annotated:
                # FIXME jrydberg: it should be possible to skip
                # re-annotating components if we know that we are
                # going to pull all revisions in the same order.
                new_version_idx = target._index.num_versions()
                if 'fulltext' in options:
                    lines = target._reannotate_fulltext(
                        source, lines, version_id, new_version_idx)
                elif 'line-delta' in options:
                    lines = target._reannotate_line_delta(
                        source, lines, version_id, new_version_idx)

            count += 1
            pb.update("Joining knit", count, total)

            pos, size = target._data.add_record(version_id, digest, lines)
            target._index.add_version(version_id, options, pos, size, parents)

        for version in mismatched_versions:
            target_parents = set(target.get_parents(version))
            source_parents = set(source.get_parents(version))
            # write a combined record to our history.
            new_parents = (target.get_parents(version) +
                           list(source_parents.difference(target_parents)))
            current_values = target._index._cache[version]
            target._index.add_version(version,
                                      current_values[1],
                                      current_values[2],
                                      current_values[3],
                                      new_parents)
        pb.clear()
        return count

958

959

960

# Runs when this module is imported: hands InterKnit to
# InterVersionedFile.register_optimiser.  Presumably this is what lets
# knit-to-knit operations pick InterKnit -- confirm against InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

Older »