# Copyright (C) 2005, 2006 by Canonical Ltd
# Written by Martin Pool.
# Modified by Johan Rydberg <jrydberg@gnu.org>
# Modified by Robert Collins <robert.collins@canonical.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only
updates.
"""

from difflib import SequenceMatcher

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
     InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
     RevisionNotPresent, RevisionAlreadyPresent
from bzrlib.trace import mutter
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
     sha_strings
from bzrlib.versionedfile import VersionedFile
from bzrlib.tsort import topo_sort

from StringIO import StringIO
from gzip import GzipFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data
# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor
# position after writing to work out where it was located.  we may need to
# bypass python file buffering.


INDEX_SUFFIX = '.kndx'
DATA_SUFFIX = '.knit'
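
# An illustrative sketch (not part of the original module): a knit named
# 'foo' is backed by two files on its transport, 'foo' + INDEX_SUFFIX for
# the index and 'foo' + DATA_SUFFIX for the record data.  Assuming a
# transport object is at hand, usage might look like:
#
#   k = KnitVersionedFile(transport, 'foo', 'w', KnitPlainFactory())
#   k.add_lines('rev-1', [], ['hello\n'])
#   k.add_lines('rev-2', ['rev-1'], ['hello\n', 'world\n'])
#   k.get_lines('rev-2')        # -> ['hello\n', 'world\n']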


class KnitContent(object):
    """Content of a knit version to which deltas can be applied."""

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        for origin, text in self._lines:
            yield origin, text

    def annotate(self):
        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def apply_delta(self, delta):
        """Apply delta to this content."""
        offset = 0
        for start, end, count, lines in delta:
            self._lines[offset+start:offset+end] = lines
            offset = offset + (start - end) + count

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from new_lines to this content."""
        new_texts = [text for origin, text in new_lines._lines]
        old_texts = [text for origin, text in self._lines]
        s = SequenceMatcher(None, old_texts, new_texts)
        for op in s.get_opcodes():
            if op[0] == 'equal':
                continue
            yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

    def line_delta(self, new_lines):
        return list(self.line_delta_iter(new_lines))

    def text(self):
        return [text for origin, text in self._lines]
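

# Illustration of the delta format consumed by apply_delta (values are
# assumed): a hunk (start, end, count, lines) replaces the old lines
# [start:end) with the count annotated lines in `lines`.  Applying
#
#   [(1, 2, 2, [(1, 'b\n'), (1, 'c\n')])]
#
# to [(0, 'a\n'), (0, 'x\n')] yields
# [(0, 'a\n'), (1, 'b\n'), (1, 'c\n')]; the running offset in
# apply_delta keeps later hunk positions valid after earlier hunks have
# grown or shrunk the text.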


class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version):
        num_lines = len(lines)
        return KnitContent(zip([version] * num_lines, lines))


class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version):
        lines = []
        for line in content:
            origin, text = line.split(' ', 1)
            lines.append((int(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines):
        while lines:
            header = lines.pop(0)
            start, end, c = [int(n) for n in header.split(',')]
            contents = []
            for i in range(c):
                origin, text = lines.pop(0).split(' ', 1)
                contents.append((int(origin), text))
            yield start, end, c, contents

    def parse_line_delta(self, lines, version):
        return list(self.parse_line_delta_iter(lines))

    def lower_fulltext(self, content):
        return ['%d %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            for origin, text in lines:
                out.append('%d %s' % (origin, text))
        return out
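

# Illustration of the annotated serialization (values are assumed):
# lower_line_delta emits a 'start,end,count' header per hunk followed by
# count lines, each prefixed with its integer origin index, e.g.
#
#   1,2,2
#   3 b
#   3 c
#
# parse_line_delta_iter is the inverse, popping one header and its
# following lines per hunk.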


class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version):
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        while lines:
            header = lines.pop(0)
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version] * c, lines[:c])
            lines = lines[c:]

    def parse_line_delta(self, lines, version):
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        return content.text()

    def lower_line_delta(self, delta):
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out


def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location."""
    k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory())


class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

    def __init__(self, transport, relpath, mode, factory,
                 basis_knit=None, delta=True):
        """Construct a knit at location specified by relpath."""
        assert mode in ('r', 'w'), "invalid mode specified"
        assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
            type(basis_knit)

        self.transport = transport
        self.filename = relpath
        self.basis_knit = basis_knit
        self.factory = factory
        self.writable = (mode == 'w')
        self.delta = delta

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX, mode)
        self._data = _KnitData(transport, relpath + DATA_SUFFIX, mode)

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text."""
        for parent_id in parents:
            merge_content = self._get_content(parent_id)
            seq = SequenceMatcher(None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                content._lines[j:j+n] = merge_content._lines[i:i+n]
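
    # Illustration (assumed values): with parent text ['a\n', 'b\n'] and
    # new content ['a\n', 'b\n', 'c\n'], SequenceMatcher reports the
    # matching block (i=0, j=0, n=2), so the first two lines of the new
    # content inherit the parent's (origin, text) pairs and only 'c\n'
    # keeps the version being added as its origin.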

    def _get_components(self, version_id):
        """Return a list of (version_id, method, data) tuples that
        make up the version specified by version_id of the knit.

        The components should be applied in the order of the returned
        list.

        The basis knit will be used to the largest extent possible
        since it is assumed that access to it is faster.
        """
        # needed_versions holds a list of (method, version_id) of
        # versions that need to be fetched to construct the final
        # version of the file.
        #
        # basis_versions is a list of versions that need to be
        # fetched but exist in the basis knit.
        basis = self.basis_knit
        needed_versions = []
        basis_versions = []
        cursor = version_id

        while 1:
            picked_knit = self
            if basis and basis._index.has_version(cursor):
                picked_knit = basis
                basis_versions.append(cursor)
            method = picked_knit._index.get_method(cursor)
            needed_versions.append((method, cursor))
            if method == 'fulltext':
                break
            cursor = picked_knit.get_parents(cursor)[0]

        components = {}
        if basis_versions:
            records = []
            for comp_id in basis_versions:
                data_pos, data_size = basis._index.get_position(comp_id)
                records.append((comp_id, data_pos, data_size))
            components.update(basis._data.read_records(records))

        records = []
        for comp_id in [vid for method, vid in needed_versions
                        if vid not in basis_versions]:
            data_pos, data_size = self._index.get_position(comp_id)
            records.append((comp_id, data_pos, data_size))
        components.update(self._data.read_records(records))

        # read_records returns a mapping with the version id as key and
        # the record data as value.  The order in which the components
        # need to be applied is held by needed_versions (reversed).
        out = []
        for method, comp_id in reversed(needed_versions):
            out.append((comp_id, method, components[comp_id]))

        return out
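
    # Illustration (assumed history): if r1 is stored as a fulltext and
    # r2 and r3 as line-deltas on their parent, _get_components('r3')
    # returns [('r1', 'fulltext', ...), ('r2', 'line-delta', ...),
    # ('r3', 'line-delta', ...)]: one fulltext followed by the deltas to
    # apply, in application order.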

    def _get_content(self, version_id):
        """Return a content object that makes up the specified
        version."""
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        if self.basis_knit and version_id in self.basis_knit:
            return self.basis_knit._get_content(version_id)

        content = None
        components = self._get_components(version_id)
        for component_id, method, (data, digest) in components:
            version_idx = self._index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_idx)
                content.apply_delta(delta)

        if 'no-eol' in self._index.get_options(version_id):
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)

        if sha_strings(content.text()) != digest:
            raise KnitCorrupt(self.filename, 'sha-1 does not match')

        return content

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        version_ids = set(version_ids)
        for r in list(version_ids):
            if self._index.has_version(r):
                version_ids.remove(r)
        if version_ids:
            raise RevisionNotPresent(list(version_ids)[0], self.filename)

    def add_lines(self, version_id, parents, lines):
        """See VersionedFile.add_lines."""
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)

        if True or __debug__:
            for l in lines:
                assert '\n' not in l[:-1]

        self._check_versions_present(parents)
        return self._add(version_id, lines[:], parents, self.delta)

    def _add(self, version_id, lines, parents, delta):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        the first parent.
        """
        if delta and not parents:
            delta = False

        digest = sha_strings(lines)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'

        lines = self.factory.make(lines, len(self._index))
        if self.factory.annotated and len(parents) > 0:
            # Merge annotations from parent texts if needed.
            self._merge_annotations(lines, parents)

        if parents and delta:
            # To speed up extraction of texts, the delta chain is
            # limited to a fixed number of deltas.  This should
            # minimize both I/O and the time spent applying deltas.
            count = 0
            delta_parents = parents
            while count < 25:
                parent = delta_parents[0]
                method = self._index.get_method(parent)
                if method == 'fulltext':
                    break
                delta_parents = self._index.get_parents(parent)
                count = count + 1
            if method == 'line-delta':
                # No fulltext within the limit; store a fulltext instead
                # of extending the chain further.
                delta = False

        if delta:
            options.append('line-delta')
            content = self._get_content(parents[0])
            delta_hunks = content.line_delta(lines)
            store_lines = self.factory.lower_line_delta(delta_hunks)
        else:
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)
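
    # Illustration of the delta-chain bound in _add (chain shape is
    # assumed): with r1 stored as a fulltext and r2..r26 each stored as
    # a line-delta on its parent, adding r27 walks 25 parents without
    # finding a fulltext, so r27 is stored as a fulltext itself.  This
    # caps the number of deltas that ever need to be applied to
    # reconstruct a version.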

    def clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null delta.
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self._get_content(version_id).text()

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():
            yield self._index.idx_to_name(origin), text

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        self._check_versions_present([version_id])
        return list(self._index.get_parents(version_id))

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)

    def _reannotate_line_delta(self, other, lines, new_version_id,
                               new_version_idx):
        """Re-annotate line-delta and return new delta."""
        new_delta = []
        for start, end, count, contents \
                in self.factory.parse_line_delta_iter(lines):
            new_lines = []
            for origin, line in contents:
                old_version_id = other._index.idx_to_name(origin)
                if old_version_id == new_version_id:
                    idx = new_version_idx
                else:
                    idx = self._index.lookup(old_version_id)
                new_lines.append((idx, line))
            new_delta.append((start, end, count, new_lines))

        return self.factory.lower_line_delta(new_delta)

    def _reannotate_fulltext(self, other, lines, new_version_id,
                             new_version_idx):
        """Re-annotate fulltext and return new version."""
        content = self.factory.parse_fulltext(lines, new_version_idx)
        new_lines = []
        for origin, line in content.annotate_iter():
            old_version_id = other._index.idx_to_name(origin)
            if old_version_id == new_version_id:
                idx = new_version_idx
            else:
                idx = self._index.lookup(old_version_id)
            new_lines.append((idx, line))

        return self.factory.lower_fulltext(KnitContent(new_lines))
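
    # Illustration (assumed values): origin indexes are positions in a
    # particular knit's index, so annotations cannot be copied between
    # knits verbatim.  If a line originated in 'rev-1', which is entry 3
    # in `other` but entry 7 here, re-annotation rewrites its origin
    # from 3 to 7, or to new_version_idx when the origin is the version
    # currently being joined, which has no index entry here yet.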

    def join(self, other, pb=None, msg=None, version_ids=None):
        """See VersionedFile.join."""
        assert isinstance(other, KnitVersionedFile)

        if version_ids is None:
            version_ids = other.versions()
        if not version_ids:
            return 0

        if pb is None:
            from bzrlib.progress import DummyProgress
            pb = DummyProgress()

        version_ids = list(version_ids)
        if None in version_ids:
            version_ids.remove(None)

        other_ancestry = set(other.get_ancestry(version_ids))
        needed_versions = other_ancestry - set(self._index.get_versions())
        if not needed_versions:
            return 0
        full_list = topo_sort(other._index.get_graph())

        version_list = [i for i in full_list if (not self.has_version(i)
                        and i in needed_versions)]

        records = []
        for version_id in version_list:
            data_pos, data_size = other._index.get_position(version_id)
            records.append((version_id, data_pos, data_size))

        count = 0
        for version_id, lines, digest \
                in other._data.read_records_iter(records):
            options = other._index.get_options(version_id)
            parents = other._index.get_parents(version_id)

            for parent in parents:
                assert self.has_version(parent)

            if self.factory.annotated:
                # FIXME jrydberg: it should be possible to skip
                # re-annotating components if we know that we are
                # going to pull all revisions in the same order.
                new_version_id = version_id
                new_version_idx = self._index.num_versions()
                if 'fulltext' in options:
                    lines = self._reannotate_fulltext(other, lines,
                        new_version_id, new_version_idx)
                elif 'line-delta' in options:
                    lines = self._reannotate_line_delta(other, lines,
                        new_version_id, new_version_idx)

            count = count + 1
            pb.update(self.filename, count, len(version_list))

            pos, size = self._data.add_record(version_id, digest, lines)
            self._index.add_version(version_id, options, pos, size, parents)

        return count

    def walk(self, version_ids):
        """See VersionedFile.walk."""
        # We take the short path here: extract all relevant texts, put
        # them in a weave and let that do all the work.  Far from
        # optimal, but much simpler.
        from bzrlib.weave import Weave

        w = Weave(self.filename)
        ancestry = self.get_ancestry(version_ids)
        sorted_graph = topo_sort(self._index.get_graph())
        version_list = [vid for vid in sorted_graph if vid in ancestry]

        for version_id in version_list:
            lines = self.get_lines(version_id)
            w.add_lines(version_id, self.get_parents(version_id), lines)

        for lineno, insert_id, dset, line in w.walk(version_ids):
            yield lineno, insert_id, dset, line


class _KnitComponentFile(object):
    """One of the files used to implement a knit database."""

    def __init__(self, transport, filename, mode):
        self._transport = transport
        self._filename = filename
        self._mode = mode

    def write_header(self):
        old_len = self._transport.append(self._filename, self.HEADER)
        if old_len != 0:
            raise KnitCorrupt(self._filename, 'misaligned after writing header')

    def check_header(self, fp):
        line = fp.read(len(self.HEADER))
        if line != self.HEADER:
            raise KnitHeaderError(badline=line)

    def commit(self):
        """Commit is a nop."""

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self._filename)


class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to an index
    entry.

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically
    sorted.
    """

    HEADER = "# bzr knit index 7\n"

    def _cache_version(self, version_id, options, pos, size, parents):
        val = (version_id, options, pos, size, parents)
        self._cache[version_id] = val
        self._history.append(version_id)

    def _iter_index(self, fp):
        lines = fp.read()
        for l in lines.splitlines(False):
            yield l.split()

    def __init__(self, transport, filename, mode):
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._cache = {}
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            self.check_header(fp)
            for rec in self._iter_index(fp):
                self._cache_version(rec[0], rec[1].split(','), int(rec[2]),
                    int(rec[3]), [self._history[int(i)] for i in rec[4:]])
        except NoSuchFile, e:
            if mode != 'w':
                raise
            self.write_header()

    def get_graph(self):
        graph = []
        for version_id, index in self._cache.iteritems():
            graph.append((version_id, index[4]))
        return graph

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        version_idxs = []
        for version_id in versions:
            version_idxs.append(self._history.index(version_id))
        i = set(versions)
        for v in xrange(max(version_idxs), 0, -1):
            if self._history[v] in i:
                # include all its parents
                i.update(self._cache[self._history[v]][4])
        return i

    def num_versions(self):
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        return self._history

    def idx_to_name(self, idx):
        return self._history[idx]

    def lookup(self, version_id):
        assert version_id in self._cache
        return self._history.index(version_id)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index."""
        self._cache_version(version_id, options, pos, size, parents)

        content = "%s %s %s %s %s\n" % (version_id,
                                        ','.join(options),
                                        pos,
                                        size,
                                        ' '.join([str(self.lookup(vid)) for
                                                  vid in parents]))
        self._transport.append(self._filename, content)

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        return (self._cache[version_id][2],
                self._cache[version_id][3])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            assert 'line-delta' in options
            return 'line-delta'

    def get_options(self, version_id):
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        version_ids = set(version_ids)
        for version_id in list(version_ids):
            if version_id in self._cache:
                version_ids.remove(version_id)
        if version_ids:
            raise RevisionNotPresent(list(version_ids)[0], self._filename)


class _KnitData(_KnitComponentFile):
    """Contents of the knit data file."""

    HEADER = "# bzr knit data 7\n"

    def __init__(self, transport, filename, mode):
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._file = None
        self._checked = False

    def _open_file(self):
        if self._file is None:
            try:
                self._file = self._transport.get(self._filename)
            except NoSuchFile:
                pass
        return self._file

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.  Returns the position in the
        file where it was written."""
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)
        print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)
        data_file.writelines(lines)
        print >>data_file, "end %s\n" % version_id
        data_file.close()

        content = sio.getvalue()
        start_pos = self._transport.append(self._filename, content)
        return start_pos, len(content)
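
    # Illustration of the record layout written above (values are
    # assumed): each record is an independently gzipped chunk containing
    #
    #   version <version-id> <line-count> <sha1-digest>
    #   <stored lines...>
    #   end <version-id>
    #
    # so a record can be decompressed and checked on its own given the
    # (pos, size) recorded in the index.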

    def _parse_record(self, version_id, data):
        df = GzipFile(mode='rb', fileobj=StringIO(data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of records')
        if rec[1] != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r' % version_id)
        lines = int(rec[2])
        record_contents = self._read_record_contents(df, lines)
        l = df.readline()
        if l != 'end %s\n' % version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (l, version_id))
        return record_contents, rec[3]

    def _read_record_contents(self, df, record_lines):
        """Read and return record_lines lines from datafile."""
        r = []
        for i in range(record_lines):
            r.append(df.readline())
        return r

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        Each passed record is a tuple of (version_id, pos, len) and
        will be read in the given order.  Yields (version_id,
        contents, digest).
        """

        class ContinuousRange:
            def __init__(self, rec_id, pos, size):
                self.start_pos = pos
                self.end_pos = pos + size
                self.versions = [(rec_id, pos, size)]

            def add(self, rec_id, pos, size):
                if self.end_pos != pos:
                    return False
                self.end_pos = pos + size
                self.versions.append((rec_id, pos, size))
                return True

            def split(self, fp):
                for rec_id, pos, size in self.versions:
                    yield rec_id, fp.read(size)

        fp = self._open_file()

        # Loop through all records and try to collect as large a
        # continuous region as possible to read.
        while records:
            record_id, pos, size = records.pop(0)
            continuous_range = ContinuousRange(record_id, pos, size)
            while records:
                record_id, pos, size = records[0]
                if continuous_range.add(record_id, pos, size):
                    del records[0]
                else:
                    break
            fp.seek(continuous_range.start_pos, 0)
            for record_id, data in continuous_range.split(fp):
                content, digest = self._parse_record(record_id, data)
                yield record_id, content, digest
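
    # Illustration (assumed offsets): for requested records at
    # (pos, size) of (0, 100), (100, 50) and (300, 80), the first two
    # are contiguous and fetched with a single seek and one 150-byte
    # read, while the third starts a new ContinuousRange.  Fewer, larger
    # reads keep the transport efficient.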

    def read_records(self, records):
        """Read records into a dictionary."""
        components = {}
        for record_id, content, digest in self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components