/brz/remove-bazaar : revision 1563.2.12

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2006-03-02 01:26:22 UTC
mto: (1594.2.4 integration)
mto: This revision was merged to the branch mainline in revision 1596.
Revision ID: robertc@robertcollins.net-20060302012622-6d1d0b92fe94d9be

Checkpointing: created InterObject to factor out common inter object worker code, added InterVersionedFile and tests to allow making join work between any versionedfile.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/store/versioned/weave.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

generate_docs.py

notes

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

import difflib

from difflib import SequenceMatcher

from gzip import GzipFile

import os

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile

from bzrlib.tsort import topo_sort

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data

# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor

# position after writing to work out where it was located. we may need to

# bypass python file buffering.

DATA_SUFFIX = '.knit'

INDEX_SUFFIX = '.kndx'

100

# convenience factories for testing or use:

101

def AnnotatedKnitFactory(name, transport, mode=None):
    """Open an annotated knit called name on transport.

    The knit is opened for writing ('w') with delta compression enabled and
    a fresh KnitAnnotateFactory as its content factory.  The mode argument
    is accepted but not used by this function.
    """
    annotating_factory = KnitAnnotateFactory()
    return KnitVersionedFile(transport, name, 'w', annotating_factory,
                             delta=True)

108

109

110

class KnitContent(object):
    """The annotated lines of one knit version.

    Content is held as a list of (origin, text) pairs and can be updated
    in place by applying line deltas.
    """

    def __init__(self, lines):
        # The list is stored by reference, not copied; apply_delta mutates it.
        self._lines = lines

    def annotate_iter(self):
        """Generate an (origin, text) tuple for every content line, in order."""
        for entry in self._lines:
            origin, text = entry
            yield origin, text

    def annotate(self):
        """Return all (origin, text) tuples as a list."""
        return [pair for pair in self.annotate_iter()]

    def apply_delta(self, delta):
        """Apply delta to this content, in place.

        delta is a sequence of (start, end, count, lines) hunks.  start and
        end are positions in the content as it was before any hunk was
        applied; lines holds the count replacement (origin, text) pairs.
        """
        # Running difference between original and current line positions,
        # caused by the hunks applied so far.
        shift = 0
        for start, end, count, lines in delta:
            self._lines[shift + start:shift + end] = lines
            shift += count - (end - start)

    def line_delta_iter(self, new_lines):
        """Generate the line-based delta that turns this content into new_lines.

        new_lines is another content object.  Only the text of each line is
        compared; the yielded hunks are (start, end, count, lines) tuples
        whose lines are taken, with their origins, from new_lines.
        """
        old_texts = [body for _origin, body in self._lines]
        new_texts = [body for _origin, body in new_lines._lines]
        matcher = difflib.SequenceMatcher(None, old_texts, new_texts)
        for tag, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
            if tag != 'equal':
                yield (old_lo, old_hi, new_hi - new_lo,
                       new_lines._lines[new_lo:new_hi])

    def line_delta(self, new_lines):
        """Return the hunks produced by line_delta_iter as a list."""
        return [hunk for hunk in self.line_delta_iter(new_lines)]

    def text(self):
        """Return the text of each line, without the origins."""
        return [body for _origin, body in self._lines]

147

148

149

class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version):
        """Wrap plain text lines in a KnitContent.

        Every line is attributed to version, i.e. the content holds one
        (version, line) pair per entry of lines.
        """
        return KnitContent([(version, text) for text in lines])

155

156

157

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects.

    In the stored form every content line carries its origin inline, as
    '<origin> <text>' where origin is an integer.
    """

    annotated = True

    def parse_fulltext(self, content, version):
        """Convert stored fulltext lines into a KnitContent.

        Each line is split on its first space into an integer origin and the
        text.  version is not used: every line names its own origin.
        """
        lines = []
        for line in content:
            origin, text = line.split(' ', 1)
            lines.append((int(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines):
        """Generate (start, end, count, contents) hunks from stored delta lines.

        A hunk is a 'start,end,count' header line followed by count annotated
        lines; contents is the list of (origin, text) pairs parsed from them.

        The input is walked by index rather than consumed with pop(0), so
        parsing is linear and the caller's list is left untouched.  A hunk
        that announces more lines than are present raises IndexError.
        """
        pos = 0
        total = len(lines)
        while pos < total:
            start, end, c = [int(n) for n in lines[pos].split(',')]
            pos += 1
            contents = []
            for i in range(pos, pos + c):
                origin, text = lines[i].split(' ', 1)
                contents.append((int(origin), text))
            # Advance by what was actually read so a non-positive count can
            # never move the cursor backwards.
            pos += len(contents)
            yield start, end, c, contents

    def parse_line_delta(self, lines, version):
        """Return the hunks of parse_line_delta_iter as a list.

        version is not used: every line names its own origin.
        """
        return list(self.parse_line_delta_iter(lines))

    def lower_fulltext(self, content):
        """Return content in stored form: one '<origin> <text>' string per line."""
        return ['%d %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """Return delta in stored form.

        Each hunk becomes a 'start,end,count' header line followed by its
        lines, each written as '<origin> <text>'.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            for origin, text in lines:
                out.append('%d %s' % (origin, text))
        return out

192

193

194

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects.

    The stored form holds only the text of each line; origins are supplied
    at parse time from the version being parsed.
    """

    annotated = False

    def parse_fulltext(self, content, version):
        """Convert stored fulltext lines into content attributed to version."""
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        """Generate (start, end, count, contents) hunks from stored delta lines.

        A hunk is a 'start,end,count' header line followed by count text
        lines; contents pairs each of those lines with version.

        The input is walked by index rather than trimmed from the front, so
        parsing is linear and the caller's list is left untouched.
        """
        pos = 0
        total = len(lines)
        while pos < total:
            start, end, c = [int(n) for n in lines[pos].split(',')]
            pos += 1
            hunk = lines[pos:pos + c]
            yield start, end, c, [(version, text) for text in hunk]
            pos += len(hunk)

    def parse_line_delta(self, lines, version):
        """Return the hunks of parse_line_delta_iter as a list."""
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        """Return content in stored form: its text lines without origins."""
        return content.text()

    def lower_line_delta(self, delta):
        """Return delta in stored form.

        Each hunk becomes a 'start,end,count' header line followed by the
        text of its lines; origins are dropped.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out

221

222

223

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    The knit is opened for writing with a plain (non-annotated) factory and
    its data file is opened through the knit's _data object.
    """
    # Hand over a factory instance, as AnnotatedKnitFactory does; the knit
    # calls methods such as make() and lower_fulltext() on its factory,
    # which need an instance to bind to.
    k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory())
    k._data._open_file()

227

228

229

class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

    def __init__(self, transport, relpath, mode, factory,
                 basis_knit=None, delta=True):
        """Construct a knit at location specified by relpath.

        :param transport: transport on which the index and data files live.
        :param relpath: base path; INDEX_SUFFIX and DATA_SUFFIX are appended
            to it to name the two component files.
        :param mode: 'r' or 'w'; the knit is writable only for 'w'.
        :param factory: content factory used to parse and serialise records.
        :param basis_knit: optional KnitVersionedFile consulted first when
            extracting texts.
        :param delta: whether new texts may be stored as line deltas.
        """
        assert mode in ('r', 'w'), "invalid mode specified"
        assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
            type(basis_knit)

        self.transport = transport
        self.filename = relpath
        self.basis_knit = basis_knit
        self.factory = factory
        self.writable = (mode == 'w')
        self.delta = delta

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
                                 mode)
        self._data = _KnitData(transport, relpath + DATA_SUFFIX,
                               mode)

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents):
        """Merge annotations for content.

        For each parent in turn, every run of lines whose text matches the
        parent's text has its (origin, text) entries in content replaced by
        the parent's entries.  content is modified in place.
        """
        for parent_id in parents:
            merge_content = self._get_content(parent_id)
            seq = SequenceMatcher(None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                content._lines[j:j+n] = merge_content._lines[i:i+n]

    def _get_components(self, version_id):
        """Return a list of (version_id, method, data) tuples that
        makes up version specified by version_id of the knit.

        The components should be applied in the order of the returned
        list: the fulltext first, then each line-delta on top of it.
        data is the record as returned by the data file's read_records
        (_get_content unpacks it as a (data, digest) pair).

        The basis knit will be used to the largest extent possible
        since it is assumed that accesses to it is faster.
        """
        # needed_versions holds a list of (method, version_id) of
        # versions that is needed to be fetched to construct the final
        # version of the file.

        # basis_versions is a list of versions that needs to be
        # fetched but exists in the basis knit.

        basis = self.basis_knit
        needed_versions = []
        basis_versions = []
        cursor = version_id

        # Walk back along first parents until a fulltext is reached.
        while 1:
            picked_knit = self
            if basis and basis._index.has_version(cursor):
                picked_knit = basis
                basis_versions.append(cursor)
            method = picked_knit._index.get_method(cursor)
            needed_versions.append((method, cursor))
            if method == 'fulltext':
                break
            cursor = picked_knit.get_parents(cursor)[0]

        components = {}
        if basis_versions:
            records = []
            for comp_id in basis_versions:
                # get_position is the accessor the other call sites in this
                # class use to obtain a record's (position, size) pair.
                data_pos, data_size = basis._index.get_position(comp_id)
                records.append((comp_id, data_pos, data_size))
            components.update(basis._data.read_records(records))

        records = []
        for comp_id in [vid for method, vid in needed_versions
                        if vid not in basis_versions]:
            data_pos, data_size = self._index.get_position(comp_id)
            records.append((comp_id, data_pos, data_size))
        components.update(self._data.read_records(records))

        # read_records returns a mapping with the version id as
        # index and the value as data.  The order the components need
        # to be applied is held by needed_versions (reversed).
        out = []
        for method, comp_id in reversed(needed_versions):
            out.append((comp_id, method, components[comp_id]))

        return out

    def _get_content(self, version_id):
        """Returns a content object that makes up the specified
        version.

        :raises RevisionNotPresent: if version_id is not in this knit.
        :raises KnitCorrupt: if the sha-1 of the reconstructed text does not
            match the digest recorded with the last component applied.
        """
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        if self.basis_knit and version_id in self.basis_knit:
            return self.basis_knit._get_content(version_id)

        content = None
        components = self._get_components(version_id)
        for component_id, method, (data, digest) in components:
            version_idx = self._index.lookup(component_id)
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_idx)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_idx)
                content.apply_delta(delta)

        # _add appends a newline to a final line lacking one and records
        # 'no-eol'; undo that here so the original text is returned.
        if 'no-eol' in self._index.get_options(version_id):
            line = content._lines[-1][1].rstrip('\n')
            content._lines[-1] = (content._lines[-1][0], line)

        if sha_strings(content.text()) != digest:
            raise KnitCorrupt(self.filename, 'sha-1 does not match')

        return content

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: naming one of the missing versions.
        """
        version_ids = set(version_ids)
        for r in list(version_ids):
            if self._index.has_version(r):
                version_ids.remove(r)
        if version_ids:
            raise RevisionNotPresent(list(version_ids)[0], self.filename)

    def add_lines(self, version_id, parents, lines):
        """See VersionedFile.add_lines.

        :raises InvalidRevisionId: if version_id contains whitespace.
        :raises RevisionAlreadyPresent: if version_id is already stored.
        :raises RevisionNotPresent: if any parent is missing.
        """
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)

        # A newline may only appear as the last character of a line.
        for l in lines:
            assert '\n' not in l[:-1]

        self._check_versions_present(parents)
        # _add may modify the list it is given, so hand it a copy.
        return self._add(version_id, lines[:], parents, self.delta)

    def _add(self, version_id, lines, parents, delta):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        the first parent.

        lines may be modified in place: a final line without a trailing
        newline has one appended, and 'no-eol' is recorded in the options.
        """
        if delta and not parents:
            delta = False

        # The digest covers the text exactly as supplied, before any
        # newline is appended below.
        digest = sha_strings(lines)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'

        lines = self.factory.make(lines, len(self._index))
        if self.factory.annotated and len(parents) > 0:
            # Merge annotations from parent texts if so is needed.
            self._merge_annotations(lines, parents)

        if parents and delta:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spend applying deltas.
            count = 0
            delta_parents = parents
            while count < 25:
                parent = delta_parents[0]
                method = self._index.get_method(parent)
                if method == 'fulltext':
                    break
                delta_parents = self._index.get_parents(parent)
                count = count + 1
            # No fulltext found within the limit: store a fulltext instead.
            if method == 'line-delta':
                delta = False

        if delta:
            options.append('line-delta')
            content = self._get_content(parents[0])
            delta_hunks = content.line_delta(lines)
            store_lines = self.factory.lower_line_delta(delta_hunks)
        else:
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)

    def clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null delta.
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self._get_content(version_id).text()

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():
            # Origins are stored as index numbers; report version ids.
            yield self._index.idx_to_name(origin), text

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        self._check_versions_present([version_id])
        return list(self._index.get_parents(version_id))

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        if not versions:
            return []
        self._check_versions_present(versions)
        return self._index.get_ancestry(versions)

    def _reannotate_line_delta(self, other, lines, new_version_id,
                               new_version_idx):
        """Re-annotate line-delta and return new delta.

        Origins in lines are index numbers in other's index; each is mapped
        to its version id and then to the matching index number in this
        knit.  Lines originating from new_version_id, which is not yet in
        this knit's index, are given new_version_idx.
        """
        new_delta = []
        for start, end, count, contents \
                in self.factory.parse_line_delta_iter(lines):
            new_lines = []
            for origin, line in contents:
                old_version_id = other._index.idx_to_name(origin)
                if old_version_id == new_version_id:
                    idx = new_version_idx
                else:
                    idx = self._index.lookup(old_version_id)
                new_lines.append((idx, line))
            new_delta.append((start, end, count, new_lines))

        return self.factory.lower_line_delta(new_delta)

    def _reannotate_fulltext(self, other, lines, new_version_id,
                             new_version_idx):
        """Re-annotate fulltext and return new version.

        Performs the same origin translation as _reannotate_line_delta,
        for a fulltext record.
        """
        content = self.factory.parse_fulltext(lines, new_version_idx)
        new_lines = []
        for origin, line in content.annotate_iter():
            old_version_id = other._index.idx_to_name(origin)
            if old_version_id == new_version_id:
                idx = new_version_idx
            else:
                idx = self._index.lookup(old_version_id)
            new_lines.append((idx, line))

        return self.factory.lower_fulltext(KnitContent(new_lines))

    def join(self, other, pb=None, msg=None, version_ids=None):
        """See VersionedFile.join.

        Copies the records for the ancestry of version_ids (all versions of
        other when None) that this knit lacks, and re-records versions both
        knits hold but with differing parents.

        :return: the number of records copied from other.
        :raises errors.GraphCycleError: if the cycle check on differing
            parents fails.
        """
        assert isinstance(other, KnitVersionedFile)

        if version_ids is None:
            version_ids = other.versions()
        if not version_ids:
            return 0

        if pb is None:
            from bzrlib.progress import DummyProgress
            pb = DummyProgress()

        version_ids = list(version_ids)
        if None in version_ids:
            version_ids.remove(None)

        other_ancestry = set(other.get_ancestry(version_ids))
        this_versions = set(self._index.get_versions())
        needed_versions = other_ancestry - this_versions
        cross_check_versions = other_ancestry.intersection(this_versions)
        mismatched_versions = set()
        for version in cross_check_versions:
            # scan to include needed parents.
            n1 = set(self.get_parents(version))
            n2 = set(other.get_parents(version))
            if n1 != n2:
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent in n1:
                        # safe
                        continue
                    else:
                        parent_ancestors = other.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

        if not needed_versions and not cross_check_versions:
            return 0
        full_list = topo_sort(other._index.get_graph())

        # Topological order, so parents are copied before their children.
        version_list = [i for i in full_list if (not self.has_version(i)
                        and i in needed_versions)]

        records = []
        for version_id in version_list:
            data_pos, data_size = other._index.get_position(version_id)
            records.append((version_id, data_pos, data_size))

        count = 0
        for version_id, lines, digest \
                in other._data.read_records_iter(records):
            options = other._index.get_options(version_id)
            parents = other._index.get_parents(version_id)

            for parent in parents:
                assert self.has_version(parent)

            if self.factory.annotated:
                # FIXME jrydberg: it should be possible to skip
                # re-annotating components if we know that we are
                # going to pull all revisions in the same order.
                new_version_id = version_id
                new_version_idx = self._index.num_versions()
                if 'fulltext' in options:
                    lines = self._reannotate_fulltext(other, lines,
                        new_version_id, new_version_idx)
                elif 'line-delta' in options:
                    lines = self._reannotate_line_delta(other, lines,
                        new_version_id, new_version_idx)

            count = count + 1
            pb.update(self.filename, count, len(version_list))

            pos, size = self._data.add_record(version_id, digest, lines)
            self._index.add_version(version_id, options, pos, size, parents)

        for version in mismatched_versions:
            n1 = set(self.get_parents(version))
            n2 = set(other.get_parents(version))
            # write a combined record to our history.
            new_parents = self.get_parents(version) + list(n2.difference(n1))
            current_values = self._index._cache[version]
            self._index.add_version(version,
                                    current_values[1],
                                    current_values[2],
                                    current_values[3],
                                    new_parents)
        pb.clear()
        return count

    def walk(self, version_ids):
        """See VersionedFile.walk."""
        # We take the short path here, and extract all relevant texts
        # and put them in a weave and let that do all the work.  Far
        # from optimal, but is much simpler.
        # FIXME RB 20060228 this really is inefficient!
        from bzrlib.weave import Weave

        w = Weave(self.filename)
        # A set, so the membership test below does not rescan a list for
        # every version in the graph.
        ancestry = set(self.get_ancestry(version_ids))
        sorted_graph = topo_sort(self._index.get_graph())
        version_list = [vid for vid in sorted_graph if vid in ancestry]

        for version_id in version_list:
            lines = self.get_lines(version_id)
            w.add_lines(version_id, self.get_parents(version_id), lines)

        for lineno, insert_id, dset, line in w.walk(version_ids):
            yield lineno, insert_id, dset, line

622

623

624

class _KnitComponentFile(object):
    """One of the files used to implement a knit database.

    Subclasses supply a HEADER class attribute holding the first line of
    the file they manage.
    """

    def __init__(self, transport, filename, mode):
        """Remember the transport, the file name on it and the open mode."""
        self._mode = mode
        self._filename = filename
        self._transport = transport

    def write_header(self):
        """Append HEADER to the file.

        Raises KnitCorrupt when the transport's append() returns anything
        other than 0.
        """
        if self._transport.append(self._filename, self.HEADER) != 0:
            raise KnitCorrupt(self._filename, 'misaligned after writing header')

    def check_header(self, fp):
        """Read len(HEADER) from fp and raise KnitHeaderError on a mismatch."""
        expected = self.HEADER
        found = fp.read(len(expected))
        if found == expected:
            return
        raise KnitHeaderError(badline=found)

    def commit(self):
        """Commit is a nop."""

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self._filename)

647

648

649

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache maps each version id to its most recent index entry, a
    (version_id, options, pos, size, parents) tuple.

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.
    """

    HEADER = "# bzr knit index 7\n"

    def _cache_version(self, version_id, options, pos, size, parents):
        """Record an entry in memory; a repeated version id replaces the
        stored values but keeps its original position in _history."""
        # Within this class _history and _cache are only extended here,
        # together, so the dict answers "seen before?" without scanning
        # the whole history list on every insert.
        if version_id not in self._cache:
            self._history.append(version_id)
        self._cache[version_id] = (version_id, options, pos, size, parents)

    def _iter_index(self, fp):
        """Yield the whitespace-separated fields of each remaining line of fp."""
        for line in fp.read().splitlines(False):
            yield line.split()

    def __init__(self, transport, filename, mode):
        """Load the index from the transport.

        When the file does not exist and mode is 'w' a new file holding
        only the header is started; for any other mode NoSuchFile
        propagates to the caller.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the weave.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            self.check_header(fp)
            for rec in self._iter_index(fp):
                # Fields: id, comma-joined options, pos, size, then the
                # history indexes of the parents.
                self._cache_version(rec[0], rec[1].split(','),
                                    int(rec[2]), int(rec[3]),
                                    [self._history[int(i)] for i in rec[4:]])
        except NoSuchFile:
            if mode != 'w':
                # Bare raise re-raises with the original traceback.
                raise
            self.write_header()

    def get_graph(self):
        """Return a list of (version_id, parents) pairs, one per cached version."""
        return [(version_id, entry[4])
                for version_id, entry in self._cache.iteritems()]

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry.

        Returns an unordered list holding the requested versions plus the
        parents collected while walking _history from the newest
        requested entry down to the first one.  An empty request yields
        an empty list.  Raises ValueError (from list.index) for a version
        id that is not in the index.
        """
        ancestry = set(versions)
        if not ancestry:
            # max() below would fail on an empty sequence.
            return []
        newest = max([self._history.index(version_id)
                      for version_id in ancestry])
        # Walk down to index 0 inclusive: a later duplicate record can
        # give even the first entry parents.
        for idx in xrange(newest, -1, -1):
            version_id = self._history[idx]
            if version_id in ancestry:
                # include all its parents
                ancestry.update(self._cache[version_id][4])
        return list(ancestry)

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Return the internal list of version ids, in index order."""
        return self._history

    def idx_to_name(self, idx):
        """Return the version id stored at history position idx."""
        return self._history[idx]

    def lookup(self, version_id):
        """Return the history position of version_id."""
        assert version_id in self._cache
        return self._history.index(version_id)

    def add_version(self, version_id, options, pos, size, parents):
        """Add a version record to the index.

        The entry is cached in memory and one line is appended to the
        index file; parents are written as their history positions.
        """
        self._cache_version(version_id, options, pos, size, parents)

        parent_refs = ' '.join([str(self.lookup(vid)) for vid in parents])
        content = "%s %s %s %s %s\n" % (version_id,
                                        ','.join(options),
                                        pos,
                                        size,
                                        parent_refs)
        self._transport.append(self._filename, content)

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        return (entry[2], entry[3])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
            return 'fulltext'
        else:
            assert 'line-delta' in options
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options list stored for the specified version."""
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        Raises RevisionNotPresent, naming one missing version and this
        index's file name, when any of them is absent.
        """
        for version_id in version_ids:
            if version_id not in self._cache:
                # The file name is stored as _filename by the base class.
                raise RevisionNotPresent(version_id, self._filename)

783

784

785

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file"""

    HEADER = "# bzr knit data 7\n"

    def __init__(self, transport, filename, mode):
        _KnitComponentFile.__init__(self, transport, filename, mode)
        # Read handle, fetched on demand by _open_file().
        self._file = None
        self._checked = False

    def _open_file(self):
        """Return the read handle, fetching it from the transport if needed.

        Returns None when the transport raises NoSuchFile.
        """
        if self._file is None:
            try:
                self._file = self._transport.get(self._filename)
            except NoSuchFile:
                # A missing file is tolerated here; _file stays None.
                pass
        return self._file

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        The record is gzip-compressed in memory as a
        "version <id> <line count> <digest>" line, the text lines and an
        "end <id>" line, then appended to the data file.

        Returns a (start_pos, size) tuple: the value returned by the
        transport's append() and the length of the compressed record.
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)
        print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)
        data_file.writelines(lines)
        # print adds its own newline after the one in the string, so each
        # record ends with a blank line that _parse_record never reads.
        print >>data_file, "end %s\n" % version_id
        data_file.close()

        content = sio.getvalue()
        start_pos = self._transport.append(self._filename, content)
        return start_pos, len(content)

    def _parse_record(self, version_id, data):
        """Decompress one raw record and check its framing.

        Returns (lines, digest), where digest is the fourth field of the
        record's header line.  Raises KnitCorrupt when the header does
        not have four fields, names a different version, or the end
        marker does not match.
        """
        df = GzipFile(mode='rb', fileobj=StringIO(data))
        rec = df.readline().split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename, 'unexpected number of records')
        if rec[1] != version_id:
            # The file name is stored as _filename by the base class.
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r' % version_id)
        lines = int(rec[2])
        record_contents = self._read_record_contents(df, lines)
        l = df.readline()
        if l != 'end %s\n' % version_id:
            raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                              % (l, version_id))
        return record_contents, rec[3]

    def _read_record_contents(self, df, record_lines):
        """Read and return n lines from datafile."""
        return [df.readline() for _ in range(record_lines)]

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        Each passed record is a tuple of (version_id, pos, len) and
        will be read in the given order.  Yields (version_id,
        contents, digest).

        The passed list is consumed: entries are removed from its front
        as they are scheduled for reading.
        """

        class ContinuousRange(object):
            """A run of records stored back to back in the data file."""

            def __init__(self, rec_id, pos, size):
                self.start_pos = pos
                self.end_pos = pos + size
                self.versions = [(rec_id, pos, size)]

            def add(self, rec_id, pos, size):
                """Extend the run if the record starts where it ends."""
                if self.end_pos != pos:
                    return False
                self.end_pos = pos + size
                self.versions.append((rec_id, pos, size))
                return True

            def split(self, fp):
                """Yield (rec_id, raw bytes) for each record, reading
                sequentially from the current position of fp."""
                for rec_id, pos, size in self.versions:
                    yield rec_id, fp.read(size)

        # NOTE(review): _open_file() returns None when the data file is
        # missing, in which case the seek below fails with
        # AttributeError - confirm whether callers can hit this.
        fp = self._open_file()

        # Loop through all records and try to collect as large
        # continuous region as possible to read.
        while records:
            record_id, pos, size = records.pop(0)
            continuous_range = ContinuousRange(record_id, pos, size)
            while records and continuous_range.add(*records[0]):
                del records[0]
            # One seek per run; the records in it are then read in order.
            fp.seek(continuous_range.start_pos, 0)
            for record_id, data in continuous_range.split(fp):
                content, digest = self._parse_record(record_id, data)
                yield record_id, content, digest

        # Forget the handle so the next read fetches a fresh one.
        self._file = None

    def read_records(self, records):
        """Read records into a dictionary.

        Returns a dict mapping each record id to (content, digest).
        """
        components = {}
        for record_id, content, digest in self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components

891

Older »