        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]
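
    # Illustrative sketch (not from the original source): annotated content
    # whose _lines are [('rev-1', 'hello\n')] lowers to ['rev-1 hello\n'],
    # assuming each stored text carries its trailing newline.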

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out
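
    # Illustrative sketch (not from the original source): a delta replacing
    # lines 0-1 with two lines attributed to 'rev-1' lowers to
    #   ['0,1,2\n', 'rev-1 first line\n', 'rev-1 second line\n']
    # i.e. a 'start,end,count' header followed by count origin-tagged lines.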


class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - it's just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        cur = 0
        num_lines = len(lines)
        while cur < num_lines:
            header = lines[cur]
            cur += 1
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version_id] * c, lines[cur:cur+c])
            cur += c
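
    # Illustrative sketch (not from the original source): parsing
    #   ['0,1,2\n', 'alpha\n', 'beta\n']
    # with version_id 'rev-1' yields one hunk:
    #   (0, 1, 2, [('rev-1', 'alpha\n'), ('rev-1', 'beta\n')])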

    def parse_line_delta(self, lines, version_id):
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                self._load_data(fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def _load_data(self, fp):
        cache = self._cache
        history = self._history

        self.check_header(fp)
        # readlines reads the whole file at once:
        # bad for transports like http, good for local disk
        # we save 60 ms doing this one change (
        # from calling readline each time to calling
        # readlines once.
        # probably what we want for nice behaviour on
        # http is a incremental readlines that yields, or
        # a check for local vs non local indexes,
        history_top = len(history) - 1
        for line in fp.readlines():
            rec = line.split()
            if len(rec) < 5 or rec[-1] != ':':
                # FIXME: in the future we should determine if its a
                #        short write - and ignore it
                #        or a different failure, and raise. RBC 20060407
                continue

            parents = []
            for value in rec[4:-1]:
                if value[0] == '.':
                    # uncompressed reference
                    parent_id = value[1:]
                else:
                    parent_id = history[int(value)]
                parents.append(parent_id)

            version_id, options, pos, size = rec[:4]

            # See self._cache_version
            # only want the _history index to reference the 1st
            # index entry for version_id
            if version_id not in cache:
                history_top += 1
                index = history_top
                history.append(version_id)
            else:
                index = cache[version_id][5]
            cache[version_id] = (version_id,
                                 options.split(','),
                                 int(pos),
                                 int(size),
                                 parents,
                                 index)
            # end self._cache_version

    def _parse_parents(self, compressed_parents):
        """convert a list of string parent values into version ids.

        ints are looked up in the index.
        .FOO values are ghosts and converted into FOO.

        NOTE: the function is retained here for clarity, and for possible
              use in partial index reads. However bulk processing now has
              it inlined in _load_data for inner-loop optimisation.
        """
        result = []
        for value in compressed_parents:
            if value[0] == '.':
                # uncompressed reference
                result.append(value[1:])
            else:
                # this is 15/4000ms faster than isinstance,
                # this function is called thousands of times a
                # second so small variations add up.
                assert value.__class__ is str
                result.append(self._history[int(value)])
        return result
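
    # Illustrative sketch (not from the original source): an index line such
    # as 'rev-2 fulltext 0 123 0 .ghost-rev :' is cached as version_id
    # 'rev-2', options ['fulltext'], pos 0, size 123 and parents
    # ['rev-1', 'ghost-rev'], assuming 'rev-1' is entry 0 in self._history
    # ('.'-prefixed parent values are uncompressed ghost references).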

    def get_graph(self):
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())
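
    # Illustrative sketch (not from the original source): with cached
    # entries rev-1 (no parents) and rev-2 (parents rev-1 plus a ghost
    # absent from the cache), get_ancestry(['rev-2']) trims the ghost and
    # returns ['rev-1', 'rev-2'] in topological order.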

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # got the parents ok
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())
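
    # Illustrative sketch (not from the original source): unlike
    # get_ancestry, the ghost parent above is kept here, entering the graph
    # with an empty parent list, so it appears in the returned ancestry.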

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream.
                 as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        try:
            rec = self._check_header(version_id, df.readline())
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename,
                              'unexpected number of elements in record header')
        if rec[1] != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec
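
    # Illustrative sketch (not from the original source): a well-formed
    # header line reads 'version rev-1 2 <sha1-hex>\n', assuming the knit
    # data format's 'version <id> <line-count> <digest>' layout: exactly
    # four fields, with the version id in rec[1].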

    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))
        try:
            record_contents = df.readlines()
        except Exception, e:
            raise KnitCorrupt(self._filename,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        header = record_contents.pop(0)
        rec = self._check_header(version_id, header)

        last_line = record_contents.pop()
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._filename,
                              'incorrect number of lines %s != %s'
                              ' for version {%s}'
                              % (len(record_contents), int(rec[2]),
                                 version_id))
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))
        df.close()
        return record_contents, rec[3]
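
    # Illustrative sketch (not from the original source): for the header
    # above, the decompressed record would be
    #   ['version rev-1 2 <sha1-hex>\n', 'line one\n', 'line two\n',
    #    'end rev-1\n']
    # and _parse_record returns (['line one\n', 'line two\n'], '<sha1-hex>').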