        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        cur = 0
        num_lines = len(lines)
        while cur < num_lines:
            header = lines[cur]
            cur += 1
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version] * c, lines[cur:cur+c])
            cur += c

    def parse_line_delta(self, lines, version):
        return list(self.parse_line_delta_iter(lines, version))
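
    # Illustrative sketch (not from the original source; factory name
    # assumed): the line-delta format parsed above is a 'start,end,count'
    # header line followed by count replacement lines, each hunk annotated
    # with the supplied version id:
    #
    #   >>> f = KnitPlainFactory()
    #   >>> list(f.parse_line_delta_iter(['1,3,2\n', 'new a\n', 'new b\n'], 'v2'))
    #   [(1, 3, 2, [('v2', 'new a\n'), ('v2', 'new b\n')])]
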
    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next = lines.next
        for header in lines:
            header = header.split(',')
            count = int(header[2])
            for i in xrange(count):
                yield next()

    def lower_fulltext(self, content):
        return content.text()
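
    # Illustrative sketch (input data assumed): get_linedelta_content skips
    # the hunk headers and yields only the content lines of each hunk:
    #
    #   >>> list(f.get_linedelta_content(['1,3,2\n', 'new a\n', 'new b\n']))
    #   ['new a\n', 'new b\n']
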
    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _check_should_delta(self, first_parents):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext. It searches for _max_delta_chain parents. When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        fulltext.
        """
        delta_size = 0
        fulltext_size = None
        delta_parents = first_parents
        for count in xrange(self._max_delta_chain):
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            pos, size = self._index.get_position(parent)
            if method == 'fulltext':
                fulltext_size = size
                break
            delta_size += size
            delta_parents = self._index.get_parents(parent)
        else:
            # We couldn't find a fulltext, so we must create a new one
            return False

        return fulltext_size > delta_size
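
    # Worked sketch (numbers assumed, not from the original source): if the
    # chain behind a candidate delta is delta(800) -> delta(1200) ->
    # fulltext(10000), then delta_size is 2000 and fulltext_size is 10000,
    # so 10000 > 2000 returns True and the caller stores another delta.
    # Once the accumulated deltas outgrow the fulltext, the method returns
    # False and a fresh fulltext is stored instead.
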
    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, [])  # should we check the lines ?

        # but we need to set up a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        requested_versions = set(version_ids)
        # filter for available versions
        for version_id in requested_versions:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        # get an in-component-order queue:
        for version_id in self.versions():
            if version_id in requested_versions:
                data_pos, length = self._index.get_position(version_id)
                version_id_records.append((version_id, data_pos, length))

        total = len(version_id_records)
        for version_idx, (version_id, data, sha_value) in \
            enumerate(self._data.read_records_iter(version_id_records)):
            pb.update('Walking content.', version_idx, total)
            method = self._index.get_method(version_id)
            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                line_iterator = self.factory.get_fulltext_content(data)
            else:
                line_iterator = self.factory.get_linedelta_content(data)
            for line in line_iterator:
                yield line

        pb.update('Walking content.', total, total)
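
        # Hedged usage sketch (caller code assumed; this fragment appears to
        # be the body of iter_lines_added_or_present_in_versions): a caller
        # wanting every distinct line in a set of versions can drain the
        # iterator into a set:
        #
        #   unique = set(knit.iter_lines_added_or_present_in_versions(
        #       ['version-a', 'version-b']))
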
def num_versions(self):

        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        pb = ui.ui_factory.nested_progress_bar()
        try:
            pb.update('read knit index', 0, 1)
            try:
                fp = self._transport.get(self._filename)
                try:
                    # _load_data may raise NoSuchFile if the target knit is
                    # completely empty.
                    self._load_data(fp)
                finally:
                    fp.close()
            except NoSuchFile:
                if mode != 'w' or not create:
                    raise
                elif delay_create:
                    self._need_to_create = True
                else:
                    self._transport.put_bytes_non_atomic(
                        self._filename, self.HEADER, mode=self._file_mode)
        finally:
            pb.update('read knit index', 1, 1)
            pb.finished()

    def _parse_parents(self, compressed_parents):
        """convert a list of string parent values into version ids.

        ints are looked up in the index.
        .FOO values are ghosts and converted into FOO.

        NOTE: the function is retained here for clarity, and for possible
              use in partial index reads. However bulk processing now has
              it inlined in __init__ for inner-loop optimisation.
        """
        result = []
        for value in compressed_parents:
            if value[0] == '.':
                # uncompressed reference
                result.append(value[1:])
            else:
                # this is 15/4000ms faster than isinstance,
                # this function is called thousands of times a
                # second so small variations add up.
                assert value.__class__ is str
                result.append(self._history[int(value)])
        return result
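
    # Illustrative sketch for _parse_parents (values assumed): with
    # self._history == ['rev-1', 'rev-2'], the compressed parent list
    # ['1', '.ghost-id'] decodes to ['rev-2', 'ghost-id'] - a plain integer
    # indexes _history, while a leading '.' marks an uncompressed (possibly
    # ghost) reference stored literally.
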

    def _load_data(self, fp):
        cache = self._cache
        history = self._history
        decode_utf8 = cache_utf8.decode

        self.check_header(fp)
        # readlines reads the whole file at once:
        # bad for transports like http, good for local disk
        # we save 60 ms doing this one change (
        # from calling readline each time to calling
        # readlines once).
        # probably what we want for nice behaviour on
        # http is a incremental readlines that yields, or
        # a check for local vs non local indexes,
        history_top = len(history) - 1
        for line in fp.readlines():
            rec = line.split()
            if len(rec) < 5 or rec[-1] != ':':
                # corrupt line.
                # FIXME: in the future we should determine if its a
                # short write - and ignore it
                # or a different failure, and raise. RBC 20060407
                continue

            parents = []
            for value in rec[4:-1]:
                if value[0] == '.':
                    # uncompressed reference
                    parents.append(decode_utf8(value[1:]))
                else:
                    parents.append(history[int(value)])

            version_id, options, pos, size = rec[:4]
            version_id = decode_utf8(version_id)

            # See self._cache_version
            # only want the _history index to reference the 1st
            # index entry for version_id
            if version_id not in cache:
                history_top += 1
                index = history_top
                history.append(version_id)
            else:
                index = cache[version_id][5]
            cache[version_id] = (version_id,
                                 options.split(','),
                                 int(pos),
                                 int(size),
                                 parents,
                                 index)
            # end self._cache_version
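
    # Sketch of the cache layout built above (values assumed): each entry
    # is a 6-tuple, e.g.
    #
    #   cache['rev-1'] == ('rev-1', ['fulltext'], 0, 120, [], 0)
    #
    # i.e. (version_id, options, byte offset, byte length, parent list,
    # index of the first _history entry for this version) - matching the
    # [2], [3], [4] and [5] accesses elsewhere in this class.
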
    def get_graph(self):
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())
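
    # Illustrative sketch (graph assumed): for a linear history
    # 'a' <- 'b' <- 'c', get_ancestry(['c']) builds the graph
    # {'c': ['b'], 'b': ['a'], 'a': []} and topo_sort returns
    # ['a', 'b', 'c']; parents missing from the index (ghosts) are
    # silently trimmed.
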
    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())
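
    # Unlike get_ancestry above, a ghost is kept rather than trimmed: if
    # 'b' names an absent parent 'a' (data assumed), 'a' appears in the
    # result with an empty parent list, so get_ancestry_with_ghosts(['b'])
    # returns ['a', 'b'].
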
        encode_utf8 = cache_utf8.encode
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, pos, size, parents in versions:
                line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache
            raise
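
        # Sketch of the index line emitted above (values assumed): for a
        # 120-byte fulltext 'rev-2' at offset 0 whose single parent is
        # _history entry 0, the record is
        #
        #   rev-2 fulltext 0 120 0 :
        #
        # with parents written either as numeric _history indices or as
        # '.'-prefixed literal version ids - the inverse of what
        # _load_data / _parse_parents read back.
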
    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return data position and size of specified version."""
        entry = self._cache[version_id]
        return entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version."""

        as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        rec = self._check_header(version_id, df.readline())
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._filename,
                              'unexpected number of elements in record header')
        if cache_utf8.decode(rec[1]) != version_id:
            raise KnitCorrupt(self._filename,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec

    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))

        record_contents = df.readlines()
        header = record_contents.pop(0)
        rec = self._check_header(version_id, header)

        last_line = record_contents.pop()
        assert len(record_contents) == int(rec[2])
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._filename,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))
        df.close()
        return record_contents, rec[3]
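
        # Sketch of the uncompressed record layout parsed above (sample
        # values assumed):
        #
        #   version rev-1 2 <sha1>\n    (header validated by _check_header)
        #   line one\n
        #   line two\n
        #   end rev-1\n
        #
        # rec[2] carries the content line count and rec[3] the digest that
        # is returned alongside the content lines.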