/brz/remove-bazaar : revision 2000.3.7

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: v.ladeuil+lp at free
Date: 2006-11-30 14:11:41 UTC
mfrom: (2156 +trunk)
mto: (2157.1.1 bzr.dev) (2172.3.1 bzr.73948)
This revision was merged to the branch mainline in revision 2157.
Revision ID: v.ladeuil+lp@free.fr-20061130141141-dw2q0etk307ypkdv

Merge bzr.dev

files added:
bzrlib/debug.py

bzrlib/generate_ids.py

bzrlib/help_topics.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_wsgi.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

doc/bazaar-vcs.org.kid

doc/http_smart_server.txt

tools/rst2prettyhtml.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patiencediff.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_http.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/memory.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/ui/__init__.py

bzrlib/workingtree.py

doc/centralized_workflow.txt

doc/configuration.txt

doc/index.txt

doc/specifying_revisions.txt

doc/tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

from bzrlib import (

cache_utf8,

errors,

patiencediff,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

117

118

119

def annotate_iter(self):

119

120

"""Yield tuples of (origin, text) for each content line."""

120

for origin, text in self._lines:

121

yield origin, text

121

return iter(self._lines)

122

123

def annotate(self):

124

"""Return a list of (origin, text) tuples."""

126

127

def line_delta_iter(self, new_lines):

128

"""Generate line-based delta from this content to new_lines."""

129

new_texts = [text for origin, text in new_lines._lines]

130

old_texts = [text for origin, text in self._lines]

129

new_texts = new_lines.text()

130

old_texts = self.text()

131

s = KnitSequenceMatcher(None, old_texts, new_texts)

132

for op in s.get_opcodes():

133

if op[0] == 'equal':

132

for tag, i1, i2, j1, j2 in s.get_opcodes():

133

if tag == 'equal':

134

continue

135

# ofrom oto length data

136

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

135

# ofrom, oto, length, data

136

yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

137

138

def line_delta(self, new_lines):

139

return list(self.line_delta_iter(new_lines))

307

self.writable = (access_mode == 'w')

308

self.delta = delta

309

310

self._max_delta_chain = 200

311

310

312

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

311

313

access_mode, create=create, file_mode=file_mode,

312

314

create_parent_dir=create_parent_dir, delay_create=delay_create,

320

322

return '%s(%s)' % (self.__class__.__name__,

321

323

self.transport.abspath(self.filename))

322

324

325

def _check_should_delta(self, first_parents):

326

"""Iterate back through the parent listing, looking for a fulltext.

327

328

This is used when we want to decide whether to add a delta or a new

329

fulltext. It searches for _max_delta_chain parents. When it finds a

330

fulltext parent, it sees if the total size of the deltas leading up to

331

it is large enough to indicate that we want a new full text anyway.

332

333

Return True if we should create a new delta, False if we should use a

334

full text.

335

"""

336

delta_size = 0

337

fulltext_size = None

338

delta_parents = first_parents

339

for count in xrange(self._max_delta_chain):

340

parent = delta_parents[0]

341

method = self._index.get_method(parent)

342

pos, size = self._index.get_position(parent)

343

if method == 'fulltext':

344

fulltext_size = size

345

break

346

delta_size += size

347

delta_parents = self._index.get_parents(parent)

348

else:

349

# We couldn't find a fulltext, so we must create a new one

350

return False

351

352

return fulltext_size > delta_size

353

323

354

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

324

355

"""See VersionedFile._add_delta()."""

325

356

self._check_add(version_id, []) # should we check the lines ?

357

388

# To speed the extract of texts the delta chain is limited

358

389

# to a fixed number of deltas. This should minimize both

359

390

# I/O and the time spent applying deltas.

360

count = 0

361

delta_parents = [delta_parent]

362

while count < 25:

363

parent = delta_parents[0]

364

method = self._index.get_method(parent)

365

if method == 'fulltext':

366

break

367

delta_parents = self._index.get_parents(parent)

368

count = count + 1

369

if method == 'line-delta':

370

# did not find a fulltext in the delta limit.

371

# just do a normal insertion.

391

# The window was changed to a maximum of 200 deltas, but also added

392

# was a check that the total compressed size of the deltas is

393

# smaller than the compressed size of the fulltext.

394

if not self._check_should_delta([delta_parent]):

395

# We don't want a delta here, just do a normal insertion.

372

396

return super(KnitVersionedFile, self)._add_delta(version_id,

373

397

parents,

374

398

delta_parent,

522

546

delta_seq = None

523

547

for parent_id in parents:

524

548

merge_content = self._get_content(parent_id, parent_texts)

525

seq = KnitSequenceMatcher(None, merge_content.text(), content.text())

549

seq = patiencediff.PatienceSequenceMatcher(

550

None, merge_content.text(), content.text())

526

551

if delta_seq is None:

527

552

# setup a delta seq to reuse.

528

553

delta_seq = seq

539

564

reference_content = self._get_content(parents[0], parent_texts)

540

565

new_texts = content.text()

541

566

old_texts = reference_content.text()

542

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

567

delta_seq = patiencediff.PatienceSequenceMatcher(

568

None, old_texts, new_texts)

543

569

return self._make_line_delta(delta_seq, content)

544

570

545

571

def _make_line_delta(self, delta_seq, new_content):

666

692

# To speed the extract of texts the delta chain is limited

667

693

# to a fixed number of deltas. This should minimize both

668

694

# I/O and the time spent applying deltas.

669

count = 0

670

delta_parents = present_parents

671

while count < 25:

672

parent = delta_parents[0]

673

method = self._index.get_method(parent)

674

if method == 'fulltext':

675

break

676

delta_parents = self._index.get_parents(parent)

677

count = count + 1

678

if method == 'line-delta':

679

delta = False

695

delta = self._check_should_delta(present_parents)

680

696

681

697

lines = self.factory.make(lines, version_id)

682

698

if delta or (self.factory.annotated and len(present_parents) > 0):

823

839

data_pos, length = self._index.get_position(version_id)

824

840

version_id_records.append((version_id, data_pos, length))

825

841

826

count = 0

827

842

total = len(version_id_records)

828

pb.update('Walking content.', count, total)

829

for version_id, data, sha_value in \

830

self._data.read_records_iter(version_id_records):

831

pb.update('Walking content.', count, total)

843

for version_idx, (version_id, data, sha_value) in \

844

enumerate(self._data.read_records_iter(version_id_records)):

845

pb.update('Walking content.', version_idx, total)

832

846

method = self._index.get_method(version_id)

833

847

version_idx = self._index.lookup(version_id)

834

848

assert method in ('fulltext', 'line-delta')

841

855

for start, end, count, lines in delta:

842

856

for origin, line in lines:

843

857

yield line

844

count +=1

845

858

pb.update('Walking content.', total, total)

846

859

847

860

def num_versions(self):

1253

1266

"""

1254

1267

lines = []

1255

1268

encode_utf8 = cache_utf8.encode

1256

for version_id, options, pos, size, parents in versions:

1257

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1258

','.join(options),

1259

pos,

1260

size,

1261

self._version_list_to_index(parents))

1262

assert isinstance(line, str), \

1263

'content must be utf-8 encoded: %r' % (line,)

1264

lines.append(line)

1265

if not self._need_to_create:

1266

self._transport.append_bytes(self._filename, ''.join(lines))

1267

else:

1268

sio = StringIO()

1269

sio.write(self.HEADER)

1270

sio.writelines(lines)

1271

sio.seek(0)

1272

self._transport.put_file_non_atomic(self._filename, sio,

1273

create_parent_dir=self._create_parent_dir,

1274

mode=self._file_mode,

1275

dir_mode=self._dir_mode)

1276

self._need_to_create = False

1277

1278

# cache after writing, so that a failed write leads to missing cache

1279

# entries not extra ones. XXX TODO: RBC 20060502 in the event of a

1280

# failure, reload the index or flush it or some such, to prevent

1281

# writing records that did complete twice.

1282

for version_id, options, pos, size, parents in versions:

1283

self._cache_version(version_id, options, pos, size, parents)

1284

1269

orig_history = self._history[:]

1270

orig_cache = self._cache.copy()

1271

1272

try:

1273

for version_id, options, pos, size, parents in versions:

1274

line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),

1275

','.join(options),

1276

pos,

1277

size,

1278

self._version_list_to_index(parents))

1279

assert isinstance(line, str), \

1280

'content must be utf-8 encoded: %r' % (line,)

1281

lines.append(line)

1282

self._cache_version(version_id, options, pos, size, parents)

1283

if not self._need_to_create:

1284

self._transport.append_bytes(self._filename, ''.join(lines))

1285

else:

1286

sio = StringIO()

1287

sio.write(self.HEADER)

1288

sio.writelines(lines)

1289

sio.seek(0)

1290

self._transport.put_file_non_atomic(self._filename, sio,

1291

create_parent_dir=self._create_parent_dir,

1292

mode=self._file_mode,

1293

dir_mode=self._dir_mode)

1294

self._need_to_create = False

1295

except:

1296

# If any problems happen, restore the original values and re-raise

1297

self._history = orig_history

1298

self._cache = orig_cache

1299

raise

1300

1285

1301

def has_version(self, version_id):

1286

1302

"""True if the version is in the index."""

1287

1303

return (version_id in self._cache)

Older »