/brz/remove-bazaar : revision 929

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/weave.py

Committer: Martin Pool
Date: 2005-07-17 18:38:22 UTC
Revision ID: mbp@sourcefrog.net-20050717183820-e347e5897ccd375b

- progress bar: avoid repeatedly checking screen width

files removed:
bzrlib/delta.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testrevision.py

contrib/emacs

contrib/emacs/bzr-mode.el

doc/split-join-files.txt

patches/pending-merge.patch

tutorial.txt

files modified:
.bzrignore

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge_core.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/versioning.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

testsweet.py

tools/convertfile.py

tools/testweave.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/weave.py

# with intset (r926) 2000 versions in 93s !!!

# better to just use plain sets.

# making _extract build and return a list, rather than being a generator

# takes 37.94s

# with python -O, r923 does 2000 versions in 36.87s

# with optimizations to avoid mutating lists - 35.75! I guess copying

# all the elements every time costs more than the small manipulations.

# a surprisingly small change.

# r931, which avoids using a generator for extract, does 36.98s

# with memoized inclusions, takes 41.49s; not very good

# with slots, takes 37.35s; without takes 39.16, a bit surprising

# with the delta calculation mixed in with the add method, rather than

# separated, takes 36.78s

# with delta folded in and mutation of the list, 36.13s

# with all this and simplification of add code, 33s

# TODO: Perhaps have copy method for Weave instances?

# XXX: If we do weaves this way, will a merge still behave the same

# properly nested, that there is no text outside of an insertion, that

# insertions or deletions are not repeated, etc.

# TODO: Make the info command just show info, not extract everything:

# it can be much faster.

# TODO: Perhaps use long integers as sets instead of set objects; may

# be faster.

# TODO: Parallel-extract that passes back each line along with a

# description of which revisions include it. Nice for checking all

# shas in parallel.

107

the version-id is used to reference it in the larger world.

108

109

The weave is represented as a list mixing edit instructions and

110

literal text. Each entry in _weave can be either a string (or

literal text. Each entry in _l can be either a string (or

111

unicode), or a tuple. If a string, it means that the given line

112

should be output in the currently active revisions.

113

151

134

should be no way to get an earlier version deleting a later

152

135

version.

153

136

154

_weave

155

Text of the weave; list of control instruction tuples and strings.

137

138

Text of the weave.

156

139

157

_parents

140

158

141

List of parents, indexed by version number.

159

142

It is only necessary to store the minimal set of parents for

160

143

each version; the parent's parents are implied.

162

145

_sha1s

163

146

List of hex SHA-1 of each version, or None if not recorded.

164

147

"""

165

166

__slots__ = ['_weave', '_parents', '_sha1s']

167

168

148

def __init__(self):

169

self._weave = []

170

self._parents = []

149

self._l = []

150

self._v = []

171

151

self._sha1s = []

172

152

173

153

174

154

def __eq__(self, other):

175

155

if not isinstance(other, Weave):

176

156

return False

177

return self._parents == other._parents \

178

and self._weave == other._weave

157

return self._v == other._v \

158

and self._l == other._l

179

159

180

160

181

161

def __ne__(self, other):

192

172

193

173

text

194

174

Sequence of lines to be added in the new version."""

195

196

self._check_versions(parents)

175

## self._check_versions(parents)

197

176

## self._check_lines(text)

198

new_version = len(self._parents)

177

idx = len(self._v)

199

178

200

179

import sha

201

180

s = sha.new()

202

map(s.update, text)

181

for l in text:

182

s.update(l)

203

183

sha1 = s.hexdigest()

204

184

del s

205

185

206

# if we abort after here the weave will be corrupt

207

self._parents.append(frozenset(parents))

186

# TODO: It'd probably be faster to append things on to a new

187

# list rather than modifying the existing one, which is likely

188

# to cause a lot of copying.

189

190

if parents:

191

ancestors = self.inclusions(parents)

192

delta = self._delta(ancestors, text)

193

194

# offset gives the number of lines that have been inserted

195

# into the weave up to the current point; if the original edit instruction

196

# says to change line A then we actually change (A+offset)

197

offset = 0

198

199

for i1, i2, newlines in delta:

200

assert 0 <= i1

201

assert i1 <= i2

202

assert i2 <= len(self._l)

203

204

# the deletion and insertion are handled separately.

205

# first delete the region.

206

if i1 != i2:

207

self._l.insert(i1+offset, ('[', idx))

208

self._l.insert(i2+offset+1, (']', idx))

209

offset += 2

210

# is this OK???

211

212

if newlines:

213

# there may have been a deletion spanning up to

214

# i2; we want to insert after this region to make sure

215

# we don't destroy ourselves

216

i = i2 + offset

217

self._l[i:i] = [('{', idx)] \

218

+ newlines \

219

+ [('}', idx)]

220

offset += 2 + len(newlines)

221

222

self._addversion(parents)

223

else:

224

# special case; adding with no parents revision; can do this

225

# more quickly by just appending unconditionally

226

self._l.append(('{', idx))

227

self._l += text

228

self._l.append(('}', idx))

229

230

self._addversion(None)

231

208

232

self._sha1s.append(sha1)

209

210

211

if not parents:

212

# special case; adding with no parents revision; can do

213

# this more quickly by just appending unconditionally.

214

# even more specially, if we're adding an empty text we

215

# need do nothing at all.

216

if text:

217

self._weave.append(('{', new_version))

218

self._weave.extend(text)

219

self._weave.append(('}', new_version))

220

221

return new_version

222

223

if len(parents) == 1:

224

pv = list(parents)[0]

225

if sha1 == self._sha1s[pv]:

226

# special case: same as the single parent

227

return new_version

228

229

230

ancestors = self.inclusions(parents)

231

232

l = self._weave

233

234

# basis a list of (origin, lineno, line)

235

basis_lineno = []

236

basis_lines = []

237

for origin, lineno, line in self._extract(ancestors):

238

basis_lineno.append(lineno)

239

basis_lines.append(line)

240

241

# another small special case: a merge, producing the same text as auto-merge

242

if text == basis_lines:

243

return new_version

244

245

# add a sentinel, because we can also match against the final line

246

basis_lineno.append(len(self._weave))

247

248

# XXX: which line of the weave should we really consider

249

# matches the end of the file? the current code says it's the

250

# last line of the weave?

251

252

#print 'basis_lines:', basis_lines

253

#print 'new_lines: ', lines

254

255

from difflib import SequenceMatcher

256

s = SequenceMatcher(None, basis_lines, text)

257

258

# offset gives the number of lines that have been inserted

259

# into the weave up to the current point; if the original edit instruction

260

# says to change line A then we actually change (A+offset)

261

offset = 0

262

263

for tag, i1, i2, j1, j2 in s.get_opcodes():

264

# i1,i2 are given in offsets within basis_lines; we need to map them

265

# back to offsets within the entire weave

266

#print 'raw match', tag, i1, i2, j1, j2

267

if tag == 'equal':

268

continue

269

270

i1 = basis_lineno[i1]

271

i2 = basis_lineno[i2]

272

273

assert 0 <= j1 <= j2 <= len(text)

274

275

#print tag, i1, i2, j1, j2

276

277

# the deletion and insertion are handled separately.

278

# first delete the region.

279

if i1 != i2:

280

self._weave.insert(i1+offset, ('[', new_version))

281

self._weave.insert(i2+offset+1, (']', new_version))

282

offset += 2

283

284

if j1 != j2:

285

# there may have been a deletion spanning up to

286

# i2; we want to insert after this region to make sure

287

# we don't destroy ourselves

288

i = i2 + offset

289

self._weave[i:i] = ([('{', new_version)]

290

+ text[j1:j2]

291

+ [('}', new_version)])

292

offset += 2 + (j2 - j1)

293

294

return new_version

233

234

return idx

295

235

296

236

297

237

def inclusions(self, versions):

302

242

while v >= 0:

303

243

if v in i:

304

244

# include all its parents

305

i.update(self._parents[v])

245

i.update(self._v[v])

306

246

v -= 1

307

247

return i

308

248

except IndexError:

311

251

312

252

def minimal_parents(self, version):

313

253

"""Find the minimal set of parents for the version."""

314

included = self._parents[version]

254

included = self._v[version]

315

255

if not included:

316

256

return []

317

257

331

271

return mininc

332

272

333

273

274

def _addversion(self, parents):

275

if parents:

276

self._v.append(parents)

277

else:

278

self._v.append(set())

279

334

280

335

281

def _check_lines(self, text):

336

282

if not isinstance(text, list):

347

293

"""Check everything in the sequence of indexes is valid"""

348

294

for i in indexes:

349

295

try:

350

self._parents[i]

296

self._v[i]

351

297

except IndexError:

352

298

raise IndexError("invalid version number %r" % i)

353

299

377

323

378

324

lineno = 0 # line of weave, 0-based

379

325

380

for l in self._weave:

326

for l in self._l:

381

327

if isinstance(l, tuple):

382

328

c, v = l

383

329

isactive = None

419

365

420

366

isactive = None

421

367

422

result = []

423

424

368

WFE = WeaveFormatError

425

369

426

for l in self._weave:

370

for l in self._l:

427

371

if isinstance(l, tuple):

428

372

c, v = l

429

373

isactive = None

447

391

if isactive is None:

448

392

isactive = (not dset) and istack and (istack[-1] in included)

449

393

if isactive:

450

result.append((istack[-1], lineno, l))

394

yield istack[-1], lineno, l

451

395

lineno += 1

452

396

453

397

if istack:

457

401

raise WFE("unclosed deletion blocks at end of weave",

458

402

dset)

459

403

460

return result

461

462

463

404

464

405

def get_iter(self, version):

465

406

"""Yield lines for the specified version."""

479

420

480

421

def dump(self, to_file):

481

422

from pprint import pprint

482

print >>to_file, "Weave._weave = ",

483

pprint(self._weave, to_file)

484

print >>to_file, "Weave._parents = ",

485

pprint(self._parents, to_file)

423

print >>to_file, "Weave._l = ",

424

pprint(self._l, to_file)

425

print >>to_file, "Weave._v = ",

426

pprint(self._v, to_file)

486

427

487

428

488

429

489

430

def numversions(self):

490

l = len(self._parents)

431

l = len(self._v)

491

432

assert l == len(self._sha1s)

492

433

return l

493

434

494

435

495

def __len__(self):

496

return self.numversions()

497

498

499

436

def check(self, progress_bar=None):

500

437

# check no circular inclusions

501

438

for version in range(self.numversions()):

502

inclusions = list(self._parents[version])

439

inclusions = list(self._v[version])

503

440

if inclusions:

504

441

inclusions.sort()

505

442

if inclusions[-1] >= version:

565

502

If line1=line2, this is a pure insert; if newlines=[] this is a

566

503

pure delete. (Similar to difflib.)

567

504

"""

505

# basis a list of (origin, lineno, line)

506

basis_lineno = []

507

basis_lines = []

508

for origin, lineno, line in self._extract(included):

509

basis_lineno.append(lineno)

510

basis_lines.append(line)

511

512

# add a sentinel, because we can also match against the final line

513

basis_lineno.append(len(self._l))

514

515

# XXX: which line of the weave should we really consider

516

# matches the end of the file? the current code says it's the

517

# last line of the weave?

518

519

from difflib import SequenceMatcher

520

s = SequenceMatcher(None, basis_lines, lines)

521

522

# TODO: Perhaps return line numbers from composed weave as well?

523

524

for tag, i1, i2, j1, j2 in s.get_opcodes():

525

##print tag, i1, i2, j1, j2

526

527

if tag == 'equal':

528

continue

529

530

# i1,i2 are given in offsets within basis_lines; we need to map them

531

# back to offsets within the entire weave

532

real_i1 = basis_lineno[i1]

533

real_i2 = basis_lineno[i2]

534

535

assert 0 <= j1

536

assert j1 <= j2

537

assert j2 <= len(lines)

538

539

yield real_i1, real_i2, lines[j1:j2]

568

540

569

541

570

542

670

642

671

643

672

644

673

def weave_info(w):

645

def weave_info(filename, out):

674

646

"""Show some text information about the weave."""

675

print '%6s %40s %20s' % ('ver', 'sha1', 'parents')

676

for i in (6, 40, 20):

677

print '-' * i,

678

679

for i in range(w.numversions()):

680

sha1 = w._sha1s[i]

681

print '%6d %40s %s' % (i, sha1, ' '.join(map(str, w._parents[i])))

682

683

684

685

def weave_stats(weave_file):

686

from bzrlib.progress import ProgressBar

687

from bzrlib.weavefile import read_weave

688

689

pb = ProgressBar()

690

691

wf = file(weave_file, 'rb')

647

from weavefile import read_weave

648

wf = file(filename, 'rb')

692

649

w = read_weave(wf)

693

650

# FIXME: doesn't work on pipes

694

651

weave_size = wf.tell()

652

print >>out, "weave file size %d bytes" % weave_size

653

print >>out, "weave contains %d versions" % len(w._v)

695

654

696

655

total = 0

697

vers = len(w)

698

for i in range(vers):

699

pb.update('checking sizes', i, vers)

700

for line in w.get_iter(i):

701

total += len(line)

702

703

pb.clear()

704

705

print 'versions %9d' % vers

706

print 'weave file %9d bytes' % weave_size

707

print 'total contents %9d bytes' % total

708

print 'compression ratio %9.2fx' % (float(total) / float(weave_size))

709

656

print '%6s %6s %8s %40s %20s' % ('ver', 'lines', 'bytes', 'sha1', 'parents')

657

for i in (6, 6, 8, 40, 20):

658

print '-' * i,

659

660

for i in range(len(w._v)):

661

text = w.get(i)

662

lines = len(text)

663

bytes = sum((len(a) for a in text))

664

sha1 = w._sha1s[i]

665

print '%6d %6d %8d %40s' % (i, lines, bytes, sha1),

666

for pv in w._v[i]:

667

print pv,

668

669

total += bytes

670

671

print >>out, "versions total %d bytes" % total

672

print >>out, "compression ratio %.3f" % (float(total)/float(weave_size))

710

673

711

674

712

675

def usage():

810

773

lasto = origin

811

774

812

775

elif cmd == 'info':

813

weave_info(readit())

814

815

elif cmd == 'stats':

816

weave_stats(argv[2])

776

weave_info(argv[2], sys.stdout)

817

777

818

778

elif cmd == 'check':

819

779

w = readit()

820

780

pb = ProgressBar()

821

781

w.check(pb)

822

782

pb.clear()

823

print '%d versions ok' % w.numversions()

824

783

825

784

elif cmd == 'inclusions':

826

785

w = readit()

828

787

829

788

elif cmd == 'parents':

830

789

w = readit()

831

print ' '.join(map(str, w._parents[int(argv[3])]))

790

print ' '.join(map(str, w._v[int(argv[3])]))

832

791

833

792

elif cmd == 'plan-merge':

834

793

w = readit()

Older »