/brz/remove-bazaar : revision 1352

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to tools/history2weaves.py

Committer: Martin Pool
Date: 2005-09-22 05:18:24 UTC
Revision ID: mbp@sourcefrog.net-20050922051824-263a54b20d3c54a4

- store control weaves in .bzr/, not mixed in with file weaves

files added:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzr-man.py

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/teststore.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/ui.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/weavestore.py

bzrlib/workingtree.py

bzrlib/xml.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

setup.py

testbzr

testsweet.py

tools

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

tools/history2weaves.py

#! /usr/bin/python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Experiment in converting existing bzr branches to weaves."""

# To make this properly useful

# 1. assign text version ids, and put those text versions into

# the inventory as they're converted.

# 2. keep track of the previous version of each file, rather than

# just using the last one imported

# 3. assign entry versions when files are added, renamed or moved.

# 4. when merged-in versions are observed, walk down through them

# to discover everything, then commit bottom-up

# 5. track ancestry as things are merged in, and commit that in each

# revision

# Perhaps it's best to first walk the whole graph and make a plan for

# what should be imported in what order? Need a kind of topological

# sort of all revisions. (Or do we, can we just before doing a revision

# see that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been

# imported. In other words, we can only import revisions whose

# parents have all been imported. the first step must be to

# import a revision with no parents, of which there must be at

# least one. (So perhaps it's useful to store forward pointers

# from a list of parents to their children?)

# Another (equivalent?) approach is to build up the ordered

# ancestry list for the last revision, and walk through that. We

# are going to need that.

# We don't want to have to recurse all the way back down the list.

# Suppose we keep a queue of the revisions able to be processed at

# any point. This starts out with all the revisions having no

# parents.

# This seems like a generally useful algorithm...

# The current algorithm is dumb (O(n**2)?) but will do the job, and

# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a

# new text is written in every version. That's unnecessary but for

# the moment saves us having to worry about when files need new

# versions.

# TODO: Check that the working directory is clean before converting

if False:

try:

import psyco

psyco.full()

except ImportError:

pass

import os

import tempfile

import hotshot, hotshot.stats

import sys

import logging

import shutil

from bzrlib.branch import Branch, find_branch

from bzrlib.revfile import Revfile

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave

from bzrlib.progress import ProgressBar

from bzrlib.atomicfile import AtomicFile

from bzrlib.xml4 import serializer_v4

from bzrlib.xml5 import serializer_v5

from bzrlib.trace import mutter, note, warning, enable_default_logging

from bzrlib.osutils import sha_strings, sha_string

from bzrlib.commit import merge_ancestry_lines

100

101

class Convert(object):
    """Convert the branch in the current directory to weave storage.

    Creating an instance runs the whole conversion immediately, because
    __init__ calls convert().

    Attributes set in __init__:
      converted_revs    -- set of revision ids already converted
      absent_revisions  -- set of revision ids that were referenced but
                           are not in the branch's revision store
      text_count        -- number of file texts added to text weaves
      revisions         -- dict mapping revision id to the loaded revision
      inventories       -- dict mapping revision id to its loaded inventory
    """

    def __init__(self):
        self.converted_revs = set()
        self.absent_revisions = set()
        self.text_count = 0
        self.revisions = {}
        self.inventories = {}
        self.convert()

    def convert(self):
        """Run the complete conversion of the branch in '.'.

        Steps, in order: back up .bzr, convert the working inventory,
        load every revision reachable from the last history entry,
        convert the revisions in parents-first order, then write the
        weaves and the revisions out.
        """
        enable_default_logging()
        self._backup_control_dir()
        self.pb = ProgressBar()
        if not os.path.isdir('.bzr/weaves'):
            os.mkdir('.bzr/weaves')
        self.inv_weave = Weave('__inventory')
        self.anc_weave = Weave('__ancestry')
        # revision id -> ancestry lines; only filled in when __debug__ is
        # true (see _make_rev_ancestry)
        self.ancestries = {}
        # holds in-memory weaves for all files
        self.text_weaves = {}
        self.branch = Branch('.', relax_version_check=True)
        self._convert_working_inv()
        # NOTE(review): only the first 300 history entries are considered;
        # this looks like a temporary limit -- confirm before using this on
        # a branch with a longer history.
        rev_history = self.branch.revision_history()[:300]
        # to_read is a stack holding the revisions we still need to process;
        # appending to it adds new highest-priority revisions
        self.known_revisions = set(rev_history)
        # Slicing (rather than indexing with [-1]) leaves the stack empty
        # for a branch with no revisions instead of raising IndexError.
        self.to_read = rev_history[-1:]
        while self.to_read:
            rev_id = self.to_read.pop()
            if (rev_id not in self.revisions
                    and rev_id not in self.absent_revisions):
                self._load_one_rev(rev_id)
        self.pb.clear()
        to_import = self._make_order()
        for i, rev_id in enumerate(to_import):
            self.pb.update('converting revision', i, len(to_import))
            self._convert_one_rev(rev_id)
        self.pb.clear()
        note('upgraded to weaves:')
        note(' %6d revisions and inventories' % len(self.revisions))
        note(' %6d absent revisions removed' % len(self.absent_revisions))
        note(' %6d texts' % self.text_count)
        self._write_all_weaves()
        self._write_all_revs()

    def _backup_control_dir(self):
        """Copy .bzr to .bzr.backup and tell the user about it.

        shutil.copytree requires that the destination does not exist yet,
        so this raises if a previous backup is still present.
        """
        shutil.copytree('.bzr', '.bzr.backup')
        note('.bzr has been backed up to .bzr.backup')
        note('if conversion fails, you can move this directory back to .bzr')
        note('if it succeeds, you can remove this directory if you wish')

    def _convert_working_inv(self):
        """Re-serialise the working inventory from the v4 to the v5 format.

        Reads the 'inventory' control file and writes the result to the
        'new-inventory' control file.
        """
        branch = self.branch
        inv = serializer_v4.read_inventory(
            branch.controlfile('inventory', 'rb'))
        serializer_v5.write_inventory(
            inv, branch.controlfile('new-inventory', 'wb'))

    def _write_all_weaves(self):
        """Write the inventory, ancestry and per-file weaves to disk.

        The two control weaves go directly into .bzr/; each text weave goes
        to .bzr/weaves/<file_id>.weave.
        """
        write_a_weave(self.inv_weave, '.bzr/inventory.weave')
        write_a_weave(self.anc_weave, '.bzr/ancestry.weave')
        total = len(self.text_weaves)
        try:
            for i, (file_id, file_weave) in enumerate(
                    self.text_weaves.items()):
                self.pb.update('writing weave', i, total)
                write_a_weave(file_weave, '.bzr/weaves/%s.weave' % file_id)
        finally:
            # always leave the terminal clean, even if a write failed
            self.pb.clear()

    def _write_all_revs(self):
        """Write all revisions out in new form."""
        # NOTE(review): the target path is relative to the current
        # directory, not to .bzr, and nothing in this class creates the
        # 'new-revisions' directory -- confirm this is the intended place.
        try:
            for i, rev_id in enumerate(self.converted_revs):
                self.pb.update('write revision', i, len(self.converted_revs))
                f = open('new-revisions/%s' % rev_id, 'wb')
                try:
                    serializer_v5.write_revision(self.revisions[rev_id], f)
                finally:
                    f.close()
        finally:
            self.pb.clear()

    def _load_one_rev(self, rev_id):
        """Load a revision object into memory.

        Any parents not either loaded or abandoned get queued to be
        loaded.  A revision that is missing from the branch's revision
        store is recorded in absent_revisions instead of being loaded.
        """
        self.pb.update('loading revision',
                       len(self.revisions),
                       len(self.known_revisions))
        if rev_id not in self.branch.revision_store:
            self.pb.clear()
            note('revision {%s} not present in branch; '
                 'will not be converted',
                 rev_id)
            self.absent_revisions.add(rev_id)
            return
        rev_xml = self.branch.revision_store[rev_id].read()
        rev = serializer_v4.read_revision_from_string(rev_xml)
        for parent_id in rev.parent_ids:
            # convert()'s loop skips ids that turn out to be loaded already
            self.known_revisions.add(parent_id)
            self.to_read.append(parent_id)
        self.revisions[rev_id] = rev
        old_inv_xml = self.branch.inventory_store[rev_id].read()
        inv = serializer_v4.read_inventory_from_string(old_inv_xml)
        # the stored inventory must match the hash recorded in the revision
        assert rev.inventory_sha1 == sha_string(old_inv_xml)
        self.inventories[rev_id] = inv

    def _convert_one_rev(self, rev_id):
        """Convert revision and all referenced objects to new format."""
        rev = self.revisions[rev_id]
        inv = self.inventories[rev_id]
        # iterate over a copy: absent parents are removed from the real list
        for parent_id in rev.parent_ids[:]:
            if parent_id in self.absent_revisions:
                rev.parent_ids.remove(parent_id)
                self.pb.clear()
                note('remove {%s} as parent of {%s}', parent_id, rev_id)
        self._convert_revision_contents(rev, inv)
        # the XML is now updated with text versions
        new_inv_xml = serializer_v5.write_inventory_to_string(inv)
        new_inv_sha1 = sha_string(new_inv_xml)
        self.inv_weave.add(rev_id, rev.parent_ids,
                           new_inv_xml.splitlines(True),
                           new_inv_sha1)
        # TODO: Upgrade revision XML and write that out
        rev.inventory_sha1 = new_inv_sha1
        self._make_rev_ancestry(rev)
        self.converted_revs.add(rev_id)

    def _make_rev_ancestry(self, rev):
        """Add the ancestry of `rev` to the ancestry weave.

        The ancestry lines are the mash of the parents' ancestry texts
        followed by this revision's own id.  All parents must already have
        been converted.
        """
        rev_id = rev.revision_id
        for parent_id in rev.parent_ids:
            assert parent_id in self.converted_revs
        if rev.parent_ids:
            lines = list(self.anc_weave.mash_iter(rev.parent_ids))
        else:
            lines = []
        lines.append(rev_id + '\n')
        if __debug__:
            # cross-check the weave-derived ancestry against the one
            # computed by merge_ancestry_lines
            parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
            new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
            assert set(lines) == set(new_lines)
            self.ancestries[rev_id] = new_lines
        self.anc_weave.add(rev_id, rev.parent_ids, lines)

    def _convert_revision_contents(self, rev, inv):
        """Convert all the files within a revision.

        Also upgrade the inventory to refer to the text revision ids."""
        rev_id = rev.revision_id
        mutter('converting texts of revision {%s}',
               rev_id)
        for file_id in inv:
            ie = inv[file_id]
            self._set_name_version(rev, ie)
            # only entries of kind 'file' have a text to convert
            if ie.kind != 'file':
                continue
            self._convert_file_version(rev, ie)

    def _set_name_version(self, rev, ie):
        """Set name version for a file.

        Done in a slightly lazy way: if the file is renamed or in a merge
        revision it gets a new version, otherwise the same as before.

        The entry gets this revision's id unless the revision has exactly
        one parent whose inventory holds the same file id under the same
        name and parent directory; then the parent's name_version is kept.
        """
        file_id = ie.file_id
        name_version = rev.revision_id
        if len(rev.parent_ids) == 1:
            old_inv = self.inventories[rev.parent_ids[0]]
            if old_inv.has_id(file_id):
                old_ie = old_inv[file_id]
                if (old_ie.parent_id == ie.parent_id
                        and old_ie.name == ie.name):
                    name_version = old_ie.name_version
        ie.name_version = name_version

    def _convert_file_version(self, rev, ie):
        """Convert one version of one file.

        The file needs to be added into the weave if it is a merge
        of >=2 parents or if it's changed from its parent.  Otherwise the
        entry simply points at the parent's text version.  In both cases
        the old text_id attribute is removed from the entry.
        """
        file_id = ie.file_id
        rev_id = rev.revision_id
        w = self.text_weaves.get(file_id)
        if w is None:
            w = Weave(file_id)
            self.text_weaves[file_id] = w
        file_lines = self.branch.text_store[ie.text_id].readlines()
        # the stored text must match the hash and size in the inventory
        assert sha_strings(file_lines) == ie.text_sha1
        assert sum(map(len, file_lines)) == ie.text_size
        file_parents = []
        text_changed = False
        for parent_id in rev.parent_ids:
            ##if parent_id in self.absent_revisions:
            ##    continue
            assert parent_id in self.converted_revs, (
                'parent {%s} not converted' % parent_id)
            parent_inv = self.inventories[parent_id]
            if parent_inv.has_id(file_id):
                parent_ie = parent_inv[file_id]
                old_text_version = parent_ie.text_version
                assert old_text_version in self.converted_revs
                if old_text_version not in file_parents:
                    file_parents.append(old_text_version)
                if parent_ie.text_sha1 != ie.text_sha1:
                    text_changed = True
        if len(file_parents) != 1 or text_changed:
            w.add(rev_id, file_parents, file_lines, ie.text_sha1)
            ie.text_version = rev_id
            self.text_count += 1
            ##mutter('import text {%s} of {%s}',
            ##       ie.text_id, file_id)
        else:
            ##mutter('text of {%s} unchanged from parent', file_id)
            ie.text_version = file_parents[0]
        del ie.text_id

    def _make_order(self):
        """Return a suitable order for importing revisions.

        The order must be such that a revision is imported after all
        its (present) parents.  Absent revisions count as already done.

        Raises ValueError if the remaining revisions can never be ordered
        (for example because of a cycle in the parent links), instead of
        looping forever.
        """
        todo = set(self.revisions.keys())
        done = self.absent_revisions.copy()
        order = []
        while todo:
            progressed = False
            # scan through looking for revisions whose parents are all
            # done; sorted() returns a new list, so removing from todo
            # while looping is safe
            for rev_id in sorted(todo):
                rev = self.revisions[rev_id]
                if set(rev.parent_ids).issubset(done):
                    # can take this one now
                    order.append(rev_id)
                    todo.remove(rev_id)
                    done.add(rev_id)
                    progressed = True
            if not progressed:
                raise ValueError('cannot order revisions; parents never '
                                 'become available for: %s'
                                 % ', '.join(sorted(todo)))
        return order

359

360

361

def write_a_weave(weave, filename):
    """Serialise `weave` into the file `filename` using write_weave().

    The file is opened for binary writing and is closed again even if
    writing raises.
    """
    out = open(filename, 'wb')
    try:
        write_weave(weave, out)
    finally:
        out.close()

367

368

369

370

371

def profile_convert():
    """Run the conversion under the hotshot profiler and print statistics.

    Profile data is collected in a temporary file, loaded back, sorted by
    'time' and printed with print_stats(100).  The profiler and the
    temporary file are closed even if the conversion raises.
    """
    prof_f = tempfile.NamedTemporaryFile()
    try:
        prof = hotshot.Profile(prof_f.name)
        try:
            prof.runcall(Convert)
        finally:
            # the profile must be closed before its data can be loaded
            prof.close()

        stats = hotshot.stats.load(prof_f.name)
        ##stats.strip_dirs()
        stats.sort_stats('time')
        # XXX: Might like to write to stderr or the trace file instead but
        # print_stats seems hardcoded to stdout
        stats.print_stats(100)
    finally:
        prof_f.close()

385

386

387

if __name__ == '__main__':
    # Script entry point: a '-p' argument selects the profiled run,
    # otherwise the conversion runs directly.
    enable_default_logging()

    want_profile = '-p' in sys.argv[1:]
    if not want_profile:
        Convert()
    else:
        profile_convert()

Older »