/brz/remove-bazaar : revision 1247

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/commit.py

Committer: Martin Pool
Date: 2005-09-14 06:18:18 UTC
Revision ID: mbp@sourcefrog.net-20050914061818-05a79652196cc758

- tests for deletion and removal of files in commits

files added:
HACKING

Makefile

bzrlib/builtins.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/plugins/__init__.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_parent.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/testfetch.py

bzrlib/selftest/teststore.py

bzrlib/shellcomplete.py

bzrlib/ui.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/weavestore.py

bzrlib/xml5.py

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/revfile.txt

notes/schemas.xml

tools/history2revfiles.py

tools/history2weaves.py

tools/http_client.py

files removed:
plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

files renamed:
plugins/ => bzrlib/plugins/

tools/testweave.py => bzrlib/selftest/test_weave.py

effbot/ => bzrlib/util/effbot/

elementtree/ => bzrlib/util/elementtree/

urlgrabber/ => bzrlib/util/urlgrabber/

bzrlib/newinventory.py => contrib/newinventory.py

files modified:
.bzrignore

NEWS

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/changeset.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/inventory.py

bzrlib/log.py

bzrlib/mdiff.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_core.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/selftest/blackbox.py

bzrlib/selftest/plugins.py

bzrlib/selftest/testbranch.py

bzrlib/selftest/testdiff.py

bzrlib/selftest/testhashcache.py

bzrlib/selftest/testinv.py

bzrlib/selftest/testlog.py

bzrlib/selftest/testmerge3.py

bzrlib/selftest/testrevision.py

bzrlib/selftest/testrevisionnamespaces.py

bzrlib/selftest/teststatus.py

bzrlib/selftest/versioning.py

bzrlib/selftest/whitebox.py

bzrlib/status.py

bzrlib/store.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml.py

doc/index.txt

doc/todo-from-arch.txt

setup.py

testsweet.py

tools/weavebench.py

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/commit.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# FIXME: "bzr commit doc/format" commits doc/format.txt!

def commit(branch, message,

timestamp=None,

timezone=None,

committer=None,

verbose=True,

specific_files=None,

rev_id=None,

allow_pointless=True):

"""Commit working copy as a new revision.

The basic approach is to add all the file texts into the

store, then the inventory, then make a new revision pointing

to that inventory and store that.

This is not quite safe if the working copy changes during the

commit; for the moment that is simply not allowed. A better

approach is to make a temporary copy of the files before

computing their hashes, and then add those hashes in turn to

the inventory. This should mean at least that there are no

broken hash pointers. There is no way we can get a snapshot

of the whole directory at an instant. This would also have to

be robust against files disappearing, moving, etc. So the

whole thing is a bit hard.

This raises PointlessCommit if there are no changes, no new merges,

and allow_pointless is false.

timestamp -- if not None, seconds-since-epoch for a

postdated/predated commit.

specific_files

If true, commit only those files.

rev_id

If set, use this as the new revision id.

Useful for test or import commands that need to tightly

control what revisions are assigned. If you duplicate

a revision id that exists elsewhere it is your own fault.

If null (default), a time/random revision id is generated.

"""

import time, tempfile

from bzrlib.osutils import local_time_offset, username

from bzrlib.branch import gen_file_id

from bzrlib.errors import BzrError, PointlessCommit

from bzrlib.revision import Revision, RevisionReference

from bzrlib.trace import mutter, note

from bzrlib.xml import pack_xml

branch.lock_write()

try:

# First walk over the working inventory; and both update that

# and also build a new revision inventory. The revision

# inventory needs to hold the text-id, sha1 and size of the

# actual file versions committed in the revision. (These are

# not present in the working inventory.) We also need to

# detect missing/deleted files, and remove them from the

# working inventory.

work_tree = branch.working_tree()

work_inv = work_tree.inventory

basis = branch.basis_tree()

basis_inv = basis.inventory

if verbose:

note('looking for changes...')

pending_merges = branch.pending_merges()

missing_ids, new_inv, any_changes = \

_gather_commit(branch,

work_tree,

work_inv,

basis_inv,

specific_files,

verbose)

100

if not (any_changes or allow_pointless or pending_merges):

101

raise PointlessCommit()

102

103

for file_id in missing_ids:

104

# Any files that have been deleted are now removed from the

105

# working inventory. Files that were not selected for commit

106

# are left as they were in the working inventory and omitted

107

# from the revision inventory.

108

109

# have to do this later so we don't mess up the iterator.

110

# since parents may be removed before their children we

111

# have to test.

112

113

# FIXME: There's probably a better way to do this; perhaps

114

# the workingtree should know how to filter itself.

115

if work_inv.has_id(file_id):

116

del work_inv[file_id]

117

118

119

if rev_id is None:

120

rev_id = _gen_revision_id(time.time())

121

inv_id = rev_id

122

123

inv_tmp = tempfile.TemporaryFile()

124

pack_xml(new_inv, inv_tmp)

## XXX: Can we do any better about making interrupted commits change

## nothing?

## XXX: If we merged two versions of a file then we still need to

## create a new version representing that merge, even if it didn't

## change from the parent.

## TODO: Read back the just-generated changeset, and make sure it

## applies and recreates the right state.

## This is not quite safe if the working copy changes during the

## commit; for the moment that is simply not allowed. A better

## approach is to make a temporary copy of the files before

## computing their hashes, and then add those hashes in turn to

## the inventory. This should mean at least that there are no

## broken hash pointers. There is no way we can get a snapshot

## of the whole directory at an instant. This would also have to

## be robust against files disappearing, moving, etc. So the

## whole thing is a bit hard.

## The newly committed revision is going to have a shape corresponding

## to that of the working inventory. Files that are not in the

## working tree and that were in the predecessor are reported as

## removed -- this can include files that were either removed from the

## inventory or deleted in the working tree. If they were only

## deleted from disk, they are removed from the working inventory.

## We then consider the remaining entries, which will be in the new

## version. Directory entries are simply copied across. File entries

## must be checked to see if a new version of the file should be

## recorded. For each parent revision inventory, we check to see what

## version of the file was present. If the file was present in at

## least one tree, and if it was the same version in all the trees,

## then we can just refer to that version. Otherwise, a new version

## representing the merger of the file versions must be added.

import os

import sys

import time

import tempfile

import sha

from binascii import hexlify

from cStringIO import StringIO

from bzrlib.osutils import (local_time_offset, username,

rand_bytes, compact_date, user_email,

kind_marker, is_inside_any, quotefn,

sha_string, sha_strings, sha_file, isdir, isfile)

from bzrlib.branch import gen_file_id, INVENTORY_FILEID, ANCESTRY_FILEID

from bzrlib.errors import BzrError, PointlessCommit

from bzrlib.revision import Revision, RevisionReference

from bzrlib.trace import mutter, note

from bzrlib.xml5 import serializer_v5

from bzrlib.inventory import Inventory

from bzrlib.delta import compare_trees

from bzrlib.weave import Weave

from bzrlib.weavefile import read_weave, write_weave_v5

from bzrlib.atomicfile import AtomicFile

def commit(*args, **kwargs):
    """Record a new revision on a branch (function-style entry point).

    All positional and keyword arguments are handed unchanged to the
    ``commit`` method of a freshly created ``Commit`` object built with
    its default reporter.  Nothing is returned.

    Kept for the convenience of old callers; new code should use the
    Commit class instead.
    """
    task = Commit()
    task.commit(*args, **kwargs)

class NullCommitReporter(object):
    """Commit progress reporter that silently ignores every event.

    Each method accepts the event's path argument(s), does nothing and
    returns None.  Subclasses override the methods they care about.
    """

    def added(self, path):
        """Ignore an 'added' event for path."""

    def removed(self, path):
        """Ignore a 'removed' event for path."""

    def renamed(self, old_path, new_path):
        """Ignore a 'renamed' event from old_path to new_path."""

104

105

106

class ReportCommitToLog(NullCommitReporter):
    """Commit progress reporter that forwards every event to ``note``."""

    def added(self, path):
        """Log that path was added."""
        note('added %s', path)

    def removed(self, path):
        """Log that path was removed."""
        note('removed %s', path)

    def renamed(self, old_path, new_path):
        """Log that old_path was renamed to new_path."""
        note('renamed %s => %s', old_path, new_path)

115

116

117

class Commit(object):

118

"""Task of committing a new revision.

119

120

This is a MethodObject: it accumulates state as the commit is

121

prepared, and then it is discarded. It doesn't represent

122

historical revisions, just the act of recording a new one.

123

124

missing_ids

125

Modified to hold a list of files that have been deleted from

126

the working directory; these should be removed from the

127

working inventory.

128

"""

129

def __init__(self,

130

reporter=None):

131

if reporter is not None:

132

self.reporter = reporter

133

else:

134

self.reporter = NullCommitReporter()

135

136

137

def commit(self,
           branch, message,
           timestamp=None,
           timezone=None,
           committer=None,
           specific_files=None,
           rev_id=None,
           allow_pointless=True):
    """Commit working copy as a new revision.

    The basic approach is to add all the file texts into the
    store, then the inventory, then make a new revision pointing
    to that inventory and store that.

    This raises PointlessCommit if the tree comparison reports no
    changes, allow_pointless is false, and the new revision has exactly
    one parent.

    branch
        Branch to commit to.  It is write-locked for the duration of
        the commit and always unlocked again, even on error.

    message
        Commit message; must be a string (asserted).

    timestamp -- if not None, seconds-since-epoch for a
        postdated/predated commit.  Defaults to the current time.

    timezone
        Offset stored with the revision, converted with int().  Defaults
        to local_time_offset().

    committer
        Committer identity string (asserted).  Defaults to
        username(branch).

    specific_files
        If true, commit only those files.

    rev_id
        If set, use this as the new revision id.
        Useful for test or import commands that need to tightly
        control what revisions are assigned.  If you duplicate
        a revision id that exists elsewhere it is your own fault.
        If null (default), a time/random revision id is generated.

    allow_pointless
        If false, refuse (PointlessCommit) to record a revision that
        changes nothing.
    """
    self.branch = branch
    self.rev_id = rev_id
    self.specific_files = specific_files
    self.allow_pointless = allow_pointless

    if timestamp is None:
        self.timestamp = time.time()
    else:
        self.timestamp = long(timestamp)

    if committer is None:
        self.committer = username(self.branch)
    else:
        assert isinstance(committer, basestring), type(committer)
        self.committer = committer

    if timezone is None:
        self.timezone = local_time_offset()
    else:
        self.timezone = int(timezone)

    assert isinstance(message, basestring), type(message)
    self.message = message

    self.branch.lock_write()
    try:
        # First walk over the working inventory; and both update that
        # and also build a new revision inventory.  The revision
        # inventory needs to hold the text-id, sha1 and size of the
        # actual file versions committed in the revision.  (These are
        # not present in the working inventory.)  We also need to
        # detect missing/deleted files, and remove them from the
        # working inventory.

        self.work_tree = self.branch.working_tree()
        self.work_inv = self.work_tree.inventory
        self.basis_tree = self.branch.basis_tree()
        self.basis_inv = self.basis_tree.inventory

        self._gather_parents()

        if self.rev_id is None:
            # NOTE(review): the generated id uses the current time, not
            # self.timestamp, even for a postdated commit -- confirm
            # that this is intended.
            self.rev_id = _gen_revision_id(self.branch, time.time())

        self._remove_deletions()

        # TODO: update hashcache
        self.delta = compare_trees(self.basis_tree, self.work_tree,
                                   specific_files=self.specific_files)

        if not (self.delta.has_changed()
                or self.allow_pointless
                or len(self.parents) != 1):
            raise PointlessCommit()

        # Start from the basis inventory; the helper steps below remove
        # deleted entries from it and add or update changed ones.
        self.new_inv = self.basis_inv.copy()

        ## FIXME: Don't write to stdout!
        self.delta.show(sys.stdout)

        self._remove_deleted()
        self._store_files()

        self.branch._write_inventory(self.work_inv)
        self._record_inventory()
        self._record_ancestry()

        self._make_revision()
        # Logged before append_revision, hence revno() + 1.
        note('committed r%d {%s}', (self.branch.revno() + 1),
             self.rev_id)
        self.branch.append_revision(self.rev_id)
        self.branch.set_pending_merges([])
    finally:
        self.branch.unlock()

242

243

244

245

def _remove_deletions(self):

246

"""Remove deleted files from the working inventory."""

247

pass

248

249

250

251

def _record_inventory(self):

252

"""Store the inventory for the new revision."""

253

inv_tmp = StringIO()

254

serializer_v5.write_inventory(self.new_inv, inv_tmp)

125

255

inv_tmp.seek(0)

126

branch.inventory_store.add(inv_tmp, inv_id)

127

mutter('new inventory_id is {%s}' % inv_id)

128

129

# We could also just sha hash the inv_tmp file

130

# however, in the case that branch.inventory_store.add()

131

# ever actually does anything special

132

inv_sha1 = branch.get_inventory_sha1(inv_id)

133

134

branch._write_inventory(work_inv)

135

136

if timestamp == None:

137

timestamp = time.time()

138

139

if committer == None:

140

committer = username()

141

142

if timezone == None:

143

timezone = local_time_offset()

144

145

mutter("building commit log message")

146

rev = Revision(timestamp=timestamp,

147

timezone=timezone,

148

committer=committer,

149

message = message,

150

inventory_id=inv_id,

151

inventory_sha1=inv_sha1,

152

revision_id=rev_id)

153

154

rev.parents = []

155

precursor_id = branch.last_patch()

256

self.inv_sha1 = sha_string(inv_tmp.getvalue())

257

inv_lines = inv_tmp.readlines()

258

self.branch.weave_store.add_text(INVENTORY_FILEID, self.rev_id,

259

inv_lines, self.parents)

260

261

262

def _record_ancestry(self):
    """Store the ancestry text of the new revision in the ancestry weave.

    The new text is the first parent's ancestry lines (or nothing when
    there is no parent) followed by one line holding this revision's id.
    It is added to the weave kept under ANCESTRY_FILEID and the weave is
    written back to the branch's weave store.

    Raises NotImplementedError when there is more than one parent.
    """
    parent_ids = self.parents
    if len(parent_ids) > 1:
        raise NotImplementedError("sorry, can't commit merges yet")

    ancestry_weave = self.branch.weave_store.get_weave_or_empty(
        ANCESTRY_FILEID)

    if parent_ids:
        first_parent_idx = ancestry_weave.lookup(parent_ids[0])
        ancestry_lines = ancestry_weave.get(first_parent_idx)
    else:
        ancestry_lines = []
    ancestry_lines.append(self.rev_id + '\n')

    parent_idxs = [ancestry_weave.lookup(parent) for parent in parent_ids]
    ancestry_weave.add(self.rev_id, parent_idxs, ancestry_lines)
    self.branch.weave_store.put_weave(ANCESTRY_FILEID, ancestry_weave)

275

276

277

def _gather_parents(self):

278

pending_merges = self.branch.pending_merges()

279

if pending_merges:

280

raise NotImplementedError("sorry, can't commit merges to the weave format yet")

281

self.parents = []

282

precursor_id = self.branch.last_revision()

156

283

if precursor_id:

157

precursor_sha1 = branch.get_revision_sha1(precursor_id)

158

rev.parents.append(RevisionReference(precursor_id, precursor_sha1))

159

for merge_rev in pending_merges:

160

rev.parents.append(RevisionReference(merge_rev))

161

284

self.parents.append(precursor_id)

285

self.parents += pending_merges

286

287

288

def _make_revision(self):

289

"""Record a new revision object for this commit."""

290

self.rev = Revision(timestamp=self.timestamp,

291

timezone=self.timezone,

292

committer=self.committer,

293

message=self.message,

294

inventory_sha1=self.inv_sha1,

295

revision_id=self.rev_id)

296

self.rev.parents = map(RevisionReference, self.parents)

162

297

rev_tmp = tempfile.TemporaryFile()

163

pack_xml(rev, rev_tmp)

298

serializer_v5.write_revision(self.rev, rev_tmp)

164

299

rev_tmp.seek(0)

165

branch.revision_store.add(rev_tmp, rev_id)

166

mutter("new revision_id is {%s}" % rev_id)

167

168

## XXX: Everything up to here can simply be orphaned if we abort

169

## the commit; it will leave junk files behind but that doesn't

170

## matter.

171

172

## TODO: Read back the just-generated changeset, and make sure it

173

## applies and recreates the right state.

174

175

## TODO: Also calculate and store the inventory SHA1

176

mutter("committing patch r%d" % (branch.revno() + 1))

177

178

branch.append_revision(rev_id)

179

180

branch.set_pending_merges([])

181

182

if verbose:

183

note("commited r%d" % branch.revno())

184

finally:

185

branch.unlock()

186

187

188

189

def _gen_revision_id(when):

300

self.branch.revision_store.add(rev_tmp, self.rev_id)

301

mutter('new revision_id is {%s}', self.rev_id)

302

303

304

def _remove_deleted(self):

305

"""Remove deleted files from the working and stored inventories."""

306

for path, id, kind in self.delta.removed:

307

if self.work_inv.has_id(id):

308

del self.work_inv[id]

309

if self.new_inv.has_id(id):

310

del self.new_inv[id]

311

312

313

314

def _store_files(self):

315

"""Store new texts of modified/added files."""

316

# We must make sure that directories are added before anything

317

# inside them is added. the files within the delta report are

318

# sorted by path so we know the directory will come before its

319

# contents.

320

for path, file_id, kind in self.delta.added:

321

if kind != 'file':

322

ie = self.work_inv[file_id].copy()

323

self.new_inv.add(ie)

324

else:

325

self._store_file_text(file_id)

326

327

for path, file_id, kind in self.delta.modified:

328

if kind != 'file':

329

continue

330

self._store_file_text(file_id)

331

332

for old_path, new_path, file_id, kind, text_modified in self.delta.renamed:

333

if kind != 'file':

334

continue

335

if not text_modified:

336

continue

337

self._store_file_text(file_id)

338

339

340

def _store_file_text(self, file_id):
    """Store the working-tree text of one added or modified file.

    The file's lines are read from the working tree and added to the
    weave store under this commit's revision id.  The file's entry in
    the new inventory is then stamped with the text's sha1, its size
    and the revision id (as both text version and entry version).

    A file already present in the new inventory has its basis text
    version as the single weave parent.  Otherwise a copy of the
    working inventory entry is added to the new inventory and the text
    has no parents.
    """
    note('store new text for {%s} in revision {%s}',
         file_id, self.rev_id)
    text_lines = self.work_tree.get_file(file_id).readlines()

    if file_id in self.new_inv:     # was in basis inventory
        new_entry = self.new_inv[file_id]
        assert new_entry.file_id == file_id
        assert file_id in self.basis_inv
        basis_entry = self.basis_inv[file_id]
        assert basis_entry.kind == 'file'
        text_parents = [basis_entry.text_version]
    else:                           # new in this revision
        new_entry = self.work_inv[file_id].copy()
        self.new_inv.add(new_entry)
        assert file_id not in self.basis_inv
        text_parents = []

    assert new_entry.kind == 'file'
    self._add_text_to_weave(file_id, text_lines, text_parents)

    # The entry keeps whatever it had in the working copy, plus the
    # details of the text just stored.
    new_entry.text_sha1 = sha_strings(text_lines)
    new_entry.text_size = sum([len(line) for line in text_lines])
    new_entry.text_version = self.rev_id
    new_entry.entry_version = self.rev_id

365

366

367

def _add_text_to_weave(self, file_id, new_lines, parents):

368

if file_id.startswith('__'):

369

raise ValueError('illegal file-id %r for text file' % file_id)

370

self.branch.weave_store.add_text(file_id, self.rev_id, new_lines, parents)

371

372

373

def _gen_revision_id(branch, when):

190

374

"""Return new revision-id."""

191

from binascii import hexlify

192

from osutils import rand_bytes, compact_date, user_email

193

194

s = '%s-%s-' % (user_email(), compact_date(when))

375

s = '%s-%s-' % (user_email(branch), compact_date(when))

195

376

s += hexlify(rand_bytes(8))

196

377

return s

197

378

198

199

def _gather_commit(branch, work_tree, work_inv, basis_inv, specific_files,
                   verbose):
    """Build inventory preparatory to commit.

    Returns missing_ids, new_inv, any_changes.

    This adds any changed files into the text store, and sets their
    text-id, sha and size in the returned inventory appropriately.

    branch
        Branch being committed; used to resolve absolute paths and to
        reach the text store.
    work_tree, work_inv
        Working tree and its inventory; every entry of work_inv is
        examined.
    basis_inv
        Inventory of the basis revision, used to detect what changed.
    specific_files
        If true, only entries inside these paths are committed; other
        entries are carried over from basis_inv when present there and
        otherwise omitted.
    verbose
        If true, print one line per deleted/added/modified/renamed
        entry.  It only controls printing, never the returned values.

    missing_ids
        Returned list of ids of files that have been deleted from
        the working directory; these should be removed from the
        working inventory.

    any_changes
        True if any selected entry was deleted, added, modified or
        renamed relative to basis_inv.

    Raises BzrError if an entry changed kind, or if its kind does not
    match what is found on disk.
    """
    from bzrlib.inventory import Inventory
    from bzrlib.osutils import isdir, isfile, sha_string, quotefn, \
         kind_marker, is_inside_any

    from bzrlib.branch import gen_file_id
    from bzrlib.errors import BzrError
    from bzrlib.trace import mutter

    any_changes = False
    inv = Inventory(work_inv.root.file_id)
    missing_ids = []

    for path, entry in work_inv.iter_entries():
        ## TODO: Check that the file kind has not changed from the previous
        ## revision of this file (if any).

        p = branch.abspath(path)
        file_id = entry.file_id
        mutter('commit prep file %s, id %r ' % (p, file_id))

        if specific_files and not is_inside_any(specific_files, path):
            mutter(' skipping file excluded from commit')
            if basis_inv.has_id(file_id):
                # carry over with previous state
                inv.add(basis_inv[file_id].copy())
            # otherwise omit this from the committed inventory
            continue

        if not work_tree.has_id(file_id):
            # A deletion is a change whether or not it is reported.
            any_changes = True
            if verbose:
                print('deleted %s%s' % (path, kind_marker(entry.kind)))
            mutter(" file is missing, removing from inventory")
            missing_ids.append(file_id)
            continue

        # this is present in the new inventory; may be new, modified or
        # unchanged.
        old_ie = basis_inv.has_id(file_id) and basis_inv[file_id]

        entry = entry.copy()
        inv.add(entry)

        if old_ie:
            old_kind = old_ie.kind
            if old_kind != entry.kind:
                raise BzrError("entry %r changed kind from %r to %r"
                               % (file_id, old_kind, entry.kind))

        if entry.kind == 'directory':
            if not isdir(p):
                raise BzrError("%s is entered as directory but not a directory"
                               % quotefn(p))
        elif entry.kind == 'file':
            if not isfile(p):
                raise BzrError("%s is entered as file but is not a file" % quotefn(p))

            new_sha1 = work_tree.get_file_sha1(file_id)

            if (old_ie
                and old_ie.text_sha1 == new_sha1):
                ## assert content == basis.get_file(file_id).read()
                entry.text_id = old_ie.text_id
                entry.text_sha1 = new_sha1
                entry.text_size = old_ie.text_size
                mutter(' unchanged from previous text_id {%s}' %
                       entry.text_id)
            else:
                # Close the file explicitly instead of relying on
                # garbage collection.
                text_file = open(p, 'rb')
                try:
                    content = text_file.read()
                finally:
                    text_file.close()

                # calculate the sha again, just in case the file contents
                # changed since we updated the cache
                entry.text_sha1 = sha_string(content)
                entry.text_size = len(content)

                entry.text_id = gen_file_id(entry.name)
                branch.text_store.add(content, entry.text_id)
                mutter(' stored with text_id {%s}' % entry.text_id)

        # Classify the change independently of verbose so that
        # any_changes is correct for quiet commits too.
        if not old_ie:
            change = 'added'
        elif old_ie == entry:
            change = None               # unchanged
        elif (old_ie.name == entry.name
              and old_ie.parent_id == entry.parent_id):
            change = 'modified'
        else:
            change = 'renamed'

        if change is not None:
            any_changes = True
            if verbose:
                print('%s %s' % (change, path + kind_marker(entry.kind)))

    return missing_ids, inv, any_changes

311

312

Older »