/brz/remove-bazaar : revision 862

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: Martin Pool
Date: 2005-07-08 02:40:22 UTC
Revision ID: mbp@sourcefrog.net-20050708024021-731a320c625619f6

- code to re-read hashcache from file

files added:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

files removed:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugins/__init__.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/emacs

contrib/emacs/bzr-mode.el

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/tests/test_weave.py => tools/testweave.py

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/convertfile.py

tools/convertinv.py

Show diffs side-by-side

added added

removed removed

bzrlib/hashcache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Up-front, stat all files in order and remove those which are deleted or

# out-of-date. Don't actually re-read them until they're needed. That ought

# to bring all the inodes into core so that future stats to them are fast, and

# it preserves the nice property that any caller will always get up-to-date

# data except in unavoidable cases.

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

# TODO: Perhaps use a Python pickle instead of a text file; might be faster.

CACHE_HEADER = "### bzr hashcache v5\n"

import os, stat, time

import sha

from bzrlib.osutils import sha_file, pathjoin, safe_unicode

from bzrlib.trace import mutter, warning

from bzrlib.atomicfile import AtomicFile

from bzrlib.errors import BzrError

FP_MTIME_COLUMN = 1

FP_CTIME_COLUMN = 2

FP_MODE_COLUMN = 5

CACHE_HEADER = "### bzr statcache v5\n"

def _fingerprint(abspath):

import os, stat

try:

fs = os.lstat(abspath)

except OSError:

if stat.S_ISDIR(fs.st_mode):

return None

# we discard any high precision because it's not reliable; perhaps we

# could do better on some systems?

return (fs.st_size, long(fs.st_mtime),

long(fs.st_ctime), fs.st_ino, fs.st_dev, fs.st_mode)

return (fs.st_size, fs.st_mtime,

fs.st_ctime, fs.st_ino, fs.st_dev)

class HashCache(object):

miss_count

number of misses (times files have been completely re-read)

"""

needs_write = False

def __init__(self, root, cache_file_name, mode=None):

"""Create a hash cache in base dir, and set the file mode to mode."""

self.root = safe_unicode(root)

def __init__(self, basedir):

self.basedir = basedir

100

self.hit_count = 0

101

self.miss_count = 0

102

self.stat_count = 0

103

self.danger_count = 0

104

self.removed_count = 0

105

self.update_count = 0

106

self._cache = {}

107

self._mode = mode

108

self._cache_file_name = safe_unicode(cache_file_name)

109

110

def cache_file_name(self):

111

return self._cache_file_name

112

113

def clear(self):

114

"""Discard all cached information.

115

116

This does not reset the counters."""

117

if self._cache:

118

self.needs_write = True

119

self._cache = {}

120

121

def scan(self):

122

"""Scan all files and remove entries where the cache entry is obsolete.

123

124

Obsolete entries are those where the file has been modified or deleted

125

since the entry was inserted.

126

"""

127

# FIXME optimisation opportunity, on linux [and check other oses]:

128

# rather than iteritems order, stat in inode order.

129

prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]

130

prep.sort()

131

132

for inum, path, cache_entry in prep:

133

abspath = pathjoin(self.root, path)

134

fp = _fingerprint(abspath)

135

self.stat_count += 1

136

137

cache_fp = cache_entry[1]

138

139

if (not fp) or (cache_fp != fp):

140

# not here or not a regular file anymore

141

self.removed_count += 1

142

self.needs_write = True

143

del self._cache[path]

self._cache_sha1 = {}

144

145

146

def get_sha1(self, path):

147

"""Return the sha1 of a file.

"""Return the hex SHA-1 of the contents of the file at path.

XXX: If the file does not exist or is not a plain file???

148

"""

149

abspath = pathjoin(self.root, path)

import os, time

from bzrlib.osutils import sha_file

abspath = os.path.join(self.basedir, path)

100

fp = _fingerprint(abspath)

101

c = self._cache.get(path)

102

if c:

103

cache_sha1, cache_fp = c

104

else:

105

cache_sha1, cache_fp = None, None

106

150

107

self.stat_count += 1

151

file_fp = _fingerprint(abspath)

152

153

if not file_fp:

154

# not a regular file or not existing

155

if path in self._cache:

156

self.removed_count += 1

157

self.needs_write = True

158

del self._cache[path]

159

return None

160

161

if path in self._cache:

162

cache_sha1, cache_fp = self._cache[path]

163

else:

164

cache_sha1, cache_fp = None, None

165

166

if cache_fp == file_fp:

108

109

if not fp:

110

# not a regular file

111

return None

112

elif cache_fp and (cache_fp == fp):

167

113

self.hit_count += 1

168

114

return cache_sha1

169

170

self.miss_count += 1

171

172

173

mode = file_fp[FP_MODE_COLUMN]

174

if stat.S_ISREG(mode):

175

digest = sha_file(file(abspath, 'rb', buffering=65000))

176

elif stat.S_ISLNK(mode):

177

digest = sha.new(os.readlink(abspath)).hexdigest()

178

else:

179

raise BzrError("file %r: unknown file stat mode: %o"%(abspath,mode))

180

181

now = int(time.time())

182

if file_fp[FP_MTIME_COLUMN] >= now or file_fp[FP_CTIME_COLUMN] >= now:

183

# changed too recently; can't be cached. we can

184

# return the result and it could possibly be cached

185

# next time.

186

187

# the point is that we only want to cache when we are sure that any

188

# subsequent modifications of the file can be detected. If a

189

# modification neither changes the inode, the device, the size, nor

190

# the mode, then we can only distinguish it by time; therefore we

191

# need to let sufficient time elapse before we may cache this entry

192

# again. If we didn't do this, then, for example, a very quick 1

193

# byte replacement in the file might go undetected.

194

self.danger_count += 1

195

if cache_fp:

196

self.removed_count += 1

197

self.needs_write = True

198

del self._cache[path]

199

else:

200

self.update_count += 1

201

self.needs_write = True

202

self._cache[path] = (digest, file_fp)

203

return digest

204

205

def write(self):

115

else:

116

self.miss_count += 1

117

digest = sha_file(file(abspath, 'rb'))

118

119

now = int(time.time())

120

if fp[1] >= now or fp[2] >= now:

121

# changed too recently; can't be cached. we can

122

# return the result and it could possibly be cached

123

# next time.

124

self.danger_count += 1

125

if cache_fp:

126

del self._cache[path]

127

else:

128

self._cache[path] = (digest, fp)

129

130

return digest

131

132

133

134

def write(self, cachefn):

206

135

"""Write contents of cache to file."""

207

outf = AtomicFile(self.cache_file_name(), 'wb', new_mode=self._mode)

136

from atomicfile import AtomicFile

137

138

outf = AtomicFile(cachefn, 'wb')

208

139

try:

209

140

print >>outf, CACHE_HEADER,

210

141

216

147

for fld in c[1]:

217

148

print >>outf, "%d" % fld,

218

149

print >>outf

150

219

151

outf.commit()

220

self.needs_write = False

221

mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",

222

self.cache_file_name(), self.hit_count, self.miss_count,

223

self.stat_count,

224

self.danger_count, self.update_count)

225

152

finally:

226

153

if not outf.closed:

227

154

outf.abort()

228

229

def read(self):

155

156

157

158

def read(self, cachefn):

230

159

"""Reinstate cache from file.

231

160

232

161

Overwrites existing cache.

233

162

234

163

If the cache file has the wrong version marker, this just clears

235

164

the cache."""

165

from bzrlib.trace import mutter, warning

166

167

inf = file(cachefn, 'rb')

236

168

self._cache = {}

237

169

238

fn = self.cache_file_name()

239

try:

240

inf = file(fn, 'rb', buffering=65000)

241

except IOError, e:

242

mutter("failed to open %s: %s", fn, e)

243

# better write it now so it is valid

244

self.needs_write = True

245

return

246

247

248

170

hdr = inf.readline()

249

171

if hdr != CACHE_HEADER:

250

mutter('cache header marker not found at top of %s;'

251

' discarding cache', fn)

252

self.needs_write = True

172

mutter('cache header marker not found at top of %s; discarding cache'

173

% cachefn)

253

174

return

254

175

255

176

for l in inf:

261

182

262

183

pos += 3

263

184

fields = l[pos:].split(' ')

264

if len(fields) != 7:

185

if len(fields) != 6:

265

186

warning("bad line in hashcache: %r" % l)

266

187

continue

267

188

274

195

275

196

self._cache[path] = (sha1, fp)

276

197

277

self.needs_write = False

278

279

280

198

281

199

Older »