/brz/remove-bazaar : revision 856

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/hashcache.py

Committer: Martin Pool
Date: 2005-07-07 10:31:36 UTC
Revision ID: mbp@sourcefrog.net-20050707103135-9b4d911d8df6e880

- fix pwk help

files added:
bzrlib/changeset.py

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

files removed:
BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS.developers

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/plugins/__init__.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/emacs

contrib/emacs/bzr-mode.el

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/tests/test_weave.py => tools/testweave.py

bzrlib/util/urlgrabber/ => urlgrabber/

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/convertfile.py

tools/convertinv.py

Show diffs side-by-side

added added

removed removed

bzrlib/hashcache.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# TODO: Up-front, stat all files in order and remove those which are deleted or

# out-of-date. Don't actually re-read them until they're needed. That ought

# to bring all the inodes into core so that future stats to them are fast, and

# it preserves the nice property that any caller will always get up-to-date

# data except in unavoidable cases.

# TODO: Perhaps return more details on the file to avoid statting it

# again: nonexistent, file type, size, etc

# TODO: Perhaps use a Python pickle instead of a text file; might be faster.

CACHE_HEADER = "### bzr hashcache v5\n"

import os, stat, time

import sha

from bzrlib.osutils import sha_file, pathjoin, safe_unicode

from bzrlib.trace import mutter, warning

from bzrlib.atomicfile import AtomicFile

from bzrlib.errors import BzrError

FP_MTIME_COLUMN = 1

FP_CTIME_COLUMN = 2

FP_MODE_COLUMN = 5

def _fingerprint(abspath):

import os, stat

try:

fs = os.lstat(abspath)

except OSError:

if stat.S_ISDIR(fs.st_mode):

return None

# we discard any high precision because it's not reliable; perhaps we

# could do better on some systems?

return (fs.st_size, long(fs.st_mtime),

long(fs.st_ctime), fs.st_ino, fs.st_dev, fs.st_mode)

return (fs.st_size, fs.st_mtime,

fs.st_ctime, fs.st_ino, fs.st_dev)

class HashCache(object):

This does not canonicalize the paths passed in; that should be

done by the caller.

_cache

Indexed by path, points to a two-tuple of the SHA-1 of the file

and its fingerprint.

cache_sha1

Indexed by path, gives the SHA-1 of the file.

validator

Indexed by path, gives the fingerprint of the file last time it was read.

stat_count

number of times files have been statted

miss_count

number of misses (times files have been completely re-read)

"""

needs_write = False

def __init__(self, root, cache_file_name, mode=None):

"""Create a hash cache in base dir, and set the file mode to mode."""

self.root = safe_unicode(root)

def __init__(self, basedir):

self.basedir = basedir

100

self.hit_count = 0

101

self.miss_count = 0

102

self.stat_count = 0

103

self.danger_count = 0

104

self.removed_count = 0

105

self.update_count = 0

106

self._cache = {}

107

self._mode = mode

108

self._cache_file_name = safe_unicode(cache_file_name)

self.cache_sha1 = {}

self.validator = {}

109

110

def cache_file_name(self):

111

return self._cache_file_name

112

113

def clear(self):

114

"""Discard all cached information.

115

116

This does not reset the counters."""

117

if self._cache:

118

self.needs_write = True

119

self._cache = {}

120

121

def scan(self):

122

"""Scan all files and remove entries where the cache entry is obsolete.

123

124

Obsolete entries are those where the file has been modified or deleted

125

since the entry was inserted.

126

"""

127

# FIXME optimisation opportunity, on linux [and check other oses]:

128

# rather than iteritems order, stat in inode order.

129

prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]

130

prep.sort()

131

132

for inum, path, cache_entry in prep:

133

abspath = pathjoin(self.root, path)

134

fp = _fingerprint(abspath)

135

self.stat_count += 1

136

137

cache_fp = cache_entry[1]

138

139

if (not fp) or (cache_fp != fp):

140

# not here or not a regular file anymore

141

self.removed_count += 1

142

self.needs_write = True

143

del self._cache[path]

"""Discard all cached information."""

self.validator = {}

self.cache_sha1 = {}

144

145

146

def get_sha1(self, path):

147

"""Return the sha1 of a file.

"""Return the hex SHA-1 of the contents of the file at path.

XXX: If the file does not exist or is not a plain file???

148

"""

149

abspath = pathjoin(self.root, path)

import os, time

from bzrlib.osutils import sha_file

abspath = os.path.join(self.basedir, path)

fp = _fingerprint(abspath)

cache_fp = self.validator.get(path)

100

150

101

self.stat_count += 1

151

file_fp = _fingerprint(abspath)

152

153

if not file_fp:

154

# not a regular file or not existing

155

if path in self._cache:

156

self.removed_count += 1

157

self.needs_write = True

158

del self._cache[path]

159

return None

160

161

if path in self._cache:

162

cache_sha1, cache_fp = self._cache[path]

163

else:

164

cache_sha1, cache_fp = None, None

165

166

if cache_fp == file_fp:

102

103

if not fp:

104

# not a regular file

105

return None

106

elif cache_fp and (cache_fp == fp):

167

107

self.hit_count += 1

168

return cache_sha1

169

170

self.miss_count += 1

171

172

173

mode = file_fp[FP_MODE_COLUMN]

174

if stat.S_ISREG(mode):

175

digest = sha_file(file(abspath, 'rb', buffering=65000))

176

elif stat.S_ISLNK(mode):

177

digest = sha.new(os.readlink(abspath)).hexdigest()

178

else:

179

raise BzrError("file %r: unknown file stat mode: %o"%(abspath,mode))

180

181

now = int(time.time())

182

if file_fp[FP_MTIME_COLUMN] >= now or file_fp[FP_CTIME_COLUMN] >= now:

183

# changed too recently; can't be cached. we can

184

# return the result and it could possibly be cached

185

# next time.

186

187

# the point is that we only want to cache when we are sure that any

188

# subsequent modifications of the file can be detected. If a

189

# modification neither changes the inode, the device, the size, nor

190

# the mode, then we can only distinguish it by time; therefore we

191

# need to let sufficient time elapse before we may cache this entry

192

# again. If we didn't do this, then, for example, a very quick 1

193

# byte replacement in the file might go undetected.

194

self.danger_count += 1

195

if cache_fp:

196

self.removed_count += 1

197

self.needs_write = True

198

del self._cache[path]

199

else:

200

self.update_count += 1

201

self.needs_write = True

202

self._cache[path] = (digest, file_fp)

203

return digest

204

205

def write(self):

206

"""Write contents of cache to file."""

207

outf = AtomicFile(self.cache_file_name(), 'wb', new_mode=self._mode)

208

try:

209

print >>outf, CACHE_HEADER,

210

211

for path, c in self._cache.iteritems():

212

assert '//' not in path, path

213

outf.write(path.encode('utf-8'))

214

outf.write('// ')

215

print >>outf, c[0], # hex sha1

216

for fld in c[1]:

217

print >>outf, "%d" % fld,

218

print >>outf

219

outf.commit()

220

self.needs_write = False

221

mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",

222

self.cache_file_name(), self.hit_count, self.miss_count,

223

self.stat_count,

224

self.danger_count, self.update_count)

225

finally:

226

if not outf.closed:

227

outf.abort()

228

229

def read(self):

230

"""Reinstate cache from file.

231

232

Overwrites existing cache.

233

234

If the cache file has the wrong version marker, this just clears

235

the cache."""

236

self._cache = {}

237

238

fn = self.cache_file_name()

239

try:

240

inf = file(fn, 'rb', buffering=65000)

241

except IOError, e:

242

mutter("failed to open %s: %s", fn, e)

243

# better write it now so it is valid

244

self.needs_write = True

245

return

246

247

248

hdr = inf.readline()

249

if hdr != CACHE_HEADER:

250

mutter('cache header marker not found at top of %s;'

251

' discarding cache', fn)

252

self.needs_write = True

253

return

254

255

for l in inf:

256

pos = l.index('// ')

257

path = l[:pos].decode('utf-8')

258

if path in self._cache:

259

warning('duplicated path %r in cache' % path)

260

continue

261

262

pos += 3

263

fields = l[pos:].split(' ')

264

if len(fields) != 7:

265

warning("bad line in hashcache: %r" % l)

266

continue

267

268

sha1 = fields[0]

269

if len(sha1) != 40:

270

warning("bad sha1 in hashcache: %r" % sha1)

271

continue

272

273

fp = tuple(map(long, fields[1:]))

274

275

self._cache[path] = (sha1, fp)

276

277

self.needs_write = False

278

279

280

281

108

return self.cache_sha1[path]

109

else:

110

self.miss_count += 1

111

digest = sha_file(file(abspath, 'rb'))

112

113

now = int(time.time())

114

if fp[1] >= now or fp[2] >= now:

115

# changed too recently; can't be cached. we can

116

# return the result and it could possibly be cached

117

# next time.

118

self.danger_count += 1

119

if cache_fp:

120

del self.validator[path]

121

del self.cache_sha1[path]

122

else:

123

self.validator[path] = fp

124

self.cache_sha1[path] = digest

125

126

return digest

127

Older »