1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005-2010 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
17
19
# TODO: Up-front, stat all files in order and remove those which are deleted or
18
20
# out-of-date. Don't actually re-read them until they're needed. That ought
19
21
# to bring all the inodes into core so that future stats to them are fast, and
85
89
needs_write = False
87
91
def __init__(self, root, cache_file_name, mode=None,
88
content_filter_stack_provider=None):
92
content_filter_stack_provider=None):
89
93
"""Create a hash cache in base dir, and set the file mode to mode.
91
95
:param content_filter_stack_provider: a function that takes a
93
97
parameters and returns a stack of ContentFilters.
94
98
If None, no content filtering is performed.
96
self.root = osutils.safe_unicode(root)
97
self.root_utf8 = self.root.encode('utf8') # where is the filesystem encoding ?
100
if not isinstance(root, text_type):
101
raise ValueError("Base dir for hashcache must be text")
98
103
self.hit_count = 0
99
104
self.miss_count = 0
100
105
self.stat_count = 0
123
128
Obsolete entries are those where the file has been modified or deleted
124
129
since the entry was inserted.
126
# FIXME optimisation opportunity, on linux [and check other oses]:
127
# rather than iteritems order, stat in inode order.
128
prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]
131
for inum, path, cache_entry in prep:
131
# Stat in inode order as optimisation for at least linux.
132
def inode_order(path_and_cache):
133
return path_and_cache[1][1][3]
134
for path, cache_val in sorted(viewitems(self._cache), key=inode_order):
132
135
abspath = osutils.pathjoin(self.root, path)
133
136
fp = self._fingerprint(abspath)
134
137
self.stat_count += 1
136
cache_fp = cache_entry[1]
138
if (not fp) or (cache_fp != fp):
139
if not fp or cache_val[1] != fp:
139
140
# not here or not a regular file anymore
140
141
self.removed_count += 1
141
142
self.needs_write = True
144
145
def get_sha1(self, path, stat_value=None):
145
146
"""Return the sha1 of a file.
147
if path.__class__ is str:
148
abspath = osutils.pathjoin(self.root_utf8, path)
150
abspath = osutils.pathjoin(self.root, path)
148
abspath = osutils.pathjoin(self.root, path)
151
149
self.stat_count += 1
152
150
file_fp = self._fingerprint(abspath, stat_value)
165
163
cache_sha1, cache_fp = None, None
167
165
if cache_fp == file_fp:
168
## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
169
## mutter("now = %s", time.time())
170
166
self.hit_count += 1
171
167
return cache_sha1
177
173
if self._filter_provider is None:
180
filters = self._filter_provider(path=path, file_id=None)
176
filters = self._filter_provider(path=path)
181
177
digest = self._really_sha1_file(abspath, filters)
182
178
elif stat.S_ISLNK(mode):
183
target = osutils.readlink(osutils.safe_unicode(abspath))
179
target = osutils.readlink(abspath)
184
180
digest = osutils.sha_string(target.encode('UTF-8'))
186
182
raise errors.BzrError("file %r: unknown file stat mode: %o"
209
205
self.needs_write = True
210
206
del self._cache[path]
212
## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
208
# mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
213
209
## path, time.time(), file_fp[FP_MTIME_COLUMN],
214
## file_fp[FP_CTIME_COLUMN])
210
# file_fp[FP_CTIME_COLUMN])
215
211
self.update_count += 1
216
212
self.needs_write = True
217
213
self._cache[path] = (digest, file_fp)
225
221
"""Write contents of cache to file."""
226
outf = atomicfile.AtomicFile(self.cache_file_name(), 'wb',
222
with atomicfile.AtomicFile(self.cache_file_name(), 'wb',
223
new_mode=self._mode) as outf:
229
224
outf.write(CACHE_HEADER)
231
for path, c in self._cache.iteritems():
232
line_info = [path.encode('utf-8'), '// ', c[0], ' ']
233
line_info.append(' '.join([str(fld) for fld in c[1]]))
234
line_info.append('\n')
235
outf.write(''.join(line_info))
226
for path, c in viewitems(self._cache):
227
line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
228
line_info.append(b'%d %d %d %d %d %d' % c[1])
229
line_info.append(b'\n')
230
outf.write(b''.join(line_info))
237
231
self.needs_write = False
238
## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
239
## self.cache_file_name(), self.hit_count, self.miss_count,
241
## self.danger_count, self.update_count)
232
# mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
233
# self.cache_file_name(), self.hit_count, self.miss_count,
235
# self.danger_count, self.update_count)
246
238
"""Reinstate cache from file.
254
246
fn = self.cache_file_name()
256
inf = file(fn, 'rb', buffering=65000)
258
trace.mutter("failed to open %s: %s", fn, e)
248
inf = open(fn, 'rb', buffering=65000)
250
trace.mutter("failed to open %s: %s", fn, str(e))
259
251
# better write it now so it is valid
260
252
self.needs_write = True
264
if hdr != CACHE_HEADER:
265
trace.mutter('cache header marker not found at top of %s;'
266
' discarding cache', fn)
267
self.needs_write = True
272
path = l[:pos].decode('utf-8')
273
if path in self._cache:
274
trace.warning('duplicated path %r in cache' % path)
278
fields = l[pos:].split(' ')
280
trace.warning("bad line in hashcache: %r" % l)
285
trace.warning("bad sha1 in hashcache: %r" % sha1)
288
fp = tuple(map(long, fields[1:]))
290
self._cache[path] = (sha1, fp)
257
if hdr != CACHE_HEADER:
258
trace.mutter('cache header marker not found at top of %s;'
259
' discarding cache', fn)
260
self.needs_write = True
264
pos = l.index(b'// ')
265
path = l[:pos].decode('utf-8')
266
if path in self._cache:
267
trace.warning('duplicated path %r in cache' % path)
271
fields = l[pos:].split(b' ')
273
trace.warning("bad line in hashcache: %r" % l)
278
trace.warning("bad sha1 in hashcache: %r" % sha1)
281
fp = tuple(map(int, fields[1:]))
283
self._cache[path] = (sha1, fp)
292
285
self.needs_write = False
311
304
# we discard any high precision because it's not reliable; perhaps we
312
305
# could do better on some systems?
313
return (stat_value.st_size, long(stat_value.st_mtime),
314
long(stat_value.st_ctime), stat_value.st_ino,
306
return (stat_value.st_size, int(stat_value.st_mtime),
307
int(stat_value.st_ctime), stat_value.st_ino,
315
308
stat_value.st_dev, stat_value.st_mode)