14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
17
19
# TODO: Up-front, stat all files in order and remove those which are deleted or
18
20
# out-of-date. Don't actually re-read them until they're needed. That ought
19
21
# to bring all the inodes into core so that future stats to them are fast, and
83
87
needs_write = False
85
89
def __init__(self, root, cache_file_name, mode=None,
86
content_filter_stack_provider=None):
90
content_filter_stack_provider=None):
87
91
"""Create a hash cache in base dir, and set the file mode to mode.
89
93
:param content_filter_stack_provider: a function that takes a
91
95
parameters and returns a stack of ContentFilters.
92
96
If None, no content filtering is performed.
94
if not isinstance(root, str):
95
raise ValueError("Base dir for hashcache must be text")
98
self.root = osutils.safe_unicode(root)
99
self.root_utf8 = self.root.encode('utf8') # where is the filesystem encoding ?
97
100
self.hit_count = 0
98
101
self.miss_count = 0
99
102
self.stat_count = 0
122
125
Obsolete entries are those where the file has been modified or deleted
123
126
since the entry was inserted.
125
# Stat in inode order as optimisation for at least linux.
126
def inode_order(path_and_cache):
127
return path_and_cache[1][1][3]
128
for path, cache_val in sorted(self._cache.items(), key=inode_order):
128
# FIXME optimisation opportunity, on linux [and check other oses]:
129
# rather than iteritems order, stat in inode order.
130
prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]
133
for inum, path, cache_entry in prep:
129
134
abspath = osutils.pathjoin(self.root, path)
130
135
fp = self._fingerprint(abspath)
131
136
self.stat_count += 1
133
if not fp or cache_val[1] != fp:
138
cache_fp = cache_entry[1]
140
if (not fp) or (cache_fp != fp):
134
141
# not here or not a regular file anymore
135
142
self.removed_count += 1
136
143
self.needs_write = True
139
146
def get_sha1(self, path, stat_value=None):
140
147
"""Return the sha1 of a file.
142
abspath = osutils.pathjoin(self.root, path)
149
if path.__class__ is str:
150
abspath = osutils.pathjoin(self.root_utf8, path)
152
abspath = osutils.pathjoin(self.root, path)
143
153
self.stat_count += 1
144
154
file_fp = self._fingerprint(abspath, stat_value)
157
167
cache_sha1, cache_fp = None, None
159
169
if cache_fp == file_fp:
170
## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
171
## mutter("now = %s", time.time())
160
172
self.hit_count += 1
161
173
return cache_sha1
167
179
if self._filter_provider is None:
170
filters = self._filter_provider(path=path)
182
filters = self._filter_provider(path=path, file_id=None)
171
183
digest = self._really_sha1_file(abspath, filters)
172
184
elif stat.S_ISLNK(mode):
173
target = osutils.readlink(abspath)
185
target = osutils.readlink(osutils.safe_unicode(abspath))
174
186
digest = osutils.sha_string(target.encode('UTF-8'))
176
188
raise errors.BzrError("file %r: unknown file stat mode: %o"
199
211
self.needs_write = True
200
212
del self._cache[path]
202
# mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
214
## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
203
215
## path, time.time(), file_fp[FP_MTIME_COLUMN],
204
# file_fp[FP_CTIME_COLUMN])
216
## file_fp[FP_CTIME_COLUMN])
205
217
self.update_count += 1
206
218
self.needs_write = True
207
219
self._cache[path] = (digest, file_fp)
215
227
"""Write contents of cache to file."""
216
with atomicfile.AtomicFile(self.cache_file_name(), 'wb',
217
new_mode=self._mode) as outf:
228
outf = atomicfile.AtomicFile(self.cache_file_name(), 'wb',
218
231
outf.write(CACHE_HEADER)
220
for path, c in self._cache.items():
221
line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
222
line_info.append(b'%d %d %d %d %d %d' % c[1])
223
line_info.append(b'\n')
224
outf.write(b''.join(line_info))
233
for path, c in self._cache.iteritems():
234
line_info = [path.encode('utf-8'), '// ', c[0], ' ']
235
line_info.append(' '.join([str(fld) for fld in c[1]]))
236
line_info.append('\n')
237
outf.write(''.join(line_info))
225
239
self.needs_write = False
226
# mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
227
# self.cache_file_name(), self.hit_count, self.miss_count,
229
# self.danger_count, self.update_count)
240
## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
241
## self.cache_file_name(), self.hit_count, self.miss_count,
243
## self.danger_count, self.update_count)
232
248
"""Reinstate cache from file.
240
256
fn = self.cache_file_name()
242
inf = open(fn, 'rb', buffering=65000)
244
trace.mutter("failed to open %s: %s", fn, str(e))
258
inf = file(fn, 'rb', buffering=65000)
260
trace.mutter("failed to open %s: %s", fn, e)
245
261
# better write it now so it is valid
246
262
self.needs_write = True
251
if hdr != CACHE_HEADER:
252
trace.mutter('cache header marker not found at top of %s;'
253
' discarding cache', fn)
254
self.needs_write = True
258
pos = l.index(b'// ')
259
path = l[:pos].decode('utf-8')
260
if path in self._cache:
261
trace.warning('duplicated path %r in cache' % path)
265
fields = l[pos:].split(b' ')
267
trace.warning("bad line in hashcache: %r" % l)
272
trace.warning("bad sha1 in hashcache: %r" % sha1)
275
fp = tuple(map(int, fields[1:]))
277
self._cache[path] = (sha1, fp)
266
if hdr != CACHE_HEADER:
267
trace.mutter('cache header marker not found at top of %s;'
268
' discarding cache', fn)
269
self.needs_write = True
274
path = l[:pos].decode('utf-8')
275
if path in self._cache:
276
trace.warning('duplicated path %r in cache' % path)
280
fields = l[pos:].split(' ')
282
trace.warning("bad line in hashcache: %r" % l)
287
trace.warning("bad sha1 in hashcache: %r" % sha1)
290
fp = tuple(map(long, fields[1:]))
292
self._cache[path] = (sha1, fp)
294
# GZ 2009-09-20: Should really use a try/finally block to ensure close
279
297
self.needs_write = False
298
316
# we discard any high precision because it's not reliable; perhaps we
299
317
# could do better on some systems?
300
return (stat_value.st_size, int(stat_value.st_mtime),
301
int(stat_value.st_ctime), stat_value.st_ino,
318
return (stat_value.st_size, long(stat_value.st_mtime),
319
long(stat_value.st_ctime), stat_value.st_ino,
302
320
stat_value.st_dev, stat_value.st_mode)