# Copyright (C) 2005-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

# TODO: Up-front, stat all files in order and remove those which are deleted or
# out-of-date.  Don't actually re-read them until they're needed.  That ought
# to bring all the inodes into core so that future stats to them are fast, and
# it preserves the nice property that any caller will always get up-to-date
# data except in unavoidable cases.

import os
import stat
import time

from breezy import (
    atomicfile,
    errors,
    osutils,
    trace,
    )
from breezy.sixish import (
    text_type,
    viewitems,
    )


FP_MTIME_COLUMN = 1
FP_CTIME_COLUMN = 2
FP_MODE_COLUMN = 5

CACHE_HEADER = b'### bzr hashcache v5\n'


class HashCache(object):
    """Cache for looking up file SHA-1.

    Files are considered to match the cached value if the fingerprint
    of the file has not changed.  This includes its mtime, ctime,
    device number, inode number, and size.  This should catch
    modifications or replacement of the file by a new one.
    """

    needs_write = False

    def __init__(self, root, cache_file_name, mode=None,
                 content_filter_stack_provider=None):
        """Create a hash cache in base dir, and set the file mode to mode.

        :param content_filter_stack_provider: a function that takes a
            path (relative to the top of the tree) and returns a stack
            of ContentFilters.
            If None, no content filtering is performed.
        """
        if not isinstance(root, text_type):
            raise ValueError("Base dir for hashcache must be text")
        self.root = osutils.safe_unicode(root)
        self.hit_count = 0
        self.miss_count = 0
        self.stat_count = 0
        self.danger_count = 0
        self.removed_count = 0
        self.update_count = 0
        self._cache = {}
        self._mode = mode
        self._cache_file_name = cache_file_name
        self._filter_provider = content_filter_stack_provider
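        # Illustrative only, not from the original module: a minimal
        # provider conforming to the call in get_sha1 below could be
        #     def no_filters(path):
        #         return []
        # which disables filtering for every path while still exercising
        # the provider code path.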

    def scan(self):
        """Scan all files and remove entries where the cache entry is obsolete.

        Obsolete entries are those where the file has been modified or deleted
        since the entry was inserted.
        """
        # Stat in inode order as an optimisation, at least on Linux.
        def inode_order(path_and_cache):
            return path_and_cache[1][1][3]
        for path, cache_val in sorted(viewitems(self._cache), key=inode_order):
            abspath = osutils.pathjoin(self.root, path)
            fp = self._fingerprint(abspath)
            self.stat_count += 1
            if not fp or cache_val[1] != fp:
                # not here or not a regular file anymore
                self.removed_count += 1
                self.needs_write = True
                del self._cache[path]
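
    # Cache entries map path -> (sha1, fingerprint), where the fingerprint is
    # the (size, mtime, ctime, ino, dev, mode) tuple returned by _fingerprint
    # below; inode_order above therefore sorts entries by st_ino.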

    def get_sha1(self, path, stat_value=None):
        """Return the sha1 of a file.
        """
        abspath = osutils.pathjoin(self.root, path)
        self.stat_count += 1
        file_fp = self._fingerprint(abspath, stat_value)

        if not file_fp:
            # not a regular file or not existing
            if path in self._cache:
                self.removed_count += 1
                self.needs_write = True
                del self._cache[path]
            return None

        if path in self._cache:
            cache_sha1, cache_fp = self._cache[path]
        else:
            cache_sha1, cache_fp = None, None

        if cache_fp == file_fp:
            ## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
            ## mutter("now = %s", time.time())
            self.hit_count += 1
            return cache_sha1

        self.miss_count += 1

        mode = file_fp[FP_MODE_COLUMN]
        if stat.S_ISREG(mode):
            if self._filter_provider is None:
                filters = []
            else:
                filters = self._filter_provider(path=path)
            digest = self._really_sha1_file(abspath, filters)
        elif stat.S_ISLNK(mode):
            target = osutils.readlink(abspath)
            digest = osutils.sha_string(target.encode('UTF-8'))
        else:
            raise errors.BzrError("file %r: unknown file stat mode: %o"
                                  % (abspath, mode))

        # entries modified too recently can't be cached safely, because a
        # later change within the timestamp resolution would go undetected.
        cutoff = self._cutoff_time()
        if file_fp[FP_MTIME_COLUMN] >= cutoff or file_fp[FP_CTIME_COLUMN] >= cutoff:
            # changed too recently; can't be cached.  we can
            # return the result and it could possibly be cached
            # next time.
            self.danger_count += 1
            if cache_fp:
                self.removed_count += 1
                self.needs_write = True
                del self._cache[path]
        else:
            # mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
            #        path, time.time(), file_fp[FP_MTIME_COLUMN],
            #        file_fp[FP_CTIME_COLUMN])
            self.update_count += 1
            self.needs_write = True
            self._cache[path] = (digest, file_fp)

        return digest
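
    # Illustrative usage (hypothetical paths, not from the original module):
    #     hc = HashCache(u'/srv/tree', u'/srv/tree/.bzr/stat-cache')
    #     hc.read()                       # load previously cached entries
    #     sha1 = hc.get_sha1('README')    # stat-based cached lookup
    #     if hc.needs_write:
    #         hc.write()                  # persist any new or changed entries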

    def write(self):
        """Write contents of cache to file."""
        with atomicfile.AtomicFile(self.cache_file_name(), 'wb',
                                   new_mode=self._mode) as outf:
            outf.write(CACHE_HEADER)

            for path, c in viewitems(self._cache):
                line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
                line_info.append(b'%d %d %d %d %d %d' % c[1])
                line_info.append(b'\n')
                outf.write(b''.join(line_info))
            self.needs_write = False
            # mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
            #        self.cache_file_name(), self.hit_count, self.miss_count,
            #        self.stat_count, self.danger_count, self.update_count)
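
    # For illustration, each record written above has the form
    #     <path>// <sha1> <size> <mtime> <ctime> <ino> <dev> <mode>
    # e.g. (hypothetical values):
    #     README// da39a3ee5e6b4b0d3255bfef95601890afd80709 0 1285081508 1285081508 1026132 2049 33188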

    def read(self):
        """Reinstate cache from file.

        Overwrites existing cache.

        If the cache file has the wrong version marker, this just clears
        the cache."""
        self._cache = {}

        fn = self.cache_file_name()
        try:
            inf = open(fn, 'rb', buffering=65000)
        except IOError as e:
            trace.mutter("failed to open %s: %s", fn, str(e))
            # better write it now so it is valid
            self.needs_write = True
            return

        hdr = inf.readline()
        if hdr != CACHE_HEADER:
            trace.mutter('cache header marker not found at top of %s;'
                         ' discarding cache', fn)
            self.needs_write = True
            return

        for l in inf:
            pos = l.index(b'// ')
            path = l[:pos].decode('utf-8')
            if path in self._cache:
                trace.warning('duplicated path %r in cache' % path)
                continue

            pos += 3
            fields = l[pos:].split(b' ')
            if len(fields) != 7:
                trace.warning("bad line in hashcache: %r" % l)
                continue

            sha1 = fields[0]
            if len(sha1) != 40:
                trace.warning("bad sha1 in hashcache: %r" % sha1)
                continue

            fp = tuple(map(int, fields[1:]))

            self._cache[path] = (sha1, fp)

        # GZ 2009-09-20: Should really use a try/finally block to ensure close
        inf.close()

        self.needs_write = False

    def _fingerprint(self, abspath, stat_value=None):
        if stat_value is None:
            try:
                stat_value = os.lstat(abspath)
            except OSError:
                # might be missing, etc
                return None

        if stat.S_ISDIR(stat_value.st_mode):
            return None

        # we discard any high precision because it's not reliable; perhaps we
        # could do better on some systems?
        return (stat_value.st_size, int(stat_value.st_mtime),
                int(stat_value.st_ctime), stat_value.st_ino,
                stat_value.st_dev, stat_value.st_mode)
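
    # The tuple positions above line up with the FP_*_COLUMN constants used in
    # get_sha1: index 1 is FP_MTIME_COLUMN, index 2 is FP_CTIME_COLUMN, and
    # index 5 is FP_MODE_COLUMN.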