# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# TODO: Up-front, stat all files in order and remove those which are deleted or
# out-of-date. Don't actually re-read them until they're needed. That ought
# to bring all the inodes into core so that future stats to them are fast, and
# it preserves the nice property that any caller will always get up-to-date
# data except in unavoidable cases.
    hit_count
        number of times files have been retrieved from the cache, avoiding a
        re-read

    miss_count
        number of misses (times files have been completely re-read)
    """

needs_write = False
def __init__(self, root, cache_file_name, mode=None,
content_filter_stack_provider=None):
"""Create a hash cache in base dir, and set the file mode to mode.
:param content_filter_stack_provider: a function that takes a
path (relative to the top of the tree) and a file-id as
parameters and returns a stack of ContentFilters.
If None, no content filtering is performed.
        """
if not isinstance(root, str):
raise ValueError("Base dir for hashcache must be text")
        self.root = root
        self.hit_count = 0
self.miss_count = 0
self.stat_count = 0
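
    # A minimal sketch of a content filter stack provider for the constructor
    # above; `demo_provider` and the paths are hypothetical, and a real
    # provider returns a stack of breezy ContentFilter objects:
    #
    #   def demo_provider(path, file_id=None):
    #       return []   # no filtering for any path
    #
    #   hc = HashCache(u'/srv/tree', u'/srv/tree/.bzr/stat-cache',
    #                  content_filter_stack_provider=demo_provider)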

    def scan(self):
"""Scan all files and remove entries where the cache entry is obsolete.
Obsolete entries are those where the file has been modified or deleted
since the entry was inserted.
        """
# Stat in inode order as optimisation for at least linux.
def inode_order(path_and_cache):
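            # each item is (path, (sha1, fingerprint)); fingerprint[3] is
            # st_ino, so sorting by it stats files in inode order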
return path_and_cache[1][1][3]
for path, cache_val in sorted(self._cache.items(), key=inode_order):
abspath = osutils.pathjoin(self.root, path)
fp = self._fingerprint(abspath)
self.stat_count += 1
if not fp or cache_val[1] != fp:
# not here or not a regular file anymore
self.removed_count += 1
                self.needs_write = True
                del self._cache[path]

    def get_sha1(self, path, stat_value=None):
        """Return the sha1 of a file."""
        abspath = osutils.pathjoin(self.root, path)
        self.stat_count += 1
        file_fp = self._fingerprint(abspath, stat_value)

        if not file_fp:
            # not a regular file or not existing
            if path in self._cache:
                self.removed_count += 1
                self.needs_write = True
                del self._cache[path]
            return None

        if path in self._cache:
            cache_sha1, cache_fp = self._cache[path]
        else:
cache_sha1, cache_fp = None, None
if cache_fp == file_fp:
## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
## mutter("now = %s", time.time())
self.hit_count += 1
return cache_sha1
self.miss_count += 1
mode = file_fp[FP_MODE_COLUMN]
if stat.S_ISREG(mode):
if self._filter_provider is None:
                filters = []
            else:
filters = self._filter_provider(path=path)
digest = self._really_sha1_file(abspath, filters)
elif stat.S_ISLNK(mode):
target = osutils.readlink(abspath)
digest = osutils.sha_string(target.encode('UTF-8'))
        else:
raise errors.BzrError("file %r: unknown file stat mode: %o"
                                  % (abspath, mode))
# window of 3 seconds to allow for 2s resolution on windows,
# unsynchronized file servers, etc.
        cutoff = self._cutoff_time()
        if file_fp[FP_MTIME_COLUMN] >= cutoff or file_fp[FP_CTIME_COLUMN] >= cutoff:
            # changed too recently; can't be cached.  we can
            # return the result and it could possibly be cached
            # next time.
            self.danger_count += 1
            if cache_fp:
                self.removed_count += 1
self.needs_write = True
del self._cache[path]
        else:
            # mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
            #        path, time.time(), file_fp[FP_MTIME_COLUMN],
            #        file_fp[FP_CTIME_COLUMN])
self.update_count += 1
self.needs_write = True
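            # each entry pairs the hex digest with the six-field fingerprint
            # (size, mtime, ctime, ino, dev, mode); hypothetical example:
            #   (b'da39a3ee5e6b4b0d3255bfef95601890afd80709',
            #    (1024, 1310475, 1310473, 234, 54, 33188))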
self._cache[path] = (digest, file_fp)
        return digest

def _really_sha1_file(self, abspath, filters):
"""Calculate the SHA1 of a file by reading the full text"""
return _mod_filters.internal_size_sha_file_byname(abspath, filters)[1]

    def write(self):
"""Write contents of cache to file."""
with atomicfile.AtomicFile(self.cache_file_name(), 'wb',
new_mode=self._mode) as outf:
outf.write(CACHE_HEADER)
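
            # each line is "<path>// <sha1> <size> <mtime> <ctime> <ino> <dev> <mode>";
            # hypothetical example:
            #   b'foo// da39a3ee5e6b4b0d3255bfef95601890afd80709 1024 1310475 1310473 234 54 33188\n'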
for path, c in self._cache.items():
line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
line_info.append(b'%d %d %d %d %d %d' % c[1])
line_info.append(b'\n')
outf.write(b''.join(line_info))
self.needs_write = False
        # mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
        #        self.cache_file_name(), self.hit_count, self.miss_count,
        #        self.stat_count,
        #        self.danger_count, self.update_count)

    def read(self):
"""Reinstate cache from file.
Overwrites existing cache.
If the cache file has the wrong version marker, this just clears
        the cache."""
        self._cache = {}

fn = self.cache_file_name()
        try:
inf = open(fn, 'rb', buffering=65000)
        except IOError as e:
trace.mutter("failed to open %s: %s", fn, str(e))
# better write it now so it is valid
self.needs_write = True
            return

        hdr = inf.readline()
if hdr != CACHE_HEADER:
trace.mutter('cache header marker not found at top of %s;'
' discarding cache', fn)
self.needs_write = True
            return

        for l in inf:
pos = l.index(b'// ')
path = l[:pos].decode('utf-8')
if path in self._cache:
trace.warning('duplicated path %r in cache' % path)
                continue

            pos += 3
fields = l[pos:].split(b' ')
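            # fields should be [sha1, size, mtime, ctime, ino, dev, mode]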
            if len(fields) != 7:
trace.warning("bad line in hashcache: %r" % l)
                continue

            sha1 = fields[0]
            if len(sha1) != 40:
trace.warning("bad sha1 in hashcache: %r" % sha1)
                continue

fp = tuple(map(int, fields[1:]))
self._cache[path] = (sha1, fp)
self.needs_write = False