# Copyright (C) 2005-2010 Canonical Ltd
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
    needs_write = False

    def __init__(self, root, cache_file_name, mode=None,
                 content_filter_stack_provider=None):
        """Create a hash cache in base dir, and set the file mode to mode.

        :param content_filter_stack_provider: a function that takes a
            path (relative to the top of the tree) as a parameter and
            returns a stack of ContentFilters.
            If None, no content filtering is performed.
        """
        if not isinstance(root, str):
            raise ValueError("Base dir for hashcache must be text")
        self.root = osutils.safe_unicode(root)
        self.root_utf8 = self.root.encode('utf8')  # where is the filesystem encoding?
        self.miss_count = 0
        self.stat_count = 0

        Obsolete entries are those where the file has been modified or deleted
        since the entry was inserted.
        """
        # Stat in inode order as optimisation for at least linux.
        def inode_order(path_and_cache):
            return path_and_cache[1][1][3]
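        # Each item is (path, (sha1, fingerprint)) and fingerprint[3] is
        # st_ino, so this key sorts the pending stat calls by inode number.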
        for path, cache_val in sorted(self._cache.items(), key=inode_order):
            abspath = osutils.pathjoin(self.root, path)
            fp = self._fingerprint(abspath)
            self.stat_count += 1
            if not fp or cache_val[1] != fp:
                # not here or not a regular file anymore
                self.removed_count += 1
                self.needs_write = True

    def get_sha1(self, path, stat_value=None):
        """Return the sha1 of a file.
        """
        abspath = osutils.pathjoin(self.root, path)
        self.stat_count += 1
        file_fp = self._fingerprint(abspath, stat_value)

        if path in self._cache:
            cache_sha1, cache_fp = self._cache[path]
        else:
            cache_sha1, cache_fp = None, None
        if cache_fp == file_fp:
            ## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
            ## mutter("now = %s", time.time())
            self.hit_count += 1
            return cache_sha1
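        # Cache miss below: the file is re-read and re-hashed, and the cache
        # entry is refreshed.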
            if self._filter_provider is None:
                filters = []
            else:
                filters = self._filter_provider(path=path)
            digest = self._really_sha1_file(abspath, filters)
        elif stat.S_ISLNK(mode):
            target = osutils.readlink(abspath)
            digest = osutils.sha_string(target.encode('UTF-8'))
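            # Symlinks are fingerprinted by hashing the UTF-8 encoding of the
            # link target rather than any file contents.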
        else:
            raise errors.BzrError("file %r: unknown file stat mode: %o"
                                  % (abspath, mode))

            self.needs_write = True
            del self._cache[path]
            ## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
            ##        path, time.time(), file_fp[FP_MTIME_COLUMN],
            ##        file_fp[FP_CTIME_COLUMN])
            self.update_count += 1
            self.needs_write = True
            self._cache[path] = (digest, file_fp)
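            # Each in-memory entry maps a tree-relative path to a pair of
            # (hex sha1 digest, stat fingerprint tuple).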

    def write(self):
        """Write contents of cache to file."""
        with atomicfile.AtomicFile(self.cache_file_name(), 'wb',
                                   new_mode=self._mode) as outf:
            outf.write(CACHE_HEADER)
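            # One record per line after the header:
            #   <utf-8 path>// <sha1> <size> <mtime> <ctime> <ino> <dev> <mode>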
            for path, c in self._cache.items():
                line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
                line_info.append(b'%d %d %d %d %d %d' % c[1])
                line_info.append(b'\n')
                outf.write(b''.join(line_info))
        self.needs_write = False
        ## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
        ##        self.cache_file_name(), self.hit_count, self.miss_count,
        ##        self.stat_count,
        ##        self.danger_count, self.update_count)

    def read(self):
        """Reinstate cache from file."""
        fn = self.cache_file_name()
        try:
            inf = open(fn, 'rb', buffering=65000)
        except OSError as e:
            trace.mutter("failed to open %s: %s", fn, str(e))
            # better write it now so it is valid
            self.needs_write = True
            return
        hdr = inf.readline()
        if hdr != CACHE_HEADER:
            trace.mutter('cache header marker not found at top of %s;'
                         ' discarding cache', fn)
            self.needs_write = True
            return
        for l in inf:
            pos = l.index(b'// ')
            path = l[:pos].decode('utf-8')
            if path in self._cache:
                trace.warning('duplicated path %r in cache' % path)
                continue
            pos += 3
            fields = l[pos:].split(b' ')
            if len(fields) != 7:
                trace.warning("bad line in hashcache: %r" % l)
                continue
            sha1 = fields[0]
            if len(sha1) != 40:
                trace.warning("bad sha1 in hashcache: %r" % sha1)
                continue
            fp = tuple(map(int, fields[1:]))
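            # fields[1:] holds the six integers written out by write(), parsed
            # back into the same fingerprint tuple that _fingerprint() returns.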
            self._cache[path] = (sha1, fp)
        self.needs_write = False

        # we discard any high precision because it's not reliable; perhaps we
        # could do better on some systems?
        return (stat_value.st_size, int(stat_value.st_mtime),
                int(stat_value.st_ctime), stat_value.st_ino,
                stat_value.st_dev, stat_value.st_mode)
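
# A rough usage sketch (illustrative only: the HashCache name comes from this
# module's context, and the paths below are made-up assumptions):
#
#   hc = HashCache(u'/path/to/tree', u'/path/to/tree/.bzr/stat-cache')
#   hc.read()                    # load previously cached entries
#   sha1 = hc.get_sha1(u'foo')   # cheap stat check; full re-hash on miss
#   if hc.needs_write:
#       hc.write()               # persist atomically via AtomicFile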