1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005-2010 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
85
83
needs_write = False
87
85
def __init__(self, root, cache_file_name, mode=None,
88
content_filter_stack_provider=None):
86
content_filter_stack_provider=None):
89
87
"""Create a hash cache in base dir, and set the file mode to mode.
91
89
:param content_filter_stack_provider: a function that takes a
93
91
parameters and returns a stack of ContentFilters.
94
92
If None, no content filtering is performed.
96
self.root = osutils.safe_unicode(root)
97
self.root_utf8 = self.root.encode('utf8') # where is the filesystem encoding ?
94
if not isinstance(root, str):
95
raise ValueError("Base dir for hashcache must be text")
99
98
self.miss_count = 0
100
99
self.stat_count = 0
123
122
Obsolete entries are those where the file has been modified or deleted
124
123
since the entry was inserted.
126
# FIXME optimisation opportunity, on linux [and check other oses]:
127
# rather than iteritems order, stat in inode order.
128
prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]
131
for inum, path, cache_entry in prep:
125
# Stat in inode order as optimisation for at least linux.
126
def inode_order(path_and_cache):
127
return path_and_cache[1][1][3]
128
for path, cache_val in sorted(self._cache.items(), key=inode_order):
132
129
abspath = osutils.pathjoin(self.root, path)
133
130
fp = self._fingerprint(abspath)
134
131
self.stat_count += 1
136
cache_fp = cache_entry[1]
138
if (not fp) or (cache_fp != fp):
133
if not fp or cache_val[1] != fp:
139
134
# not here or not a regular file anymore
140
135
self.removed_count += 1
141
136
self.needs_write = True
144
139
def get_sha1(self, path, stat_value=None):
145
140
"""Return the sha1 of a file.
147
if path.__class__ is str:
148
abspath = osutils.pathjoin(self.root_utf8, path)
150
abspath = osutils.pathjoin(self.root, path)
142
abspath = osutils.pathjoin(self.root, path)
151
143
self.stat_count += 1
152
144
file_fp = self._fingerprint(abspath, stat_value)
165
157
cache_sha1, cache_fp = None, None
167
159
if cache_fp == file_fp:
168
## mutter("hashcache hit for %s %r -> %s", path, file_fp, cache_sha1)
169
## mutter("now = %s", time.time())
170
160
self.hit_count += 1
171
161
return cache_sha1
177
167
if self._filter_provider is None:
180
filters = self._filter_provider(path=path, file_id=None)
170
filters = self._filter_provider(path=path)
181
171
digest = self._really_sha1_file(abspath, filters)
182
172
elif stat.S_ISLNK(mode):
183
target = osutils.readlink(osutils.safe_unicode(abspath))
173
target = osutils.readlink(abspath)
184
174
digest = osutils.sha_string(target.encode('UTF-8'))
186
176
raise errors.BzrError("file %r: unknown file stat mode: %o"
209
199
self.needs_write = True
210
200
del self._cache[path]
212
## mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
202
# mutter('%r added to cache: now=%f, mtime=%d, ctime=%d',
213
203
## path, time.time(), file_fp[FP_MTIME_COLUMN],
214
## file_fp[FP_CTIME_COLUMN])
204
# file_fp[FP_CTIME_COLUMN])
215
205
self.update_count += 1
216
206
self.needs_write = True
217
207
self._cache[path] = (digest, file_fp)
225
215
"""Write contents of cache to file."""
226
outf = atomicfile.AtomicFile(self.cache_file_name(), 'wb',
216
with atomicfile.AtomicFile(self.cache_file_name(), 'wb',
217
new_mode=self._mode) as outf:
229
218
outf.write(CACHE_HEADER)
231
for path, c in self._cache.iteritems():
232
line_info = [path.encode('utf-8'), '// ', c[0], ' ']
233
line_info.append(' '.join([str(fld) for fld in c[1]]))
234
line_info.append('\n')
235
outf.write(''.join(line_info))
220
for path, c in self._cache.items():
221
line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
222
line_info.append(b'%d %d %d %d %d %d' % c[1])
223
line_info.append(b'\n')
224
outf.write(b''.join(line_info))
237
225
self.needs_write = False
238
## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
239
## self.cache_file_name(), self.hit_count, self.miss_count,
241
## self.danger_count, self.update_count)
226
# mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
227
# self.cache_file_name(), self.hit_count, self.miss_count,
229
# self.danger_count, self.update_count)
246
232
"""Reinstate cache from file.
254
240
fn = self.cache_file_name()
256
inf = file(fn, 'rb', buffering=65000)
258
trace.mutter("failed to open %s: %s", fn, e)
242
inf = open(fn, 'rb', buffering=65000)
244
trace.mutter("failed to open %s: %s", fn, str(e))
259
245
# better write it now so it is valid
260
246
self.needs_write = True
264
if hdr != CACHE_HEADER:
265
trace.mutter('cache header marker not found at top of %s;'
266
' discarding cache', fn)
267
self.needs_write = True
272
path = l[:pos].decode('utf-8')
273
if path in self._cache:
274
trace.warning('duplicated path %r in cache' % path)
278
fields = l[pos:].split(' ')
280
trace.warning("bad line in hashcache: %r" % l)
285
trace.warning("bad sha1 in hashcache: %r" % sha1)
288
fp = tuple(map(long, fields[1:]))
290
self._cache[path] = (sha1, fp)
251
if hdr != CACHE_HEADER:
252
trace.mutter('cache header marker not found at top of %s;'
253
' discarding cache', fn)
254
self.needs_write = True
258
pos = l.index(b'// ')
259
path = l[:pos].decode('utf-8')
260
if path in self._cache:
261
trace.warning('duplicated path %r in cache' % path)
265
fields = l[pos:].split(b' ')
267
trace.warning("bad line in hashcache: %r" % l)
272
trace.warning("bad sha1 in hashcache: %r" % sha1)
275
fp = tuple(map(int, fields[1:]))
277
self._cache[path] = (sha1, fp)
292
279
self.needs_write = False
311
298
# we discard any high precision because it's not reliable; perhaps we
312
299
# could do better on some systems?
313
return (stat_value.st_size, long(stat_value.st_mtime),
314
long(stat_value.st_ctime), stat_value.st_ino,
300
return (stat_value.st_size, int(stat_value.st_mtime),
301
int(stat_value.st_ctime), stat_value.st_ino,
315
302
stat_value.st_dev, stat_value.st_mode)