99
95
parameters and returns a stack of ContentFilters.
100
96
If None, no content filtering is performed.
102
if not isinstance(root, text_type):
103
raise ValueError("Base dir for hashcache must be text")
98
self.root = osutils.safe_unicode(root)
99
self.root_utf8 = self.root.encode('utf8') # where is the filesystem encoding ?
105
100
self.hit_count = 0
106
101
self.miss_count = 0
107
102
self.stat_count = 0
130
125
Obsolete entries are those where the file has been modified or deleted
131
126
since the entry was inserted.
133
# Stat in inode order as optimisation for at least linux.
134
def inode_order(path_and_cache):
135
return path_and_cache[1][1][3]
136
for path, cache_val in sorted(viewitems(self._cache), key=inode_order):
128
# FIXME optimisation opportunity, on linux [and check other oses]:
129
# rather than iteritems order, stat in inode order.
130
prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]
133
for inum, path, cache_entry in prep:
137
134
abspath = osutils.pathjoin(self.root, path)
138
135
fp = self._fingerprint(abspath)
139
136
self.stat_count += 1
141
if not fp or cache_val[1] != fp:
138
cache_fp = cache_entry[1]
140
if (not fp) or (cache_fp != fp):
142
141
# not here or not a regular file anymore
143
142
self.removed_count += 1
144
143
self.needs_write = True
147
146
def get_sha1(self, path, stat_value=None):
148
147
"""Return the sha1 of a file.
150
abspath = osutils.pathjoin(self.root, path)
149
if path.__class__ is str:
150
abspath = osutils.pathjoin(self.root_utf8, path)
152
abspath = osutils.pathjoin(self.root, path)
151
153
self.stat_count += 1
152
154
file_fp = self._fingerprint(abspath, stat_value)
180
182
filters = self._filter_provider(path=path, file_id=None)
181
183
digest = self._really_sha1_file(abspath, filters)
182
184
elif stat.S_ISLNK(mode):
183
target = osutils.readlink(abspath)
185
target = osutils.readlink(osutils.safe_unicode(abspath))
184
186
digest = osutils.sha_string(target.encode('UTF-8'))
186
188
raise errors.BzrError("file %r: unknown file stat mode: %o"
229
231
outf.write(CACHE_HEADER)
231
for path, c in viewitems(self._cache):
232
line_info = [path.encode('utf-8'), b'// ', c[0], b' ']
233
line_info.append(b'%d %d %d %d %d %d' % c[1])
234
line_info.append(b'\n')
235
outf.write(b''.join(line_info))
233
for path, c in self._cache.iteritems():
234
line_info = [path.encode('utf-8'), '// ', c[0], ' ']
235
line_info.append(' '.join([str(fld) for fld in c[1]]))
236
line_info.append('\n')
237
outf.write(''.join(line_info))
237
239
self.needs_write = False
238
240
## mutter("write hash cache: %s hits=%d misses=%d stat=%d recent=%d updates=%d",
254
256
fn = self.cache_file_name()
256
inf = open(fn, 'rb', buffering=65000)
258
trace.mutter("failed to open %s: %s", fn, str(e))
258
inf = file(fn, 'rb', buffering=65000)
260
trace.mutter("failed to open %s: %s", fn, e)
259
261
# better write it now so it is valid
260
262
self.needs_write = True
265
if hdr != CACHE_HEADER:
266
trace.mutter('cache header marker not found at top of %s;'
267
' discarding cache', fn)
268
self.needs_write = True
272
pos = l.index(b'// ')
273
path = l[:pos].decode('utf-8')
274
if path in self._cache:
275
trace.warning('duplicated path %r in cache' % path)
279
fields = l[pos:].split(b' ')
281
trace.warning("bad line in hashcache: %r" % l)
286
trace.warning("bad sha1 in hashcache: %r" % sha1)
289
fp = tuple(map(int, fields[1:]))
291
self._cache[path] = (sha1, fp)
266
if hdr != CACHE_HEADER:
267
trace.mutter('cache header marker not found at top of %s;'
268
' discarding cache', fn)
269
self.needs_write = True
274
path = l[:pos].decode('utf-8')
275
if path in self._cache:
276
trace.warning('duplicated path %r in cache' % path)
280
fields = l[pos:].split(' ')
282
trace.warning("bad line in hashcache: %r" % l)
287
trace.warning("bad sha1 in hashcache: %r" % sha1)
290
fp = tuple(map(long, fields[1:]))
292
self._cache[path] = (sha1, fp)
294
# GZ 2009-09-20: Should really use a try/finally block to ensure close
293
297
self.needs_write = False
312
316
# we discard any high precision because it's not reliable; perhaps we
313
317
# could do better on some systems?
314
return (stat_value.st_size, int(stat_value.st_mtime),
315
int(stat_value.st_ctime), stat_value.st_ino,
318
return (stat_value.st_size, long(stat_value.st_mtime),
319
long(stat_value.st_ctime), stat_value.st_ino,
316
320
stat_value.st_dev, stat_value.st_mode)