bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
362
by Martin Pool
 - Import stat-cache code  | 
1  | 
# (C) 2005 Canonical Ltd
 | 
2  | 
||
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
||
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
||
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
import stat, os, sha, time  | 
|
18  | 
from binascii import b2a_qp, a2b_qp  | 
|
19  | 
||
20  | 
from trace import mutter  | 
|
| 
428
by Martin Pool
 - Use AtomicFile to update statcache.  | 
21  | 
from errors import BzrError  | 
| 
362
by Martin Pool
 - Import stat-cache code  | 
22  | 
|
23  | 
||
| 
427
by Martin Pool
 - statcache docs  | 
24  | 
"""File stat cache to speed up tree comparisons.
 | 
25  | 
||
26  | 
This module basically gives a quick way to find the SHA-1 and related
 | 
|
27  | 
information of a file in the working directory, without actually
 | 
|
28  | 
reading and hashing the whole file.
 | 
|
29  | 
||
30  | 
This is done by maintaining a cache indexed by a file fingerprint of
 | 
|
31  | 
(path, size, mtime, ctime, ino, dev) pointing to the SHA-1.  If the
 | 
|
32  | 
fingerprint has not changed, we assume the file content has not changed
 | 
|
33  | 
either and the SHA-1 is therefore the same.
 | 
|
34  | 
||
35  | 
If any of the fingerprint fields have changed then the file content
 | 
|
36  | 
*may* have changed, or it may not have.  We need to reread the file
 | 
|
37  | 
contents to make sure, but this is not visible to the user or
 | 
|
38  | 
higher-level code (except as a delay of course).
 | 
|
39  | 
||
40  | 
The mtime and ctime are stored with nanosecond fields, but not all
 | 
|
41  | 
filesystems give this level of precision.  There is therefore a
 | 
|
42  | 
possible race: the file might be modified twice within a second
 | 
|
43  | 
without changing the size or mtime, and a SHA-1 cached from the first
 | 
|
44  | 
version would be wrong.  We handle this by not recording a cached hash
 | 
|
45  | 
for any files which were modified in the current second and that
 | 
|
46  | 
therefore have the chance to change again before the second is up.
 | 
|
47  | 
||
48  | 
The only known hole in this design is if the system clock jumps
 | 
|
49  | 
backwards crossing invocations of bzr.  Please don't do that; use ntp
 | 
|
50  | 
to gradually adjust your clock or don't use bzr over the step.
 | 
|
51  | 
||
52  | 
At the moment this is stored in a simple textfile; it might be nice
 | 
|
53  | 
to use a tdb instead.
 | 
|
| 
434
by Martin Pool
 doc  | 
54  | 
|
55  | 
The cache is represented as a map from file_id to a tuple of (file_id,
 | 
|
56  | 
sha1, path, size, mtime, ctime, ino, dev).
 | 
|
| 
427
by Martin Pool
 - statcache docs  | 
57  | 
"""
 | 
| 
362
by Martin Pool
 - Import stat-cache code  | 
58  | 
|
59  | 
||
| 
436
by Martin Pool
 - Avoid dangerous files when writing out stat cache  | 
60  | 
# Positions of the fields inside the tuple returned by fingerprint():
# (size, mtime, ctime, ino, dev).
(FP_SIZE,
 FP_MTIME,
 FP_CTIME,
 FP_INO,
 FP_DEV) = range(5)
|
65  | 
||
| 
362
by Martin Pool
 - Import stat-cache code  | 
66  | 
|
| 
437
by Martin Pool
 - new command 'bzr modified' to exercise the statcache  | 
67  | 
# Positions of the leading fields of a cache entry tuple
# (file_id, sha1, path, size, mtime, ctime, ino, dev).
SC_FILE_ID, SC_SHA1 = 0, 1
|
69  | 
||
70  | 
||
| 
458
by Martin Pool
 - fix statcache update from subdirectories  | 
71  | 
def fingerprint(abspath):
    """Return the stat fingerprint of the file at abspath.

    The result is a tuple (size, mtime, ctime, ino, dev) taken from
    os.lstat(), so a symlink is described itself rather than followed.

    The two timestamps are truncated to whole seconds.  The on-disk
    cache is written with '%d' and read back as integers, so a
    fractional timestamp would never compare equal to the value
    reloaded from disk and the file would be re-hashed on every run.

    Returns None if the path cannot be statted (missing, etc) or if it
    is a directory.
    """
    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None

    if stat.S_ISDIR(fs.st_mode):
        return None

    return (fs.st_size, int(fs.st_mtime),
            int(fs.st_ctime), fs.st_ino, fs.st_dev)
|
83  | 
||
84  | 
||
| 
453
by Martin Pool
 - Split WorkingTree into its own file  | 
85  | 
def _write_cache(basedir, entry_iter, dangerfiles):
    """Rewrite the on-disk stat cache under basedir via AtomicFile.

    basedir -- Directory holding the '.bzr' control directory.

    entry_iter -- Iterable of cache entry tuples
        (file_id, sha1, path, size, mtime, ctime, ino, dev).

    dangerfiles -- Collection of file_ids whose entries must be left
        out of the file.

    Each entry becomes one line: file_id, sha1 and the quoted-printable
    encoded path, followed by the five stat fields as integers.  The
    output is committed once all entries are written; if the file is
    still open when this function exits, it is aborted instead.
    """
    from atomicfile import AtomicFile

    target = os.path.join(basedir, '.bzr', 'stat-cache')
    outf = AtomicFile(target, 'wb', 'utf-8')
    try:
        for entry in entry_iter:
            file_id = entry[0]
            if file_id not in dangerfiles:
                outf.write(file_id + ' ' + entry[1] + ' ')
                outf.write(b2a_qp(entry[2], True))
                outf.write(' %d %d %d %d %d\n' % entry[3:])
        outf.commit()
    finally:
        if not outf.closed:
            outf.abort()
|
| 
362
by Martin Pool
 - Import stat-cache code  | 
102  | 
|
103  | 
||
| 
453
by Martin Pool
 - Split WorkingTree into its own file  | 
104  | 
def load_cache(basedir):
    """Read the on-disk stat cache for the tree rooted at basedir.

    Returns a dict mapping file_id to a tuple of (file_id, sha1, path,
    size, mtime, ctime, ino, dev), with the five stat fields converted
    to integers.  If the cache file cannot be opened an empty dict is
    returned.

    Raises BzrError if the same file_id appears on more than one line.
    """
    import codecs

    cache = {}

    cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
    try:
        cachefile = codecs.open(cachefn, 'r', 'utf-8')
    except IOError:
        # the cache file could not be opened: start from scratch
        return cache

    try:
        for line in cachefile:
            f = line.split(' ')
            file_id = f[0]
            if file_id in cache:
                raise BzrError("duplicated file_id in cache: {%s}" % file_id)
            # int() promotes to a long by itself when the value needs
            # it, and ignores the newline left on the last field.
            cache[file_id] = ((f[0], f[1], a2b_qp(f[2]))
                              + tuple([int(x) for x in f[3:]]))
    finally:
        # always release the handle, including when a duplicated
        # file_id aborts the load
        cachefile.close()
    return cache
|
122  | 
||
123  | 
||
124  | 
||
125  | 
||
126  | 
def _files_from_inventory(inv):
    """Generate (file_id, path) pairs for the plain files in inv.

    Walks inv.iter_entries() and passes over every entry whose kind is
    not 'file'.
    """
    for path, entry in inv.iter_entries():
        if entry.kind == 'file':
            yield entry.file_id, path
|
131  | 
||
132  | 
||
| 
436
by Martin Pool
 - Avoid dangerous files when writing out stat cache  | 
133  | 
|
| 
453
by Martin Pool
 - Split WorkingTree into its own file  | 
134  | 
def update_cache(basedir, inv, flush=False):
    """Bring the stat cache for the branch up to date and return it.

    basedir -- Directory the cache is loaded from and the inventory
        paths are joined onto.

    inv -- Inventory whose 'file' entries are checked.

    flush -- discard any previous cache and recalculate from scratch.

    The returned cache may contain entries that have not been written
    to disk for files recently touched.
    """
    # TODO: It's supposed to be faster to stat the files in order by inum.
    # We don't directly know the inum of the files of course but we do
    # know where they were last sighted, so we can sort by that.

    assert isinstance(flush, bool)

    cache = {}
    if not flush:
        cache = load_cache(basedir)

    candidates = _files_from_inventory(inv)
    return _update_cache_from_list(basedir, cache, candidates)
|
154  | 
||
155  | 
||
156  | 
||
157  | 
def _update_cache_from_list(basedir, cache, to_update):
    """Update and return the cache for given files.

    basedir -- Directory that the paths in to_update are joined onto;
        the on-disk cache beneath it is rewritten if anything changed.

    cache -- Previously cached values to be validated.  The dict is
        modified in place and is also the return value.

    to_update -- Sequence of (file_id, path) pairs to check.

    For each pair: a path with no fingerprint (missing, or a directory)
    loses its cache entry; a path whose fingerprint equals the cached
    one is trusted without being read; any other path is read and
    hashed and its entry is replaced.
    """
    from sets import Set

    stat_cnt = hardcheck = change_cnt = 0

    # files that have been recently touched and can't be
    # committed to a persistent cache yet.
    dangerfiles = Set()
    now = int(time.time())

    for file_id, path in to_update:
        abspath = os.path.join(basedir, path)
        fp = fingerprint(abspath)
        stat_cnt += 1

        cacheentry = cache.get(file_id)

        if fp is None: # not here
            if cacheentry:
                del cache[file_id]
                change_cnt += 1
            continue

        if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):
            dangerfiles.add(file_id)

        if cacheentry and (cacheentry[3:] == fp):
            continue                    # all stat fields unchanged

        hardcheck += 1

        # close the file explicitly instead of relying on the garbage
        # collector to do it
        inf = open(abspath, 'rb')
        try:
            dig = sha.new(inf.read()).hexdigest()
        finally:
            inf.close()

        # Store the entry even when the digest is the same as last
        # time: the fingerprint differs, and unless the new one is
        # recorded the file would be re-read and re-hashed on every
        # later run.
        cache[file_id] = (file_id, dig, path) + fp
        change_cnt += 1

    mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '
           '%d in cache'
           % (stat_cnt, hardcheck, change_cnt, len(dangerfiles), len(cache)))

    if change_cnt:
        mutter('updating on-disk statcache')
        _write_cache(basedir, cache.itervalues(), dangerfiles)

    return cache