bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
revision 362 by Martin Pool - Import stat-cache code

# (C) 2005 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

import stat, os, sha, time
from binascii import b2a_qp, a2b_qp

from trace import mutter
from errors import BzrError   # raised in load_cache(); module path assumed, mirroring the trace import


# file fingerprints are: (path, size, mtime, ctime, ino, dev).
#
# if this is the same for this file as in the previous revision, we
# assume the content is the same and the SHA-1 is the same.

# This is stored in a fingerprint file that also contains the file-id
# and the content SHA-1.

# Thus for any given file we can quickly get the SHA-1: straight from
# the cache if the fingerprint matches, or by re-reading the file if
# the cache is out of date.

# At the moment this is stored in a simple textfile; it might be nice
# to use a tdb instead.

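# For illustration (not part of the original file): with the record
# layout used by write_cache() and load_cache() below, one line of the
# 'work-cache' text file holds, separated by single spaces,
#
#   file_id  sha1-hex  quoted-printable(path)  size mtime ctime ino dev
#
# so a hypothetical entry might read
#
#   hello.c-123abc 5ba93c9db0cff93f52b521d7420e43f6eda2784f hello.c 911 1118000000 1118000000 123456 2049
#
# (b2a_qp is called with quotetabs=True, so spaces and tabs inside the
# path are escaped and the space-separated line stays parseable.)
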
# What we need:

# build a new cache from scratch
# load cache, incrementally update it

# TODO: Have a paranoid mode where we always compare the texts and
# always recalculate the digest, to trap modification without stat
# change and SHA collisions.

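# Illustrative sketch, not part of the original file: the "always
# recalculate the digest" half of the TODO above could look like the
# helper below, which ignores the stat fingerprint entirely and reports
# whether a file's current content digest disagrees with the cached
# SHA-1 (cache entries are laid out as written by write_cache() below).
def _paranoid_digest_changed(cache, file_id, abspath):
    dig = sha.new(file(abspath, 'rb').read()).hexdigest()
    entry = cache.get(file_id)
    if entry is None:
        return True
    return dig != entry[1]

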
def fingerprint(path, abspath):
    try:
        fs = os.lstat(abspath)
    except OSError:
        # might be missing, etc
        return None

    if stat.S_ISDIR(fs.st_mode):
        return None

    return (fs.st_size, fs.st_mtime,
            fs.st_ctime, fs.st_ino, fs.st_dev)


def write_cache(branch, entry_iter):
    outf = branch.controlfile('work-cache.tmp', 'wt')
    for entry in entry_iter:
        outf.write(entry[0] + ' ' + entry[1] + ' ')
        outf.write(b2a_qp(entry[2], True))
        outf.write(' %d %d %d %d %d\n' % entry[3:])

    outf.close()
    os.rename(branch.controlfilename('work-cache.tmp'),
              branch.controlfilename('work-cache'))


def load_cache(branch):
    cache = {}

    try:
        cachefile = branch.controlfile('work-cache', 'rt')
    except IOError:
        return cache

    for l in cachefile:
        f = l.split(' ')
        file_id = f[0]
        if file_id in cache:
            raise BzrError("duplicated file_id in cache: {%s}" % file_id)
        cache[file_id] = (f[0], f[1], a2b_qp(f[2])) + tuple([long(x) for x in f[3:]])
    return cache


def _files_from_inventory(inv):
    for path, ie in inv.iter_entries():
        if ie.kind != 'file':
            continue
        yield ie.file_id, path


def build_cache(branch):
    inv = branch.read_working_inventory()

    cache = {}
    _update_cache_from_list(branch, cache, _files_from_inventory(inv))


def update_cache(branch, inv):
    # TODO: It's supposed to be faster to stat the files in order by inum.
    # We don't directly know the inum of the files of course but we do
    # know where they were last sighted, so we can sort by that.

    cache = load_cache(branch)
    return _update_cache_from_list(branch, cache, _files_from_inventory(inv))


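# Illustrative sketch, not part of the original file: one way to act on
# the TODO in update_cache() above is to sort the files to be stat'ed
# by the inode number recorded for them in the existing cache (field 6
# of an entry as written by write_cache()), so that files are visited
# roughly in on-disk order.  Files not yet in the cache sort first.
def _sorted_by_cached_inum(cache, to_update):
    def last_known_inum(item):
        entry = cache.get(item[0])
        if entry:
            return entry[6]
        return 0
    return sorted(to_update, key=last_known_inum)

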
def _update_cache_from_list(branch, cache, to_update):
    """Update the cache to have info on the named files.

    to_update is a sequence of (file_id, path) pairs.
    """
    hardcheck = dirty = 0
    for file_id, path in to_update:
        fap = branch.abspath(path)
        fp = fingerprint(path, fap)
        cacheentry = cache.get(file_id)

        if fp == None: # not here
            if cacheentry:
                del cache[file_id]
                dirty += 1
            continue

        if cacheentry and (cacheentry[3:] == fp):
            continue                # all stat fields unchanged

        hardcheck += 1

        dig = sha.new(file(fap, 'rb').read()).hexdigest()

        if cacheentry == None or dig != cacheentry[1]:
            # if there was no previous entry for this file, or if the
            # SHA has changed, then update the cache
            cacheentry = (file_id, dig, path) + fp
            cache[file_id] = cacheentry
            dirty += 1

    mutter('work cache: read %d files, %d changed' % (hardcheck, dirty))

    if dirty:
        write_cache(branch, cache.itervalues())

    return cache
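
# Example usage (illustrative, not part of the original file): given a
# branch object `b` providing the controlfile()/controlfilename()/
# abspath()/read_working_inventory() methods used above, a caller would
# refresh the cache and look up a file's content SHA-1 roughly as:
#
#   cache = update_cache(b, b.read_working_inventory())
#   file_sha1 = cache[some_file_id][1]   # entry: (file_id, sha1, path, size, mtime, ctime, ino, dev)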