bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2993.1.1
by Robert Collins
 * New module ``lru_cache`` providing a cache for use by tasks that need  | 
1  | 
# Copyright (C) 2006 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""A simple least-recently-used (LRU) cache."""
 | 
|
18  | 
||
19  | 
from collections import deque  | 
|
20  | 
import gc  | 
|
21  | 
||
22  | 
||
23  | 
class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones."""

    def __init__(self, max_cache=100, after_cleanup_size=None):
        """Create a new LRUCache.

        :param max_cache: The maximum number of entries to hold before
            triggering a cleanup.
        :param after_cleanup_size: After a cleanup, shrink the cache to this
            many entries (capped at max_cache). Defaults to max_cache.
        """
        self._max_cache = max_cache
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_cache
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_cache)
        # When the access queue grows past this, compact it by dropping
        # duplicate access records (keeping only the most recent per key).
        self._compact_queue_length = 4*self._max_cache

        self._cache = {}
        self._cleanup = {}
        self._queue = deque() # Track when things are accessed
        self._refcount = {} # number of entries in self._queue for each key

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        # Look up first, so a missing key raises KeyError without touching
        # the access queue.
        val = self._cache[key]
        self._record_access(key)
        return val

    def __len__(self):
        return len(self._cache)

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the queue, call cleanup.
        Passing it the key and value being removed.

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key in self._cache:
            # Replacing an existing entry: run its cleanup and drop it
            # before inserting the new value.
            self._remove(key)
        self._cache[key] = value
        self._cleanup[key] = cleanup
        self._record_access(key)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def get(self, key, default=None):
        """Return the cached value for key, or default if not present.

        A hit counts as an access for LRU purposes.
        """
        # EAFP: a single lookup instead of `in` followed by __getitem__.
        try:
            return self[key]
        except KeyError:
            return default

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_size:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, key):
        """Record that key was accessed."""
        self._queue.append(key)
        # Can't use setdefault because you can't += 1 the result
        self._refcount[key] = self._refcount.get(key, 0) + 1

        # If our access queue is too large, clean it up too
        if len(self._queue) > self._compact_queue_length:
            self._compact_queue()

    def _compact_queue(self):
        """Compact the queue, leaving things in sorted last appended order."""
        new_queue = deque()
        for item in self._queue:
            if self._refcount[item] == 1:
                new_queue.append(item)
            else:
                self._refcount[item] -= 1
        self._queue = new_queue
        # All entries should be of the same size. There should be one entry in
        # queue for each entry in cache, and all refcounts should == 1
        # (values() rather than the Python-2-only itervalues(), which raises
        # AttributeError on Python 3; values() behaves the same under sum()).
        assert (len(self._queue) == len(self._cache) ==
                len(self._refcount) == sum(self._refcount.values()))

    def _remove(self, key):
        """Remove an entry, making sure to maintain the invariants."""
        cleanup = self._cleanup.pop(key)
        val = self._cache.pop(key)
        if cleanup is not None:
            cleanup(key, val)
        return val

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        key = self._queue.popleft()
        self._refcount[key] -= 1
        if not self._refcount[key]:
            del self._refcount[key]
            self._remove(key)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()
|
140  | 
||
141  | 
||
142  | 
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The values that are added must support len(value).
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        :param max_size: The max number of bytes to store before we start
            clearing out entries.
        :param after_cleanup_size: After cleaning up, shrink everything to this
            size.
        :param compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        # This approximates that texts are > 0.5k in size. It only really
        # effects when we clean up the queue, so we don't want it to be too
        # large. Use floor division: int(max_size/512) goes through float
        # division on Python 3 and loses precision for very large max_size.
        LRUCache.__init__(self, max_cache=max_size // 512)
        self._max_size = max_size
        if after_cleanup_size is None:
            self._after_cleanup_size = self._max_size
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)

        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the queue, call cleanup.
        Passing it the key and value being removed.

        :param key: The key to store it under
        :param value: The object to store
        :param cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key in self._cache:
            self._remove(key)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The value is too large to ever stay in the cache, so it is not
            # stored. It will never appear in the queue, so this is the only
            # chance to honour the cleanup contract — the original silently
            # dropped the value without calling cleanup.
            if cleanup is not None:
                cleanup(key, value)
            return
        self._value_size += value_len
        self._cache[key] = value
        self._cleanup[key] = cleanup
        self._record_access(key)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove(self, key):
        """Remove an entry, making sure to maintain the invariants."""
        val = LRUCache._remove(self, key)
        # NOTE(review): assumes compute_size is deterministic for a given
        # value, so the size subtracted here matches the one added in add().
        self._value_size -= self._compute_size(val)