bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
|
0.64.334
by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan. |
14 |
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
15 |
|
16 |
"""A manager of caches."""
|
|
17 |
||
|
0.64.264
by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint |
18 |
import atexit |
19 |
import os |
|
20 |
import shutil |
|
21 |
import tempfile |
|
22 |
import weakref |
|
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
23 |
|
|
6628.1.2
by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata. |
24 |
from ... import lru_cache, trace |
25 |
from . import ( |
|
|
0.123.6
by Jelmer Vernooij
Split out reftracker. |
26 |
branch_mapper, |
27 |
)
|
|
|
6929.13.2
by Jelmer Vernooij
Remove functionality moved to fastimport. |
28 |
from fastimport.reftracker import ( |
|
0.64.349
by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2. |
29 |
RefTracker, |
30 |
)
|
|
|
6628.1.2
by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata. |
31 |
from .helpers import ( |
|
0.123.3
by Jelmer Vernooij
Fix some imports. |
32 |
single_plural, |
33 |
)
|
|
|
0.123.6
by Jelmer Vernooij
Split out reftracker. |
34 |
|
35 |
||
36 |
class _Cleanup(object):
    """Release the on-disk resources used for blobs flushed to disk.

    The state that needs cleaning up lives in this helper class, rather
    than on the object that uses it, to ensure that we are never in a
    refcycle.

    Attributes:
      disk_blobs: dict whose values are tuples ending in a file name, or in
        None when the entry has no file of its own
      tempdir: directory to remove on clean-up, or None
      small_blobs: open file object to close on clean-up, or None
    """

    def __init__(self, disk_blobs):
        self.disk_blobs = disk_blobs
        self.tempdir = None
        self.small_blobs = None

    def __del__(self):
        self.finalize()

    def finalize(self):
        """Delete the blob files, close the shared file, remove the tempdir.

        Safe to call more than once: every resource is forgotten as soon as
        it has been released, and files or directories that something else
        already removed are skipped instead of raising.
        """
        if self.disk_blobs is not None:
            for info in self.disk_blobs.values():
                path = info[-1]
                if path is None:
                    continue
                try:
                    os.unlink(path)
                except OSError:
                    # Only tolerate the "already gone" case; anything else
                    # is a real failure the caller should see.
                    if os.path.exists(path):
                        raise
            self.disk_blobs = None
        if self.small_blobs is not None:
            self.small_blobs.close()
            self.small_blobs = None
        if self.tempdir is not None:
            # Forget the directory first so that a later call (for example
            # from __del__) never tries to remove it a second time.
            tempdir, self.tempdir = self.tempdir, None
            if os.path.isdir(tempdir):
                shutil.rmtree(tempdir)
|
61 |
||
62 |
||
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
63 |
class CacheManager(object): |
|
0.123.6
by Jelmer Vernooij
Split out reftracker. |
64 |
|
|
7143.15.2
by Jelmer Vernooij
Run autopep8. |
65 |
# Blobs shorter than this many bytes are appended to one shared temporary
# file when they are flushed to disk; every other blob gets its own file.
_small_blob_threshold = 25 * 1024
# Once the sticky blobs held in memory exceed this many bytes, store_blob()
# flushes some of them to disk.
_sticky_cache_size = 300 * 1024 * 1024
# A flush keeps moving sticky blobs to disk until the bytes held in memory
# are no more than this.
_sticky_flushed_size = 100 * 1024 * 1024
|
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
68 |
|
|
0.83.1
by Ian Clatworthy
head tracking tests and fix |
69 |
def __init__(self, info=None, verbose=False, inventory_cache_size=10):
    """Create a manager of caches.

    :param info: a ConfigObj holding the output from
        the --info processor, or None if no hints are available
    :param verbose: stored unchanged as the ``verbose`` attribute
    :param inventory_cache_size: size handed to the LRU cache that holds
        the inventories
    """
    self.verbose = verbose

    # Blob caches, keyed by dataref (either :mark or the sha-1).
    # Sticky blobs are referenced more than once and are kept until their
    # refcount drops to 0.
    self._sticky_blobs = {}
    self._blobs = {}
    self._sticky_memory_bytes = 0

    # When the in-memory cache overflows, blobs are dumped to disk in this
    # directory.
    self._tempdir = None
    # id => (offset, n_bytes, fname); a fname of None means the content
    # is stored in the shared file of small blobs.
    self._disk_blobs = {}
    self._cleanup = _Cleanup(self._disk_blobs)

    # revision-id -> Inventory.  These are large, and most parents are
    # recent in history, so only a few are kept.
    self.inventories = lru_cache.LRUCache(inventory_cache_size)

    # import commit-id -> revision-id lookup table.  All of these must be
    # kept, but they are small.
    self.marks = {}

    # blob id -> number of references, taken from the hints when given.
    self._blob_ref_counts = {}
    if info is not None:
        try:
            # The parser hands values back as lists, already parsed.
            for count, blob_list in info['Blob reference counts'].items():
                self._blob_ref_counts.update(
                    dict.fromkeys(blob_list, int(count)))
        except KeyError:
            # Section not in the file - possible when no blobs were used.
            pass

    # BranchMapper has no state (for now?), but one instance is kept
    # around rather than creating a new one on every use.
    self.branch_mapper = branch_mapper.BranchMapper()

    self.reftracker = RefTracker()
122 |
||
|
0.129.2
by Jelmer Vernooij
Use lookup functions for committish. |
123 |
def add_mark(self, mark, commit_id):
    """Record the commit id that a mark refers to.

    :param mark: the mark as bytes, without the leading ``:``
    :param commit_id: the value to store for the mark
    :return: True if the mark had not been recorded before, False if an
        existing entry was replaced
    :raises ValueError: if mark starts with ``:``
    """
    if mark.startswith(b':'):
        raise ValueError(mark)
    # Has to be worked out before the assignment below adds the mark.
    is_new = mark not in self.marks
    self.marks[mark] = commit_id
    return is_new
|
0.129.2
by Jelmer Vernooij
Use lookup functions for committish. |
129 |
|
130 |
def lookup_committish(self, committish):
    """Look up the revision id recorded for a 'committish'.

    :param committish: A "committish" as bytes; it must start with ``:``
    :return: the value stored in ``marks`` for it
    :raises ValueError: if committish does not start with ``:``
    :raises KeyError: if no such mark has been recorded
    """
    if committish.startswith(b':'):
        # Marks are stored without their leading colon(s).
        mark = committish.lstrip(b':')
        return self.marks[mark]
    raise ValueError(committish)
|
|
0.129.2
by Jelmer Vernooij
Use lookup functions for committish. |
139 |
|
|
0.64.153
by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode |
140 |
def dump_stats(self, note=trace.note):
    """Report statistics about what has been cached.

    :param note: callable that is given each line of the report
    """
    # TODO: add in inventory statistics
    note("Cache statistics:")
    # The non-sticky blobs and the reftracker aren't interesting, so they
    # are left out of the report, at least for now.
    reported = (
        (self._sticky_blobs, "sticky blobs"),
        (self.marks, "revision-ids"),
    )
    for cache, label in reported:
        self._show_stats_for(cache, label, note=note)
|
|
0.64.153
by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode |
149 |
|
|
6656.1.1
by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers |
150 |
def _show_stats_for(self, a_dict, label, note, tuple_key=False):
    """Dump statistics about a given dictionary.

    Both the keys and the values need to support len().  The reported
    size is the total length of all keys plus all values.

    :param a_dict: the dictionary to report on
    :param label: name shown for the dictionary in the report
    :param note: callable that is given the formatted report line
    :param tuple_key: if True, every key is a tuple and the lengths of
        its elements are what gets counted
    """
    count = len(a_dict)
    if tuple_key:
        # Add up the element lengths directly instead of joining them:
        # ''.join() raises TypeError when the elements are bytes.
        size = sum(len(part) for key in a_dict for part in key)
    else:
        size = sum(map(len, a_dict))
    size += sum(map(len, a_dict.values()))
    # Scale to the largest unit that keeps the number readable.
    size = size * 1.0 / 1024
    unit = 'K'
    for bigger_unit in ('M', 'G'):
        if size <= 1024:
            break
        size = size / 1024
        unit = bigger_unit
    note(" %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
                                      single_plural(count, "item", "items")))
|
0.64.153
by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode |
171 |
|
172 |
def clear_all(self):
    """Free up any memory used by the caches.

    Only the in-memory caches are emptied; the record of blobs that were
    flushed to disk is not touched.
    """
    self._blobs.clear()
    self._sticky_blobs.clear()
    # No sticky blob is held in memory any more, so the byte counter that
    # decides when to flush has to start again from zero.
    self._sticky_memory_bytes = 0
    self.marks.clear()
    self.reftracker.clear()
    self.inventories.clear()
179 |
||
|
0.64.264
by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint |
180 |
def _flush_blobs_to_disk(self):
    """Move sticky blobs from memory to disk, largest first.

    Blobs are written out until the bytes held in memory are no more than
    ``_sticky_flushed_size`` (or until no sticky blob is left).  Blobs
    shorter than ``_small_blob_threshold`` are appended to one shared
    temporary file; every other blob gets a file of its own.  Where each
    blob ended up is recorded in ``_disk_blobs`` as
    ``(offset, n_bytes, fname)``, with a fname of None for the shared file.
    """
    sticky_blobs = self._sticky_blobs
    total_blobs = len(sticky_blobs)
    # Ascending by size, so that pop() hands back the largest blob left.
    blob_ids = sorted(sticky_blobs, key=lambda k: len(sticky_blobs[k]))
    if self._tempdir is None:
        tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
        self._tempdir = tempdir
        self._cleanup.tempdir = self._tempdir
        self._cleanup.small_blobs = tempfile.TemporaryFile(
            prefix='small-blobs-', dir=self._tempdir)
        small_blob_ref = weakref.ref(self._cleanup.small_blobs)
        # Even though we add it to _Cleanup it seems that the object can be
        # destroyed 'too late' for cleanup to actually occur. Probably a
        # combination of bzr's "die directly, don't clean up" and how
        # exceptions close the running stack.

        def exit_cleanup():
            small_blob = small_blob_ref()
            if small_blob is not None:
                small_blob.close()
            shutil.rmtree(tempdir, ignore_errors=True)
        atexit.register(exit_cleanup)
    count = 0
    n_large_bytes = 0
    n_small_bytes = 0
    # The check on blob_ids stops the loop once nothing is left to flush,
    # rather than failing on an empty list if the byte counter is too high.
    while (blob_ids
           and self._sticky_memory_bytes > self._sticky_flushed_size):
        blob_id = blob_ids.pop()
        blob = self._sticky_blobs.pop(blob_id)
        n_bytes = len(blob)
        self._sticky_memory_bytes -= n_bytes
        if n_bytes < self._small_blob_threshold:
            f = self._cleanup.small_blobs
            f.seek(0, os.SEEK_END)
            self._disk_blobs[blob_id] = (f.tell(), n_bytes, None)
            f.write(blob)
            n_small_bytes += n_bytes
        else:
            fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
            # A file object writes the whole blob (a bare os.write() may
            # stop short) and closes the descriptor even if writing fails.
            with os.fdopen(fd, 'wb') as blob_file:
                blob_file.write(blob)
            self._disk_blobs[blob_id] = (0, n_bytes, name)
            n_large_bytes += n_bytes
        # Drop our reference so the memory can be reclaimed straight away.
        del blob
        count += 1
    trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
               % (count, total_blobs, n_large_bytes / 1024. / 1024,
                  n_small_bytes / 1024. / 1024))
|
228 |
||
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
229 |
def store_blob(self, id, data):
    """Store a blob of data.

    The blob is kept as a sticky blob when no reference counts are known,
    when the id has a reference count, or when the data is empty; any
    other blob goes into the cache of non-sticky blobs.  Storing a counted
    sticky blob may trigger a flush of sticky blobs to disk.

    :param id: the key to store the blob under
    :param data: the content of the blob, as bytes
    """
    # Note: If we're not reference counting, everything has to be sticky
    if not self._blob_ref_counts or id in self._blob_ref_counts:
        replaced = self._sticky_blobs.get(id)
        if replaced is not None:
            # The old content is dropped by the assignment below, so its
            # bytes must not stay in the running total.
            self._sticky_memory_bytes -= len(replaced)
        self._sticky_blobs[id] = data
        self._sticky_memory_bytes += len(data)
        if self._sticky_memory_bytes > self._sticky_cache_size:
            self._flush_blobs_to_disk()
    elif data == b'':
        # Empty data is always sticky
        self._sticky_blobs[id] = data
    else:
        self._blobs[id] = data
|
242 |
||
|
0.64.264
by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint |
243 |
def _decref(self, id, cache, fn):
    """Drop one reference to a blob, discarding it after the last one.

    Nothing happens when no reference counts are known at all, or when
    this particular id has none.

    :param id: the key of the blob
    :param cache: the dictionary the blob is removed from once its count
        reaches zero
    :param fn: name of a file to delete along with the blob, or None
    :return: True if the blob was removed from cache, otherwise False
    """
    ref_counts = self._blob_ref_counts
    if not ref_counts:
        return False
    remaining = ref_counts.get(id)
    if remaining is None:
        return False
    remaining -= 1
    if remaining > 0:
        # Still referenced: just remember the lower count.
        ref_counts[id] = remaining
        return False
    del cache[id]
    if fn is not None:
        os.unlink(fn)
    del ref_counts[id]
    return True
|
258 |
||
|
0.78.3
by Ian Clatworthy
move GenericCacheManager into its own module |
259 |
def fetch_blob(self, id):
    """Return the content stored for a blob.

    A non-sticky blob is removed from its cache as it is returned.  A
    sticky blob, whether still in memory or flushed to disk, has one
    reference dropped each time it is fetched.

    :param id: the key the blob was stored under
    :return: the content of the blob
    :raises KeyError: if no blob is known under that id
    """
    transient = self._blobs
    if id in transient:
        return transient.pop(id)

    on_disk = self._disk_blobs.get(id)
    if on_disk is None:
        # Not flushed to disk, so it has to be a sticky blob in memory.
        data = self._sticky_blobs[id]
        released = self._decref(id, self._sticky_blobs, None)
        if released:
            self._sticky_memory_bytes -= len(data)
        return data

    offset, n_bytes, path = on_disk
    if path is not None:
        # The blob has a file of its own.
        with open(path, 'rb') as stream:
            data = stream.read()
    else:
        # The blob lives inside the shared file of small blobs.
        shared = self._cleanup.small_blobs
        shared.seek(offset)
        data = shared.read(n_bytes)
    self._decref(id, self._disk_blobs, path)
    return data