# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""A manager of caches."""

from __future__ import absolute_import

import atexit
import os
import shutil
import tempfile
import weakref

from ... import lru_cache, trace
from . import (
    branch_mapper,
    )
from .reftracker import (
    RefTracker,
    )
from .helpers import (
    single_plural,
    )


class _Cleanup(object):
    """This class makes sure we clean up when CacheManager goes away.

    We use a helper class to ensure that we are never in a refcycle.
    """

def __init__(self, disk_blobs):
        self.disk_blobs = disk_blobs
        self.tempdir = None
        self.small_blobs = None

    def __del__(self):
        self.finalize()

    def finalize(self):
if self.disk_blobs is not None:
            for info in self.disk_blobs.itervalues():
                if info[-1] is not None:
                    os.unlink(info[-1])
self.disk_blobs = None
        if self.small_blobs is not None:
            self.small_blobs.close()
            self.small_blobs = None
        if self.tempdir is not None:
shutil.rmtree(self.tempdir)
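
# In Python 2, an object with a __del__ method that is part of a reference
# cycle is never collected (it lands in gc.garbage), so CacheManager avoids
# defining __del__ itself and delegates cleanup to this helper, which holds
# no reference back to the manager.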


class CacheManager(object):

    _small_blob_threshold = 25*1024
    _sticky_cache_size = 300*1024*1024
    _sticky_flushed_size = 100*1024*1024

def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs are referenced more than once, and are saved until their
        #   refcount drops to 0
        self._blobs = {}
        self._sticky_blobs = {}
        self._sticky_memory_bytes = 0
        # if we overflow our memory cache, then we will dump large blobs to
        # disk in this directory
        self._tempdir = None
        # id => (offset, n_bytes, fname)
        #   if fname is None, then the content is stored in the small file
        self._disk_blobs = {}
self._cleanup = _Cleanup(self._disk_blobs)
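        # For illustration (ids, sizes and paths invented): after a flush,
        # _disk_blobs might look like
        #   {'sha1-aaaa': (0, 512, None),   # lives in the shared small file
        #    'sha1-bbbb': (0, 30000000, '/tmp/fastimport_blobs-X/blob-Z')}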

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.marks = {}

# (path, branch_ref) -> file-ids - as generated.
        # (Use store_file_id/fetch_fileid methods rather than direct access.)

        # Work out the blobs to make sticky - None means all
self._blob_ref_counts = {}
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
            except KeyError:
                # info not in file - possible when no blobs used
                pass

# BranchMapper has no state (for now?), but we keep it around rather
        # than reinstantiate on every usage
        self.branch_mapper = branch_mapper.BranchMapper()

self.reftracker = RefTracker()

    def add_mark(self, mark, commit_id):
        assert mark[0] != ':'
self.marks[mark] = commit_id

    def lookup_committish(self, committish):
        """Resolve a 'committish' to a revision id.

        :param committish: A "committish" string
        :return: Bazaar revision id
        """
        assert committish[0] == ':'
return self.marks[committish.lstrip(':')]
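
    # A minimal round-trip sketch (mark and revision id invented for the
    # example): marks are stored without the leading ':' but looked up with
    # it.
    #
    #   manager.add_mark('1', 'user@example.com-20090101000000-abcdef')
    #   manager.lookup_committish(':1')
    #   # -> 'user@example.com-20090101000000-abcdef'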

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached."""
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.marks, "revision-ids", note=note)
        # These aren't interesting so omit from the output, at least for now
        #self._show_stats_for(self._blobs, "other blobs", note=note)
        #self.reftracker.dump_stats(note=note)

def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the key and the value need to support len().
        """
        count = len(dict)
        if tuple_key:
            size = sum(map(len, (''.join(k) for k in dict.keys())))
        else:
            size = sum(map(len, dict.keys()))
        size += sum(map(len, dict.values()))
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
single_plural(count, "item", "items")))
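
    # For illustration (numbers invented): 1234 revision-ids whose keys and
    # values total ~12.6KB would be reported as
    #   revision-ids:     12.6 K (1234 items)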

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        self.marks.clear()
        self.reftracker.clear()
        self.inventories.clear()

def _flush_blobs_to_disk(self):
        blobs = self._sticky_blobs.keys()
        sticky_blobs = self._sticky_blobs
        total_blobs = len(sticky_blobs)
        blobs.sort(key=lambda k: len(sticky_blobs[k]))
        if self._tempdir is None:
            tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
            self._tempdir = tempdir
            self._cleanup.tempdir = self._tempdir
            self._cleanup.small_blobs = tempfile.TemporaryFile(
                prefix='small-blobs-', dir=self._tempdir)
            small_blob_ref = weakref.ref(self._cleanup.small_blobs)
            # Even though we add it to _Cleanup it seems that the object can be
            # destroyed 'too late' for cleanup to actually occur. Probably a
            # combination of bzr's "die directly, don't clean up" and how
            # exceptions close the running stack.
            def exit_cleanup():
                small_blob = small_blob_ref()
                if small_blob is not None:
                    small_blob.close()
                shutil.rmtree(tempdir, ignore_errors=True)
            atexit.register(exit_cleanup)
        count = 0
        bytes = 0
        n_small_bytes = 0
while self._sticky_memory_bytes > self._sticky_flushed_size:
            id = blobs.pop()
            blob = self._sticky_blobs.pop(id)
            n_bytes = len(blob)
            self._sticky_memory_bytes -= n_bytes
            if n_bytes < self._small_blob_threshold:
                f = self._cleanup.small_blobs
                f.seek(0, os.SEEK_END)
                self._disk_blobs[id] = (f.tell(), n_bytes, None)
                f.write(blob)
                n_small_bytes += n_bytes
            else:
                fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
                os.write(fd, blob)
                os.close(fd)
                self._disk_blobs[id] = (0, n_bytes, name)
            bytes += n_bytes
            del blob
            count += 1
trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
                   % (count, total_blobs, bytes / 1024. / 1024,
n_small_bytes / 1024. / 1024))
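
    # Worked example of the defaults (hypothetical sizes): once blobs held in
    # memory exceed _sticky_cache_size (300MB), store_blob() triggers a flush
    # that evicts the largest blobs first until no more than
    # _sticky_flushed_size (100MB) remains; anything under
    # _small_blob_threshold (25KB) is appended to the shared small-blobs
    # file, while bigger blobs each get their own temp file.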

    def store_blob(self, id, data):
        """Store a blob of data."""
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            self._sticky_blobs[id] = data
            self._sticky_memory_bytes += len(data)
            if self._sticky_memory_bytes > self._sticky_cache_size:
                self._flush_blobs_to_disk()
        elif data == '':
            # Empty data is always sticky
            self._sticky_blobs[id] = data
        else:
self._blobs[id] = data
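
    # A short sketch of the two store paths (id and data invented for the
    # example): without reference-count hints every blob is sticky; with
    # hints, unlisted blobs are one-shot and fetch_blob() pops them on first
    # use.
    #
    #   manager.store_blob('sha1-aaaa', 'payload')
    #   manager.fetch_blob('sha1-aaaa')  # -> 'payload'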

    def _decref(self, id, cache, fn):
        if not self._blob_ref_counts:
            return False
        count = self._blob_ref_counts.get(id, None)
        if count is not None:
            count -= 1
            if count <= 0:
                del cache[id]
                if fn is not None:
                    os.unlink(fn)
                del self._blob_ref_counts[id]
                return True
            else:
                self._blob_ref_counts[id] = count
        return False

def fetch_blob(self, id):
        """Fetch a blob of data."""
        if id in self._blobs:
            return self._blobs.pop(id)
        if id in self._disk_blobs:
            (offset, n_bytes, fn) = self._disk_blobs[id]
            if fn is None:
                f = self._cleanup.small_blobs
                f.seek(offset)
                content = f.read(n_bytes)
            else:
                fp = open(fn, 'rb')
                try:
                    content = fp.read()
                finally:
                    fp.close()
            self._decref(id, self._disk_blobs, fn)
            return content
        content = self._sticky_blobs[id]
        if self._decref(id, self._sticky_blobs, None):
            self._sticky_memory_bytes -= len(content)
        return content


def invert_dictset(d):
    """Invert a dictionary with keys matching a set of values, turned into lists."""
    # Based on recipe from ASPN
    result = {}
    for k, c in d.iteritems():
        for v in c:
            keys = result.setdefault(v, [])
            keys.append(k)
    return result
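
# A worked example (values invented): inverting a mapping of branches to the
# paths they touch yields, for each path, the list of branches. List order
# follows dict iteration order.
#
#   invert_dictset({'trunk': set(['a', 'b']), 'fix': set(['b'])})
#   # -> {'a': ['trunk'], 'b': ['trunk', 'fix']}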