/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
15
16
"""A manager of caches."""
17
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
18
import atexit
19
import os
20
import shutil
21
import tempfile
22
import weakref
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
23
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
24
from ... import lru_cache, trace
25
from . import (
0.123.6 by Jelmer Vernooij
Split out reftracker.
26
    branch_mapper,
27
    )
6929.13.2 by Jelmer Vernooij
Remove functionality moved to fastimport.
28
from fastimport.reftracker import (
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
29
    RefTracker,
30
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
31
from .helpers import (
0.123.3 by Jelmer Vernooij
Fix some imports.
32
    single_plural,
33
    )
0.123.6 by Jelmer Vernooij
Split out reftracker.
34
35
36
class _Cleanup(object):
    """Release a CacheManager's on-disk resources when it goes away.

    The clean-up lives in this helper class, rather than in a ``__del__`` on
    the manager itself, to ensure that the object owning ``__del__`` is never
    part of a refcycle.

    Attributes (all assigned by the owner):
      disk_blobs: mapping whose values are tuples ending in a file name or
        None; every named file is unlinked by finalize().
      tempdir: path of a directory removed by finalize(), or None.
      small_blobs: file object closed by finalize(), or None.
    """

    def __init__(self, disk_blobs):
        self.disk_blobs = disk_blobs
        self.tempdir = None
        self.small_blobs = None

    def __del__(self):
        self.finalize()

    def finalize(self):
        """Delete the blob files, close the small-blob file, remove tempdir.

        Each resource is forgotten (set to None) once it has been released,
        so calling this more than once -- e.g. explicitly and then again
        from ``__del__`` -- is harmless.
        """
        if self.disk_blobs is not None:
            for info in self.disk_blobs.values():
                # The last tuple element is the per-blob file name; None
                # means there is no dedicated file to delete for this entry.
                if info[-1] is not None:
                    os.unlink(info[-1])
            self.disk_blobs = None
        if self.small_blobs is not None:
            self.small_blobs.close()
            self.small_blobs = None
        if self.tempdir is not None:
            shutil.rmtree(self.tempdir)
            # Forget the directory so a later finalize() (such as the one
            # triggered by __del__) does not try to remove it a second time.
            self.tempdir = None
61
62
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
63
class CacheManager(object):
    """Hold the caches used while processing an import stream.

    The manager keeps blobs (in memory, spilling "sticky" ones to disk when
    they grow too large), an LRU cache of inventories, the mark -> commit id
    table, a BranchMapper and a RefTracker.
    """

    # Flushed blobs smaller than this many bytes are appended to one shared
    # temporary file; larger ones each get a file of their own.
    _small_blob_threshold = 25 * 1024
    # Once the in-memory sticky blobs exceed this many bytes, flush to disk.
    _sticky_cache_size = 300 * 1024 * 1024
    # A flush moves blobs to disk until at most this many bytes remain.
    _sticky_flushed_size = 100 * 1024 * 1024

    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored as ``self.verbose``
        :param inventory_cache_size: size passed to the inventory LRUCache
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs are referenced more than once, and are saved until their
        # refcount goes to 0
        self._blobs = {}
        self._sticky_blobs = {}
        self._sticky_memory_bytes = 0
        # if we overflow our memory cache, then we will dump large blobs to
        # disk in this directory
        self._tempdir = None
        # id => (offset, n_bytes, fname)
        #   if fname is None, then the content is stored in the small file
        self._disk_blobs = {}
        # Shares the _disk_blobs dict, so files registered later are seen by
        # the clean-up helper too.
        self._cleanup = _Cleanup(self._disk_blobs)

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.marks = {}

        # Work out the blobs to make sticky - an empty mapping means all
        self._blob_ref_counts = {}
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
            except KeyError:
                # info not in file - possible when no blobs used
                pass

        # BranchMapper has no state (for now?), but we keep it around rather
        # than reinstantiate on every usage
        self.branch_mapper = branch_mapper.BranchMapper()

        self.reftracker = RefTracker()

    def add_mark(self, mark, commit_id):
        """Record that ``mark`` maps to ``commit_id``.

        :param mark: the mark, without its leading ':' (bytes)
        :param commit_id: the value to store for the mark
        :return: True if the mark was already recorded before this call
        :raises ValueError: if ``mark`` starts with ':'
        """
        if mark.startswith(b':'):
            raise ValueError(mark)
        # NOTE(review): the original local was called ``is_new`` although the
        # expression is True for an *existing* mark. The returned value is
        # kept unchanged because callers outside this file may rely on it --
        # confirm the intended meaning before flipping it.
        already_present = (mark in self.marks)
        self.marks[mark] = commit_id
        return already_present

    def lookup_committish(self, committish):
        """Resolve a 'committish' to a revision id.

        :param committish: A "committish" string; must start with ':'
        :return: Bazaar revision id
        :raises ValueError: if ``committish`` does not start with ':'
        :raises KeyError: if the mark has not been recorded
        """
        if not committish.startswith(b':'):
            raise ValueError(committish)
        return self.marks[committish.lstrip(b':')]

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached.

        :param note: callable taking one message string
        """
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.marks, "revision-ids", note=note)
        # These aren't interesting so omit from the output, at least for now
        # self._show_stats_for(self._blobs, "other blobs", note=note)
        # self.reftracker.dump_stats(note=note)

    def _show_stats_for(self, a_dict, label, note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the keys and the values need to support len().

        :param a_dict: the dictionary to report on
        :param label: name shown in the output line
        :param note: callable taking one message string
        :param tuple_key: if True, each key is a sequence of strings whose
            combined length is counted
        """
        count = len(a_dict)
        if tuple_key:
            # NOTE(review): keys are joined with a str separator, so their
            # elements must be str for this branch to work.
            size = sum(map(len, (''.join(k) for k in a_dict)))
        else:
            size = sum(map(len, a_dict))
        size += sum(map(len, a_dict.values()))
        # Report in the largest unit (K, M or G) that keeps the figure small.
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
                                              single_plural(count, "item", "items")))

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        # The byte counter tracks _sticky_blobs, so it must be reset with it;
        # otherwise later stores would trigger flushes far too early.
        self._sticky_memory_bytes = 0
        self.marks.clear()
        self.reftracker.clear()
        self.inventories.clear()

    def _flush_blobs_to_disk(self):
        """Move sticky blobs, largest first, from memory to temporary files.

        Blobs are moved until the in-memory total is no larger than
        ``_sticky_flushed_size``. Blobs below ``_small_blob_threshold`` are
        appended to one shared file; the others get one file each.
        """
        sticky_blobs = self._sticky_blobs
        total_blobs = len(sticky_blobs)
        # Sorted by ascending size so that pop() hands back the largest.
        blobs = sorted(sticky_blobs, key=lambda k: len(sticky_blobs[k]))
        if self._tempdir is None:
            tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
            self._tempdir = tempdir
            self._cleanup.tempdir = self._tempdir
            self._cleanup.small_blobs = tempfile.TemporaryFile(
                prefix='small-blobs-', dir=self._tempdir)
            small_blob_ref = weakref.ref(self._cleanup.small_blobs)
            # Even though we add it to _Cleanup it seems that the object can be
            # destroyed 'too late' for cleanup to actually occur. Probably a
            # combination of bzr's "die directly, don't clean up" and how
            # exceptions close the running stack.

            def exit_cleanup():
                small_blob = small_blob_ref()
                if small_blob is not None:
                    small_blob.close()
                shutil.rmtree(tempdir, ignore_errors=True)
            atexit.register(exit_cleanup)
        count = 0
        flushed_bytes = 0
        n_small_bytes = 0
        # Also stop when nothing is left to flush, so a byte counter that is
        # out of step with the cache cannot cause a pop() from an empty list.
        while blobs and self._sticky_memory_bytes > self._sticky_flushed_size:
            blob_id = blobs.pop()
            blob = sticky_blobs.pop(blob_id)
            n_bytes = len(blob)
            self._sticky_memory_bytes -= n_bytes
            if n_bytes < self._small_blob_threshold:
                f = self._cleanup.small_blobs
                f.seek(0, os.SEEK_END)
                self._disk_blobs[blob_id] = (f.tell(), n_bytes, None)
                f.write(blob)
                n_small_bytes += n_bytes
            else:
                fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
                # A file object writes the whole blob (a bare os.write may
                # stop short) and closes the descriptor even on error.
                with os.fdopen(fd, 'wb') as blob_file:
                    blob_file.write(blob)
                self._disk_blobs[blob_id] = (0, n_bytes, name)
            flushed_bytes += n_bytes
            del blob
            count += 1
        if not sticky_blobs:
            # Nothing is held in memory any more, so the total must be zero.
            self._sticky_memory_bytes = 0
        trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
                   % (count, total_blobs, flushed_bytes / 1024. / 1024,
                      n_small_bytes / 1024. / 1024))

    def store_blob(self, id, data):
        """Store a blob of data.

        :param id: the dataref the blob is stored under
        :param data: the blob content (bytes)
        """
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            self._replace_sticky(id, data)
            if self._sticky_memory_bytes > self._sticky_cache_size:
                self._flush_blobs_to_disk()
        elif data == b'':
            # Empty data is always sticky
            self._replace_sticky(id, data)
        else:
            self._blobs[id] = data

    def _replace_sticky(self, id, data):
        """Put ``data`` in the sticky cache and keep the byte total exact."""
        previous = self._sticky_blobs.get(id)
        if previous is not None:
            # Storing the same id again replaces the old content, so its
            # size must not stay in the running total.
            self._sticky_memory_bytes -= len(previous)
        self._sticky_blobs[id] = data
        self._sticky_memory_bytes += len(data)

    def _decref(self, id, cache, fn):
        """Drop one reference to blob ``id``; evict it when none remain.

        :param id: the dataref of the blob
        :param cache: the dict currently holding the blob's entry
        :param fn: file to unlink when the blob is evicted, or None
        :return: True if the entry was removed from ``cache``
        """
        if not self._blob_ref_counts:
            # Not reference counting: blobs are never evicted here.
            return False
        count = self._blob_ref_counts.get(id, None)
        if count is not None:
            count -= 1
            if count <= 0:
                del cache[id]
                if fn is not None:
                    os.unlink(fn)
                del self._blob_ref_counts[id]
                return True
            else:
                self._blob_ref_counts[id] = count
        return False

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Non-sticky blobs are removed from the cache as they are returned;
        sticky and on-disk blobs have one reference dropped instead.

        :param id: the dataref of the blob
        :return: the blob content
        :raises KeyError: if no blob is stored under ``id``
        """
        if id in self._blobs:
            return self._blobs.pop(id)
        if id in self._disk_blobs:
            (offset, n_bytes, fn) = self._disk_blobs[id]
            if fn is None:
                # Small blob: read its slice out of the shared file.
                f = self._cleanup.small_blobs
                f.seek(offset)
                content = f.read(n_bytes)
            else:
                with open(fn, 'rb') as fp:
                    content = fp.read()
            self._decref(id, self._disk_blobs, fn)
            return content
        content = self._sticky_blobs[id]
        if self._decref(id, self._sticky_blobs, None):
            self._sticky_memory_bytes -= len(content)
        return content