/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
15
16
"""A manager of caches."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
19
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
20
import atexit
21
import os
22
import shutil
23
import tempfile
24
import weakref
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
25
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
26
from ... import lru_cache, trace
27
from . import (
0.123.6 by Jelmer Vernooij
Split out reftracker.
28
    branch_mapper,
29
    )
6929.13.2 by Jelmer Vernooij
Remove functionality moved to fastimport.
30
from fastimport.reftracker import (
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
31
    RefTracker,
32
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
33
from .helpers import (
0.123.3 by Jelmer Vernooij
Fix some imports.
34
    single_plural,
35
    )
0.123.6 by Jelmer Vernooij
Split out reftracker.
36
37
38
class _Cleanup(object):
39
    """This class makes sure we clean up when CacheManager goes away.
40
41
    We use a helper class to ensure that we are never in a refcycle.
42
    """
43
44
    def __init__(self, disk_blobs):
45
        self.disk_blobs = disk_blobs
46
        self.tempdir = None
47
        self.small_blobs = None
48
49
    def __del__(self):
50
        self.finalize()
51
52
    def finalize(self):
53
        if self.disk_blobs is not None:
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
54
            for info in self.disk_blobs.values():
0.123.6 by Jelmer Vernooij
Split out reftracker.
55
                if info[-1] is not None:
56
                    os.unlink(info[-1])
57
            self.disk_blobs = None
58
        if self.small_blobs is not None:
59
            self.small_blobs.close()
60
            self.small_blobs = None
61
        if self.tempdir is not None:
62
            shutil.rmtree(self.tempdir)
63
64
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
65
class CacheManager(object):
    """A manager of caches: blobs, inventories, marks and ref tracking."""

    # Flushed blobs smaller than this many bytes are appended to one shared
    # file instead of each getting a file of their own.
    _small_blob_threshold = 25 * 1024
    # Once the sticky blobs held in memory exceed this many bytes, a flush
    # to disk is triggered.
    _sticky_cache_size = 300 * 1024 * 1024
    # A flush keeps evicting blobs until the in-memory total is no larger
    # than this many bytes.
    _sticky_flushed_size = 100 * 1024 * 1024
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
70
0.83.1 by Ian Clatworthy
head tracking tests and fix
71
    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: kept as ``self.verbose``
        :param inventory_cache_size: size passed to the LRU cache that
            holds inventories
        """
        self.verbose = verbose

        # Blob caches, keyed by dataref (either a :mark or the sha-1).
        # Sticky blobs are referenced more than once and are kept until
        # their reference count drops to 0.
        self._blobs = {}
        self._sticky_blobs = {}
        self._sticky_memory_bytes = 0

        # When the in-memory cache overflows, large blobs are dumped to
        # disk in this directory (created on first use).
        self._tempdir = None
        # id => (offset, n_bytes, fname); a fname of None means the content
        # lives in the shared small-blobs file.
        self._disk_blobs = {}
        self._cleanup = _Cleanup(self._disk_blobs)

        # revision-id -> Inventory.  These are large, and we probably don't
        # need many of them as most parents are recent in history.
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-id -> revision-id lookup table.  All entries must be
        # kept, but they are small.
        self.marks = {}

        # blob id -> number of outstanding references.  When this stays
        # empty no reference counting is done and every blob is sticky.
        self._blob_ref_counts = {}
        if info is not None:
            try:
                # The parser hands values back as lists, already parsed
                for count, blob_list in info['Blob reference counts'].items():
                    self._blob_ref_counts.update(
                        dict.fromkeys(blob_list, int(count)))
            except KeyError:
                # section not in the info file - possible when no blobs used
                pass

        # BranchMapper has no state (for now?), but we keep it around rather
        # than reinstantiate on every usage
        self.branch_mapper = branch_mapper.BranchMapper()

        self.reftracker = RefTracker()
124
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
125
    def add_mark(self, mark, commit_id):
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
126
        if mark.startswith(b':'):
127
            raise ValueError(mark)
6846.3.1 by Jelmer Vernooij
Support '0' marker in fastimport plugin.
128
        is_new = (mark in self.marks)
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
129
        self.marks[mark] = commit_id
6846.3.1 by Jelmer Vernooij
Support '0' marker in fastimport plugin.
130
        return is_new
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
131
132
    def lookup_committish(self, committish):
133
        """Resolve a 'committish' to a revision id.
134
135
        :param committish: A "committish" string
136
        :return: Bazaar revision id
137
        """
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
138
        if not committish.startswith(b':'):
139
            raise ValueError(committish)
140
        return self.marks[committish.lstrip(b':')]
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
141
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
142
    def dump_stats(self, note=trace.note):
        """Report a summary of the sticky blob and mark caches.

        :param note: callable that receives each formatted line
        """
        # TODO: add in inventory statistics
        note("Cache statistics:")
        # The non-sticky blobs and the ref tracker aren't interesting, so
        # they are left out of the output, at least for now.
        reported = (
            (self._sticky_blobs, "sticky blobs"),
            (self.marks, "revision-ids"),
            )
        for cache, label in reported:
            self._show_stats_for(cache, label, note=note)
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
151
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
152
    def _show_stats_for(self, a_dict, label, note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the keys and the values need to support len().

        :param a_dict: the dictionary to summarise
        :param label: text shown at the start of the output line
        :param note: callable that receives the formatted line
        :param tuple_key: if True, every key is a tuple and the lengths of
            its elements are what gets counted
        """
        count = len(a_dict)
        if tuple_key:
            # Add up the element lengths directly; joining the elements with
            # a text separator fails when they are bytes.
            size = sum(len(part) for key in a_dict for part in key)
        else:
            size = sum(map(len, a_dict))
        size += sum(map(len, a_dict.values()))
        # Report in K, moving up to M and then G while the figure is
        # larger than 1024.
        size = size * 1.0 / 1024
        unit = 'K'
        for bigger_unit in ('M', 'G'):
            if size <= 1024:
                break
            size = size / 1024
            unit = bigger_unit
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
                                              single_plural(count, "item", "items")))
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
173
174
    def clear_all(self):
175
        """Free up any memory used by the caches."""
176
        self._blobs.clear()
177
        self._sticky_blobs.clear()
0.129.2 by Jelmer Vernooij
Use lookup functions for committish.
178
        self.marks.clear()
0.123.6 by Jelmer Vernooij
Split out reftracker.
179
        self.reftracker.clear()
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
180
        self.inventories.clear()
181
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
182
    def _flush_blobs_to_disk(self):
        """Move sticky blobs from memory to disk, largest first.

        Blobs are evicted until the in-memory total is no larger than
        ``_sticky_flushed_size`` or there is nothing left to evict.  Blobs
        smaller than ``_small_blob_threshold`` are appended to one shared
        temporary file; every other blob gets a file of its own.  The
        location of each evicted blob is recorded in ``_disk_blobs`` as
        ``(offset, n_bytes, fname)``, with ``fname`` None for the shared
        file.  The temporary directory is created on the first flush.
        """
        sticky_blobs = self._sticky_blobs
        total_blobs = len(sticky_blobs)
        # Ascending by size, so that pop() hands back the largest blob left.
        blobs = sorted(sticky_blobs, key=lambda k: len(sticky_blobs[k]))
        if self._tempdir is None:
            tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
            self._tempdir = tempdir
            self._cleanup.tempdir = self._tempdir
            self._cleanup.small_blobs = tempfile.TemporaryFile(
                prefix='small-blobs-', dir=self._tempdir)
            small_blob_ref = weakref.ref(self._cleanup.small_blobs)
            # Even though we add it to _Cleanup it seems that the object can be
            # destroyed 'too late' for cleanup to actually occur. Probably a
            # combination of bzr's "die directly, don't clean up" and how
            # exceptions close the running stack.

            def exit_cleanup():
                small_blob = small_blob_ref()
                if small_blob is not None:
                    small_blob.close()
                shutil.rmtree(tempdir, ignore_errors=True)
            atexit.register(exit_cleanup)
        count = 0
        n_flushed_bytes = 0
        n_small_bytes = 0
        # Also stop when the candidates run out: the byte counter is only an
        # estimate, and popping from an empty list would raise IndexError.
        while blobs and self._sticky_memory_bytes > self._sticky_flushed_size:
            blob_id = blobs.pop()
            blob = sticky_blobs.pop(blob_id)
            n_bytes = len(blob)
            self._sticky_memory_bytes -= n_bytes
            if n_bytes < self._small_blob_threshold:
                f = self._cleanup.small_blobs
                f.seek(0, os.SEEK_END)
                self._disk_blobs[blob_id] = (f.tell(), n_bytes, None)
                f.write(blob)
                n_small_bytes += n_bytes
            else:
                fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
                # A buffered file object writes the whole blob (a bare
                # os.write may stop short) and closes the descriptor even
                # when the write fails.
                with os.fdopen(fd, 'wb') as blob_file:
                    blob_file.write(blob)
                self._disk_blobs[blob_id] = (0, n_bytes, name)
            n_flushed_bytes += n_bytes
            # Release the data right away rather than at the next iteration.
            del blob
            count += 1
        trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
                   % (count, total_blobs, n_flushed_bytes / 1024. / 1024,
                      n_small_bytes / 1024. / 1024))
230
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
231
    def store_blob(self, id, data):
232
        """Store a blob of data."""
0.64.169 by Ian Clatworthy
fix blob tracking when -v not given
233
        # Note: If we're not reference counting, everything has to be sticky
234
        if not self._blob_ref_counts or id in self._blob_ref_counts:
235
            self._sticky_blobs[id] = data
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
236
            self._sticky_memory_bytes += len(data)
237
            if self._sticky_memory_bytes > self._sticky_cache_size:
238
                self._flush_blobs_to_disk()
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
239
        elif data == b'':
0.64.169 by Ian Clatworthy
fix blob tracking when -v not given
240
            # Empty data is always sticky
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
241
            self._sticky_blobs[id] = data
242
        else:
243
            self._blobs[id] = data
244
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
245
    def _decref(self, id, cache, fn):
246
        if not self._blob_ref_counts:
247
            return False
248
        count = self._blob_ref_counts.get(id, None)
249
        if count is not None:
250
            count -= 1
251
            if count <= 0:
252
                del cache[id]
253
                if fn is not None:
254
                    os.unlink(fn)
255
                del self._blob_ref_counts[id]
256
                return True
257
            else:
258
                self._blob_ref_counts[id] = count
259
        return False
260
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
261
    def fetch_blob(self, id):
262
        """Fetch a blob of data."""
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
263
        if id in self._blobs:
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
264
            return self._blobs.pop(id)
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
265
        if id in self._disk_blobs:
266
            (offset, n_bytes, fn) = self._disk_blobs[id]
267
            if fn is None:
268
                f = self._cleanup.small_blobs
269
                f.seek(offset)
270
                content = f.read(n_bytes)
271
            else:
7027.2.1 by Jelmer Vernooij
Port fastimport to python3.
272
                with open(fn, 'rb') as fp:
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
273
                    content = fp.read()
274
            self._decref(id, self._disk_blobs, fn)
275
            return content
276
        content = self._sticky_blobs[id]
277
        if self._decref(id, self._sticky_blobs, None):
278
            self._sticky_memory_bytes -= len(content)
279
        return content