/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
15
16
"""A manager of caches."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
19
0.64.264 by Ian Clatworthy
Merge John's smarter caching of blobs to improve memory footprint
20
import atexit
21
import os
22
import shutil
23
import tempfile
24
import weakref
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
25
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
26
from ... import lru_cache, trace
27
from . import (
0.123.6 by Jelmer Vernooij
Split out reftracker.
28
    branch_mapper,
29
    )
6929.13.2 by Jelmer Vernooij
Remove functionality moved to fastimport.
30
from fastimport.reftracker import (
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
31
    RefTracker,
32
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
33
from .helpers import (
0.123.3 by Jelmer Vernooij
Fix some imports.
34
    single_plural,
35
    )
0.123.6 by Jelmer Vernooij
Split out reftracker.
36
37
38
class _Cleanup(object):
39
    """This class makes sure we clean up when CacheManager goes away.
40
41
    We use a helper class to ensure that we are never in a refcycle.
42
    """
43
44
    def __init__(self, disk_blobs):
45
        self.disk_blobs = disk_blobs
46
        self.tempdir = None
47
        self.small_blobs = None
48
49
    def __del__(self):
50
        self.finalize()
51
52
    def finalize(self):
53
        if self.disk_blobs is not None:
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
54
            for info in self.disk_blobs.values():
0.123.6 by Jelmer Vernooij
Split out reftracker.
55
                if info[-1] is not None:
56
                    os.unlink(info[-1])
57
            self.disk_blobs = None
58
        if self.small_blobs is not None:
59
            self.small_blobs.close()
60
            self.small_blobs = None
61
        if self.tempdir is not None:
62
            shutil.rmtree(self.tempdir)
63
64
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
65
class CacheManager(object):
    """Holds the caches shared while processing an import stream.

    Blobs are kept in memory until fetched. "Sticky" blobs (those that may
    be fetched more than once) are spilled to temporary files once they use
    too much memory.
    """

    # Flushed blobs smaller than this many bytes are appended to one shared
    # temporary file; larger ones each get a file of their own.
    _small_blob_threshold = 25*1024
    # A flush to disk starts once the in-memory sticky blobs exceed this ...
    _sticky_cache_size = 300*1024*1024
    # ... and stops once they are back down to this many bytes.
    _sticky_flushed_size = 100*1024*1024

    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored as ``self.verbose``
        :param inventory_cache_size: size passed to the LRU cache of
            inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs are referenced more than once, and are saved until
        # their refcount goes to 0
        self._blobs = {}
        self._sticky_blobs = {}
        self._sticky_memory_bytes = 0
        # if we overflow our memory cache, then we will dump large blobs to
        # disk in this directory
        self._tempdir = None
        # id => (offset, n_bytes, fname)
        #   if fname is None, then the content is stored in the small file
        self._disk_blobs = {}
        # The helper shares the very same dict, so it always sees the
        # current set of files to delete.
        self._cleanup = _Cleanup(self._disk_blobs)

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.marks = {}

        # Work out the blob reference counts. While this stays empty,
        # every blob is treated as sticky (see store_blob).
        self._blob_ref_counts = {}
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
            except KeyError:
                # info not in file - possible when no blobs used
                pass
            else:
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n

        # BranchMapper has no state (for now?), but we keep it around rather
        # than reinstantiate on every usage
        self.branch_mapper = branch_mapper.BranchMapper()

        self.reftracker = RefTracker()

    def add_mark(self, mark, commit_id):
        """Record the commit id that a mark refers to.

        :param mark: the mark, without its leading ':'
        :param commit_id: the value to associate with the mark
        :return: True if the mark was not known before this call, False if
            an existing entry was overwritten
        :raises ValueError: if mark starts with ':'
        """
        if mark.startswith(b':'):
            raise ValueError(mark)
        is_new = mark not in self.marks
        self.marks[mark] = commit_id
        return is_new

    def lookup_committish(self, committish):
        """Resolve a 'committish' to a revision id.

        :param committish: A "committish" string
        :return: Bazaar revision id
        :raises ValueError: if committish does not start with ':'
        :raises KeyError: if the mark has not been added
        """
        if not committish.startswith(b':'):
            raise ValueError(committish)
        return self.marks[committish.lstrip(b':')]

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached.

        :param note: callable used to emit each line of output
        """
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.marks, "revision-ids", note=note)
        # The non-sticky blobs and the reftracker aren't interesting, so
        # they are omitted from the output, at least for now.

    def _show_stats_for(self, a_dict, label, note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the keys and the values need to support len().

        :param a_dict: the dictionary to report on
        :param label: name shown at the start of the output line
        :param note: callable used to emit the output line
        :param tuple_key: if True, each key is a sequence of strings that
            is joined together before being measured
        """
        count = len(a_dict)
        if tuple_key:
            size = sum(map(len, (''.join(k) for k in a_dict)))
        else:
            size = sum(map(len, a_dict))
        size += sum(map(len, a_dict.values()))
        # Scale to the largest of K/M/G that keeps the figure readable.
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
            single_plural(count, "item", "items")))

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        # Nothing sticky is left in memory, so the byte counter must go
        # back to zero or later flushes would chase bytes that are gone.
        self._sticky_memory_bytes = 0
        self.marks.clear()
        self.reftracker.clear()
        self.inventories.clear()

    def _flush_blobs_to_disk(self):
        """Move sticky blobs from memory to disk, largest first.

        Blobs are flushed until the in-memory total is no more than
        ``_sticky_flushed_size`` or no sticky blob is left in memory. The
        temporary directory and the shared small-blob file are created on
        first use.
        """
        sticky_blobs = self._sticky_blobs
        total_blobs = len(sticky_blobs)
        # Smallest first, so that pop() hands back the largest remaining.
        pending = sorted(sticky_blobs, key=lambda k: len(sticky_blobs[k]))
        if self._tempdir is None:
            tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-')
            self._tempdir = tempdir
            self._cleanup.tempdir = self._tempdir
            self._cleanup.small_blobs = tempfile.TemporaryFile(
                prefix='small-blobs-', dir=self._tempdir)
            small_blob_ref = weakref.ref(self._cleanup.small_blobs)
            # Even though we add it to _Cleanup it seems that the object can
            # be destroyed 'too late' for cleanup to actually occur.
            # Probably a combination of bzr's "die directly, don't clean up"
            # and how exceptions close the running stack.
            def exit_cleanup():
                small_blob = small_blob_ref()
                if small_blob is not None:
                    small_blob.close()
                shutil.rmtree(tempdir, ignore_errors=True)
            atexit.register(exit_cleanup)
        count = 0
        flushed_bytes = 0
        n_small_bytes = 0
        # Also stop when there is nothing left to flush: the byte counter
        # alone is not a safe loop condition if it has ever drifted.
        while (pending
               and self._sticky_memory_bytes > self._sticky_flushed_size):
            blob_id = pending.pop()
            blob = sticky_blobs.pop(blob_id)
            n_bytes = len(blob)
            self._sticky_memory_bytes -= n_bytes
            if n_bytes < self._small_blob_threshold:
                # Append to the shared file and remember where it went.
                f = self._cleanup.small_blobs
                f.seek(0, os.SEEK_END)
                self._disk_blobs[blob_id] = (f.tell(), n_bytes, None)
                f.write(blob)
                n_small_bytes += n_bytes
            else:
                fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir)
                # A file object writes the whole blob (a bare os.write may
                # stop short) and closes the descriptor even on error.
                with os.fdopen(fd, 'wb') as out:
                    out.write(blob)
                self._disk_blobs[blob_id] = (0, n_bytes, name)
            flushed_bytes += n_bytes
            del blob
            count += 1
        if not pending:
            # Every sticky blob has been flushed, so none is left in memory.
            self._sticky_memory_bytes = 0
        trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk'
                   % (count, total_blobs, flushed_bytes / 1024. / 1024,
                      n_small_bytes / 1024. / 1024))

    def store_blob(self, id, data):
        """Store a blob of data.

        :param id: the dataref the blob will later be fetched by
        :param data: the blob content
        """
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            replaced = self._sticky_blobs.get(id)
            if replaced is not None:
                # Storing the same id again replaces the old content, so
                # stop counting the bytes that are about to be dropped.
                self._sticky_memory_bytes -= len(replaced)
            self._sticky_blobs[id] = data
            self._sticky_memory_bytes += len(data)
            if self._sticky_memory_bytes > self._sticky_cache_size:
                self._flush_blobs_to_disk()
        elif data == b'':
            # Empty data is always sticky
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def _decref(self, id, cache, fn):
        """Drop one reference to a blob, discarding it on the last one.

        :param id: the dataref of the blob
        :param cache: the dict the blob's entry is removed from once its
            count reaches zero
        :param fn: name of a file to delete along with the entry, or None
        :return: True if the entry was removed from cache, False otherwise
            (including when no reference counts are being tracked)
        """
        if not self._blob_ref_counts:
            return False
        count = self._blob_ref_counts.get(id, None)
        if count is not None:
            count -= 1
            if count <= 0:
                del cache[id]
                if fn is not None:
                    os.unlink(fn)
                del self._blob_ref_counts[id]
                return True
            else:
                self._blob_ref_counts[id] = count
        return False

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Non-sticky blobs are removed from the cache as they are returned.
        Sticky blobs, in memory or on disk, are kept until their reference
        count runs out.

        :param id: the dataref the blob was stored under
        :return: the blob content
        :raises KeyError: if no blob is stored under id
        """
        if id in self._blobs:
            return self._blobs.pop(id)
        if id in self._disk_blobs:
            (offset, n_bytes, fn) = self._disk_blobs[id]
            if fn is None:
                # Small blob: read its slice of the shared file.
                f = self._cleanup.small_blobs
                f.seek(offset)
                content = f.read(n_bytes)
            else:
                with open(fn, 'rb') as fp:
                    content = fp.read()
            self._decref(id, self._disk_blobs, fn)
            return content
        content = self._sticky_blobs[id]
        if self._decref(id, self._sticky_blobs, None):
            self._sticky_memory_bytes -= len(content)
        return content