/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""A manager of caches."""
18
19
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
20
from bzrlib import lru_cache, trace
21
from bzrlib.plugins.fastimport import helpers
0.64.118 by Ian Clatworthy
fix lru_cache import
22
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
23
class CacheManager(object):
24
0.83.1 by Ian Clatworthy
head tracking tests and fix
25
    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored as self.verbose
        :param inventory_cache_size: size handed to lru_cache.LRUCache
            for the inventory cache
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # (path, branch_ref) -> file-ids - as generated.
        # (Use store_file_id/fetch_file_id methods rather than direct access.)
        self._file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # blob id -> number of references expected. When this stays empty
        # (no hints available) store_blob treats every blob as sticky.
        self._blob_ref_counts = {}
        if info is not None:
            # Only the section lookup is guarded, so a KeyError raised while
            # filling in the counts is not silently swallowed.
            try:
                blobs_by_counts = info['Blob reference counts']
            except KeyError:
                # info not in file - possible when no blobs used
                pass
            else:
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
69
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
70
    def dump_stats(self, note=trace.note):
        """Report statistics for the caches that are worth showing.

        :param note: callable taking one string; used to emit each line
        """
        # TODO: add in inventory statistics
        note("Cache statistics:")
        # (cache, label, keys-are-tuples) for every cache that is reported.
        # _blobs ("other blobs"), last_ids ("last-ids") and heads ("heads")
        # aren't interesting so are omitted from the output, at least for now.
        reported = (
            (self._sticky_blobs, "sticky blobs", False),
            (self.revision_ids, "revision-ids", False),
            (self._file_ids, "file-ids", True),
        )
        for cache, label, keys_are_tuples in reported:
            self._show_stats_for(cache, label, note=note,
                tuple_key=keys_are_tuples)
82
0.64.159 by Ian Clatworthy
make the file-id cache optional and branch-ref aware
83
    def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the keys and the values need to support len(). The reported
        size is the total len() of all keys and values, scaled by 1024.

        :param dict: the mapping to report on (the name shadows the builtin
            inside this method; kept for existing callers)
        :param label: name shown at the start of the output line
        :param note: callable taking one string; used to emit the line
        :param tuple_key: if True, each key is a tuple of strings which is
            joined before being measured
        """
        count = len(dict)
        if tuple_key:
            key_len = sum(len(''.join(k)) for k in dict.keys())
        else:
            key_len = sum(len(k) for k in dict.keys())
        value_len = sum(len(v) for v in dict.values())

        # Start in K and step up a unit while the figure is still over 1024.
        size = (key_len + value_len) * 1.0 / 1024
        unit = 'K'
        for bigger_unit in ('M', 'G'):
            if size <= 1024:
                break
            size = size / 1024
            unit = bigger_unit

        noun = helpers.single_plural(count, "item", "items")
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count, noun))
104
105
    def clear_all(self):
        """Empty every cache so the memory it holds can be released.

        last_ref and the blob reference counts are left untouched.
        """
        caches = (
            self._blobs,
            self._sticky_blobs,
            self.revision_ids,
            self._file_ids,
            self.last_ids,
            self.heads,
            self.inventories,
        )
        for cache in caches:
            cache.clear()
114
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
115
    def store_blob(self, id, data):
        """Store a blob of data.

        :param id: the dataref the blob is stored under
        :param data: the blob content
        """
        # A blob only goes into the drop-after-first-fetch cache when we are
        # reference counting, have no count for it and it isn't empty.
        # Otherwise it is sticky: if we're not reference counting everything
        # has to be sticky, and empty data is always sticky.
        counts = self._blob_ref_counts
        if counts and id not in counts and data != '':
            self._blobs[id] = data
        else:
            self._sticky_blobs[id] = data
125
126
    def fetch_blob(self, id):
        """Fetch a blob of data.

        A sticky blob is returned and, when reference counting is active and
        the blob is non-empty, its count is decremented; the blob is dropped
        from the sticky cache once the count reaches zero. Any other blob is
        removed from the cache as it is returned.

        Raises KeyError if the blob is in neither cache.
        """
        try:
            blob = self._sticky_blobs[id]
            counting = self._blob_ref_counts and blob != ''
            if counting:
                remaining = self._blob_ref_counts[id] - 1
                # The count entry itself is kept, even at zero.
                self._blob_ref_counts[id] = remaining
                if remaining == 0:
                    del self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)
        return blob
137
0.64.159 by Ian Clatworthy
make the file-id cache optional and branch-ref aware
138
    def store_file_id(self, branch_ref, path, id):
        """Remember the file-id given to a path in a branch.

        :param branch_ref: the branch the path belongs to
        :param path: the path the file-id was generated for
        :param id: the file-id to remember
        """
        self._file_ids[self._fileid_key(path, branch_ref)] = id
142
143
    def fetch_file_id(self, branch_ref, path):
        """Return the file-id remembered for a path in a branch.

        :param branch_ref: the branch the path belongs to
        :param path: the path to look up

        Raises KeyError if unsuccessful.
        """
        return self._file_ids[self._fileid_key(path, branch_ref)]
150
151
    def _fileid_key(self, path, branch_ref):
152
        return (path, branch_ref)
153
154
    def delete_path(self, branch_ref, path):
        """Remove a path from caches.

        This does nothing, and doing nothing is correct: we actually want
        to remember what file-id we gave a path, even when that file is
        deleted. It's quite possible for a path to be deleted twice where
        the first time is in a merge branch (but the same branch_ref) and
        the second time is when that branch is merged to mainline.
        """
        return None
162
0.64.159 by Ian Clatworthy
make the file-id cache optional and branch-ref aware
163
    def rename_path(self, branch_ref, old_path, new_path):
        """Rename a path in the caches.

        The file-id remembered for old_path is moved to new_path. If nothing
        is remembered for old_path the caches are left unchanged.

        :param branch_ref: the branch both paths belong to
        :param old_path: the path being renamed
        :param new_path: the path it is renamed to
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository
        # if a new file is created at the old path.
        old_key = self._fileid_key(old_path, branch_ref)
        new_key = self._fileid_key(new_path, branch_ref)
        try:
            # Remove the old entry *before* adding the new one so that a
            # rename of a path onto itself keeps the file-id rather than
            # deleting the entry that was just written.
            file_id = self._file_ids.pop(old_key)
        except KeyError:
            # The old_key has already been removed, most likely
            # in a merge branch.
            return
        self._file_ids[new_key] = file_id
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
179
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
180
    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.

        :param cmd: the CommitCommand; its from_, ref, id and merges
            attributes are read
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents: an explicit 'from' wins, otherwise
        # fall back to the last commit recorded for the same ref, if any.
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is None:
                parents = []
            else:
                parents = [last_id]
        # NOTE(review): assumes merges may be None on a command that was not
        # produced by the parser - confirm against the CommitCommand API.
        if cmd.merges is not None:
            parents.extend(cmd.merges)

        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents
200
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
201
    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Record cmd_id as a head reachable from cmd_ref.

        :param cmd_ref: the ref the commit was made on
        :param cmd_id: the commit-id of the new commit
        :param parents: commit-ids of the commit's parents, or None;
            any of them currently recorded as a head stops being one
        """
        if parents is not None:
            for parent in parents:
                # Parents that aren't recorded as heads are simply skipped.
                self.heads.pop(parent, None)
        refs = self.heads.setdefault(cmd_id, set())
        refs.add(cmd_ref)
        self.last_ref = cmd_ref
        self.last_ids[cmd_ref] = cmd_id