/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""A manager of caches."""
18
19
0.64.153 by Ian Clatworthy
clear caches before packing; show cache stats in verbose mode
20
from bzrlib import lru_cache, trace
21
from bzrlib.plugins.fastimport import helpers
0.64.118 by Ian Clatworthy
fix lru_cache import
22
0.78.3 by Ian Clatworthy
move GenericCacheManager into its own module
23
class CacheManager(object):
    """A manager of caches.

    Instances hold:

    * blobs keyed by dataref, split into "sticky" blobs (kept after being
      fetched) and ordinary blobs (dropped the first time they are fetched);
    * an LRU cache of inventories keyed by revision-id;
    * a commit-id -> revision-id lookup table;
    * a (path, branch_ref) -> file-id table;
    * head tracking data: the last ref seen, the last commit id per ref and
      a map of head commit ids to the set of refs that produced them.
    """

    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        :param verbose: stored on the instance as ``verbose``
        :param inventory_cache_size: size handed to the LRU cache that
            holds inventories
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-ids -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # (path, branch_ref) -> file-ids - as generated.
        # (Use store_file_id/fetch_file_id methods rather than direct access.)
        self._file_ids = {}

        # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - None means all
        self._blobs_to_keep = None
        if info is not None:
            try:
                self._blobs_to_keep = info['Blob usage tracking']['multi']
            except KeyError:
                # info not in file - possible when no blobs used
                pass

    def dump_stats(self, note=None):
        """Dump some statistics about what we cached.

        :param note: callable accepting one message string. When None
            (the default), ``trace.note`` is looked up at call time.
        """
        if note is None:
            note = trace.note
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.revision_ids, "revision-ids", note=note)
        self._show_stats_for(self._file_ids, "file-ids", note=note,
            tuple_key=True)
        # These aren't interesting so omit from the output, at least for now
        #self._show_stats_for(self._blobs, "other blobs", note=note)
        #self._show_stats_for(self.last_ids, "last-ids", note=note)
        #self._show_stats_for(self.heads, "heads", note=note)

    def _show_stats_for(self, dict, label, note=None, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the keys and the values need to support len().

        :param dict: the mapping to report on
        :param label: name shown at the start of the output line
        :param note: callable accepting one message string. When None
            (the default), ``trace.note`` is looked up at call time.
        :param tuple_key: if True, every key is a tuple and the length of
            each of its parts is counted
        """
        if note is None:
            note = trace.note
        count = len(dict)
        if tuple_key:
            # Add up the parts directly instead of joining them into a
            # temporary string first; the total is the same.
            size = sum(len(part) for key in dict.keys() for part in key)
        else:
            size = sum(map(len, dict.keys()))
        size += sum(map(len, dict.values()))
        kbytes = size / 1024.0
        note("    %-12s: %8.1f kB (%d %s)" % (label, kbytes, count,
            helpers.single_plural(count, "item", "items")))

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        self.revision_ids.clear()
        self._file_ids.clear()
        self.last_ids.clear()
        self.heads.clear()
        self.inventories.clear()

    def store_blob(self, id, data):
        """Store a blob of data.

        The blob is made sticky when no keep-list is known, when the data
        is the empty string, or when the id is in the keep-list. Otherwise
        it is stored as a blob that is dropped on first fetch.
        """
        if (self._blobs_to_keep is None or data == '' or
            id in self._blobs_to_keep):
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data.

        Sticky blobs stay cached; any other blob is removed from the cache
        as it is returned.

        Raises KeyError if the id is in neither blob cache.
        """
        try:
            return self._sticky_blobs[id]
        except KeyError:
            return self._blobs.pop(id)

    def store_file_id(self, branch_ref, path, id):
        """Store the path to file-id mapping for a branch."""
        key = self._fileid_key(path, branch_ref)
        self._file_ids[key] = id

    def fetch_file_id(self, branch_ref, path):
        """Lookup the file-id for a path in a branch.

        Raises KeyError if unsuccessful.
        """
        key = self._fileid_key(path, branch_ref)
        return self._file_ids[key]

    def _fileid_key(self, path, branch_ref):
        """Return the key used in the file-id table for a path in a branch."""
        return (path, branch_ref)

    def delete_path(self, branch_ref, path):
        """Remove a path from caches."""
        # We actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct.
        # It's quite possible for a path to be deleted twice where
        # the first time is in a merge branch (but the same branch_ref)
        # and the second time is when that branch is merged to mainline.
        pass

    def rename_path(self, branch_ref, old_path, new_path):
        """Rename a path in the caches.

        Does nothing if no file-id is recorded for the old path.
        """
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository
        # if a new file is created at the old path.
        old_key = self._fileid_key(old_path, branch_ref)
        new_key = self._fileid_key(new_path, branch_ref)
        try:
            # Remove the old entry *before* storing the new one so that a
            # rename of a path onto itself keeps its file-id.
            file_id = self._file_ids.pop(old_key)
        except KeyError:
            # The old_key has already been removed, most likely
            # in a merge branch.
            return
        self._file_ids[new_key] = file_id

    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.

        :param cmd: the CommitCommand
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        # A command without merges may carry None rather than an empty list.
        if cmd.merges:
            parents.extend(cmd.merges)

        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Record cmd_id as a head for cmd_ref.

        :param cmd_ref: the ref the commit was made on
        :param cmd_id: the commit id
        :param parents: commit ids that stop being heads, or None
        """
        if parents is not None:
            for parent in parents:
                self.heads.pop(parent, None)
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref