# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""A manager of caches."""


from bzrlib import lru_cache, trace
from bzrlib.plugins.fastimport import helpers

class CacheManager(object):
    """Manage the caches used while importing a fast-import stream."""

    def __init__(self, info=None, verbose=False, inventory_cache_size=10):
        """Create a manager of caches.

        :param info: a ConfigObj holding the output from
            the --info processor, or None if no hints are available
        """
        self.verbose = verbose

        # dataref -> data. dataref is either :mark or the sha-1.
        # Sticky blobs aren't removed after being referenced.
        self._blobs = {}
        self._sticky_blobs = {}

        # revision-id -> Inventory cache
        # these are large and we probably don't need too many as
        # most parents are recent in history
        self.inventories = lru_cache.LRUCache(inventory_cache_size)

        # import commit-id -> revision-id lookup table
        # we need to keep all of these but they are small
        self.revision_ids = {}

        # (path, branch_ref) -> file-ids - as generated.
        # (Use the store_file_id/fetch_file_id methods rather than direct access.)
        self._file_ids = {}

        # Head tracking: last ref, last id per ref and a map of commit ids to refs
        self.last_ref = None
        self.last_ids = {}
        self.heads = {}

        # Work out the blobs to make sticky - an empty dict means all are sticky
        self._blob_ref_counts = {}
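        # For illustration, a hints file from the --info processor might
        # contain a section like the following (blob ids are made up;
        # ConfigObj hands the comma-separated values back as lists):
        #
        #   [Blob reference counts]
        #   2 = :1, :4
        #   5 = :7
        #
        # which records that blobs :1 and :4 are each referenced twice
        # and blob :7 five times.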
        if info is not None:
            try:
                blobs_by_counts = info['Blob reference counts']
                # The parser hands values back as lists, already parsed
                for count, blob_list in blobs_by_counts.items():
                    n = int(count)
                    for b in blob_list:
                        self._blob_ref_counts[b] = n
            except KeyError:
                # section not in file - possible when no blobs are used
                pass

    def dump_stats(self, note=trace.note):
        """Dump some statistics about what we cached."""
        # TODO: add in inventory statistics
        note("Cache statistics:")
        self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
        self._show_stats_for(self.revision_ids, "revision-ids", note=note)
        self._show_stats_for(self._file_ids, "file-ids", note=note,
            tuple_key=True)
        # These aren't interesting so omit from the output, at least for now
        #self._show_stats_for(self._blobs, "other blobs", note=note)
        #self._show_stats_for(self.last_ids, "last-ids", note=note)
        #self._show_stats_for(self.heads, "heads", note=note)

    def _show_stats_for(self, a_dict, label, note=trace.note, tuple_key=False):
        """Dump statistics about a given dictionary.

        Both the keys and the values need to support len().
        """
        count = len(a_dict)
        if tuple_key:
            size = sum(map(len, (''.join(k) for k in a_dict.keys())))
        else:
            size = sum(map(len, a_dict.keys()))
        size += sum(map(len, a_dict.values()))
        size = size * 1.0 / 1024
        unit = 'K'
        if size > 1024:
            size = size / 1024
            unit = 'M'
            if size > 1024:
                size = size / 1024
                unit = 'G'
        note("    %-12s: %8.1f %s (%d %s)" % (label, size, unit, count,
            helpers.single_plural(count, "item", "items")))

    def clear_all(self):
        """Free up any memory used by the caches."""
        self._blobs.clear()
        self._sticky_blobs.clear()
        self.revision_ids.clear()
        self._file_ids.clear()
        self.last_ids.clear()
        self.heads.clear()
        self.inventories.clear()

    def store_blob(self, id, data):
        """Store a blob of data."""
        # Note: If we're not reference counting, everything has to be sticky
        if not self._blob_ref_counts or id in self._blob_ref_counts:
            self._sticky_blobs[id] = data
        elif data == '':
            # Empty data is always sticky
            self._sticky_blobs[id] = data
        else:
            self._blobs[id] = data

    def fetch_blob(self, id):
        """Fetch a blob of data."""
        try:
            b = self._sticky_blobs[id]
            if self._blob_ref_counts and b != '':
                self._blob_ref_counts[id] -= 1
                if self._blob_ref_counts[id] == 0:
                    del self._sticky_blobs[id]
            return b
        except KeyError:
            return self._blobs.pop(id)
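    # A minimal sketch of the blob protocol above (mark ids and data are
    # made up; ref counts are normally seeded from --info hints rather
    # than set directly as done here):
    #
    #   manager = CacheManager()
    #   manager._blob_ref_counts[':1'] = 2
    #   manager.store_blob(':1', 'spam')  # sticky - a ref count is known
    #   manager.store_blob(':2', 'eggs')  # one-shot - no ref count
    #   manager.fetch_blob(':2')          # returns 'eggs' and forgets it
    #   manager.fetch_blob(':1')          # returns 'spam', count drops to 1
    #   manager.fetch_blob(':1')          # returns 'spam', count hits 0, evicted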

    def store_file_id(self, branch_ref, path, id):
        """Store the path to file-id mapping for a branch."""
        key = self._fileid_key(path, branch_ref)
        self._file_ids[key] = id

    def fetch_file_id(self, branch_ref, path):
        """Lookup the file-id for a path in a branch.
        
        Raises KeyError if unsuccessful.
        """
        key = self._fileid_key(path, branch_ref)
        return self._file_ids[key]

    def _fileid_key(self, path, branch_ref):
        return (path, branch_ref)

    def delete_path(self, branch_ref, path):
        """Remove a path from caches."""
        # We actually want to remember what file-id we gave a path,
        # even when that file is deleted, so doing nothing is correct.
        # It's quite possible for a path to be deleted twice where
        # the first time is in a merge branch (but the same branch_ref)
        # and the second time is when that branch is merged to mainline.
        pass

    def rename_path(self, branch_ref, old_path, new_path):
        """Rename a path in the caches."""
        # In this case, we need to forget the file-id we gave a path,
        # otherwise, we'll get duplicate file-ids in the repository
        # if a new file is created at the old path.
        old_key = self._fileid_key(old_path, branch_ref)
        new_key = self._fileid_key(new_path, branch_ref)
        try:
            old_file_id = self._file_ids[old_key]
        except KeyError:
            # The old_key has already been removed, most likely
            # in a merge branch.
            pass
        else:
            self._file_ids[new_key] = old_file_id
            del self._file_ids[old_key]
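    # A minimal sketch of the file-id bookkeeping above (paths, refs and
    # ids are made up):
    #
    #   manager.store_file_id('refs/heads/master', 'README', 'readme-id')
    #   manager.fetch_file_id('refs/heads/master', 'README')   # 'readme-id'
    #   manager.rename_path('refs/heads/master', 'README', 'NEWS')
    #   manager.fetch_file_id('refs/heads/master', 'NEWS')     # 'readme-id'
    #   manager.fetch_file_id('refs/heads/master', 'README')   # KeyError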

    def track_heads(self, cmd):
        """Track the repository heads given a CommitCommand.
        
        :param cmd: the CommitCommand
        :return: the list of parents in terms of commit-ids
        """
        # Get the true set of parents
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        parents.extend(cmd.merges)

        # Track the heads
        self.track_heads_for_ref(cmd.ref, cmd.id, parents)
        return parents

    def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
        """Track the repository heads for a single ref.

        :param cmd_ref: the ref the commit was made on
        :param cmd_id: the commit-id of the new head
        :param parents: the commit-ids of the parents, if known
        """
        if parents is not None:
            for parent in parents:
                if parent in self.heads:
                    del self.heads[parent]
        self.heads.setdefault(cmd_id, set()).add(cmd_ref)
        self.last_ids[cmd_ref] = cmd_id
        self.last_ref = cmd_ref
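    # A minimal sketch of head tracking (commit ids and refs are made up;
    # each line stands for a CommitCommand with ref, id, from_ and merges):
    #
    #   :1 on refs/heads/master, no from_  -> parents [],   heads {:1}
    #   :2 on refs/heads/master, no from_  -> parents [:1], heads {:2}
    #   :3 on refs/heads/branch, from_ :1  -> parents [:1], heads {:2, :3}
    #
    # i.e. a commit displaces its parents as heads, so after an import
    # self.heads holds just the tips, mapped to the refs pointing at them.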
