/loggerhead/trunk

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/loggerhead/trunk
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
1
#
2
# Copyright (C) 2006  Robey Pointer <robey@lag.net>
3
#
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
#
18
19
"""
20
a cache for chewed-up "change" data structures, which are basically just a
21
different way of storing a revision.  the cache improves lookup times 10x
22
over bazaar's xml revision structure, though, so currently still worth doing.
23
24
once a revision is committed in bazaar, it never changes, so once we have
25
cached a change, it's good forever.
26
"""
27
128.4.2 by Michael Hudson
rather brainlessly store the filechange cache in a sql database instead of a shelve store
28
import cPickle
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
29
import os
30
31
from loggerhead import util
69 by Robey Pointer
switch the cache and text index to use file locking so they can be used by
32
from loggerhead.lockfile import LockFile
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
33
49 by Robey Pointer
add top-level page listing available branches. also a patch from matty to not require external-url in atom feeds any more
34
# Decorator that serializes access to a method through the instance's
# ``_lock`` attribute (a LockFile); 'ChangeCache' is the label used in
# lock-related log messages.
with_lock = util.with_lock('_lock', 'ChangeCache')
128.4.5 by Michael Hudson
reorganizations, cleanups. still utterly horrid though.
35
221.1.1 by Michael Hudson
as branch nick
36
try:
189 by Martin Albisetti
Use sqlite3 by default
37
    from sqlite3 import dbapi2
221.1.1 by Michael Hudson
as branch nick
38
except ImportError:
39
    from pysqlite2 import dbapi2
128.4.11 by Michael Hudson
fiddle things around so you can use the python-sqlite or python-pysqlite
40
128.4.5 by Michael Hudson
reorganizations, cleanups. still utterly horrid though.
41
42
class FakeShelf(object):
    """A minimal shelve-like store backed by a single sqlite table.

    Maps a revision id to a pickled "change" object.  Once committed, a
    bazaar revision never changes, so entries never need updating or
    expiring -- the table only ever grows.
    """

    def __init__(self, filename):
        # The table only needs creating the first time the database file
        # comes into existence.  (NOTE(review): exists-then-connect is
        # racy if two processes create the same file concurrently; the
        # caller is expected to hold the filechange lock.)
        create_table = not os.path.exists(filename)
        self.connection = dbapi2.connect(filename)
        self.cursor = self.connection.cursor()
        if create_table:
            self._create_table()

    def _create_table(self):
        """Create the revid -> pickled-data table and commit it."""
        self.cursor.execute(
            "create table RevisionData "
            "(revid binary primary key, data binary)")
        self.connection.commit()

    def _serialize(self, obj):
        """Pickle obj and wrap it so sqlite stores it as a blob."""
        # protocol=2 is the most compact protocol cPickle offers; Binary
        # keeps sqlite from trying to treat the bytes as text.
        return dbapi2.Binary(cPickle.dumps(obj, protocol=2))

    def _unserialize(self, data):
        """Inverse of _serialize: unpickle a blob fetched from sqlite."""
        # sqlite hands back a buffer object; cPickle wants a plain string.
        return cPickle.loads(str(data))

    def get(self, revid):
        """Return the cached object for revid, or None if not cached."""
        self.cursor.execute(
            "select data from revisiondata where revid = ?", (revid, ))
        filechange = self.cursor.fetchone()
        if filechange is None:
            return None
        else:
            return self._unserialize(filechange[0])

    def add(self, revid_obj_pairs):
        """Insert (revid, obj) pairs and commit them in one transaction.

        Raises on a duplicate revid, same as the row-at-a-time insert did.
        """
        # executemany pushes the insert loop into the driver's C layer
        # instead of issuing one execute() per pair from Python.
        self.cursor.executemany(
            "insert into revisiondata (revid, data) values (?, ?)",
            [(r, self._serialize(d)) for (r, d) in revid_obj_pairs])
        self.connection.commit()
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
79
128.13.20 by Martin Albisetti
Merge from trunk! Yay!
80
128.1.55 by Michael Hudson
plumbing for a file change cache
81
class FileChangeCache(object):
    """Per-branch cache of file-change data, keyed by revision id.

    Lookups go through a FakeShelf sqlite store; anything not already
    cached is fetched from the history object and written back.
    """

    def __init__(self, history, cache_path):
        self.history = history

        if not os.path.exists(cache_path):
            os.mkdir(cache_path)

        self._changes_filename = os.path.join(cache_path, 'filechanges.sql')

        # The cache folder may be shared between processes, so guard it
        # with a lock file rather than an in-process lock.
        self._lock = LockFile(os.path.join(cache_path, 'filechange-lock'))

    @with_lock
    def get_file_changes(self, entries):
        """Return the file-change data for each entry, in order.

        Cache hits are answered from the sqlite store; misses are fetched
        in one batch from history and stored for next time.
        """
        shelf = FakeShelf(self._changes_filename)

        results = []
        uncached = []       # entries whose revid was not in the cache
        uncached_slots = [] # matching positions in results to backfill
        for entry in entries:
            hit = shelf.get(entry.revid)
            if hit is None:
                uncached.append(entry)
                uncached_slots.append(len(results))
            results.append(hit)

        if uncached:
            fetched = self.history.get_file_changes_uncached(uncached)
            new_pairs = []
            for slot, entry, changes in zip(uncached_slots, uncached,
                                            fetched):
                results[slot] = changes
                new_pairs.append((entry.revid, changes))
            shelf.add(new_pairs)

        return results