/loggerhead/trunk

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/loggerhead/trunk
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
1
#
2
# Copyright (C) 2006  Robey Pointer <robey@lag.net>
3
#
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
#
18
19
"""
20
a cache for chewed-up "change" data structures, which are basically just a
21
different way of storing a revision.  the cache improves lookup times 10x
22
over bazaar's xml revision structure, though, so currently still worth doing.
23
24
once a revision is committed in bazaar, it never changes, so once we have
25
cached a change, it's good forever.
26
"""
27
128.4.2 by Michael Hudson
rather brainlessly store the filechange cache in a sql database instead of a shelve store
28
import cPickle
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
29
import os
30
31
from loggerhead import util
69 by Robey Pointer
switch the cache and text index to use file locking so they can be used by
32
from loggerhead.lockfile import LockFile
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
33
49 by Robey Pointer
add top-level page listing available branches. also a patch from matty to not require external-url in atom feeds any more
34
# decorator that serializes access through the cache's file lock.
with_lock = util.with_lock('_lock', 'ChangeCache')

# pick the sqlite binding at import time; an environment variable selects
# between python-sqlite and python-pysqlite2.
SQLITE_INTERFACE = os.environ.get('SQLITE_INTERFACE', 'sqlite')

if SQLITE_INTERFACE == 'pysqlite2':
    from pysqlite2 import dbapi2
    _param_marker = '?'
elif SQLITE_INTERFACE == 'sqlite':
    import sqlite as dbapi2
    _param_marker = '%s'
else:
    # fail fast with a clear message; otherwise dbapi2/_param_marker stay
    # unbound and the _select_stmt line below dies with a baffling NameError.
    raise AssertionError(
        'unsupported SQLITE_INTERFACE: %r' % (SQLITE_INTERFACE,))

# statements are written with '?' placeholders and rewritten to whichever
# parameter-marker style the selected binding expects.
_select_stmt = ("select data from revisiondata where revid = ?"
                ).replace('?', _param_marker)
_insert_stmt = ("insert into revisiondata (revid, data) "
                "values (?, ?)").replace('?', _param_marker)
50
51
52
128.4.5 by Michael Hudson
reorganizations, cleanups. still utterly horrid though.
53
54
class FakeShelf(object):
    """
    a shelve-like store that keeps pickled objects in a single sqlite
    table, keyed by revision id.  the table is created on first use of
    the database file.
    """

    def __init__(self, filename):
        # the table only needs creating the first time this database
        # file appears on disk.
        create_table = not os.path.exists(filename)
        self.connection = dbapi2.connect(filename)
        self.cursor = self.connection.cursor()
        if create_table:
            self._create_table()

    def _create_table(self):
        self.cursor.execute(
            "create table RevisionData "
            "(revid binary primary key, data binary)")
        self.connection.commit()

    def _serialize(self, obj):
        # wrap in Binary so embedded NUL bytes survive the round trip
        # into the binary column.
        r = dbapi2.Binary(cPickle.dumps(obj, protocol=2))
        return r

    def _unserialize(self, data):
        return cPickle.loads(str(data))

    def get(self, revid):
        """
        return the cached object for revid, or None if it has not been
        cached yet.
        """
        self.cursor.execute(_select_stmt, (revid,))
        filechange = self.cursor.fetchone()
        if filechange is None:
            return None
        else:
            return self._unserialize(filechange[0])

    def add(self, revid_obj_pairs):
        """
        store a batch of (revid, obj) pairs and commit them in one
        transaction.
        """
        # executemany lets the DB-API driver batch the inserts instead of
        # round-tripping one execute() call per row.
        self.cursor.executemany(
            _insert_stmt,
            [(r, self._serialize(d)) for (r, d) in revid_obj_pairs])
        self.connection.commit()
47 by Robey Pointer
slowly moving the branch-specific stuff into a common structure...
82
128.13.20 by Martin Albisetti
Merge from trunk! Yay!
83
128.1.55 by Michael Hudson
plumbing for a file change cache
84
class FileChangeCache(object):
    """
    a cache of per-revision file-change data, backed by a sqlite store
    in the branch's cache folder.
    """

    def __init__(self, history, cache_path):
        self.history = history

        if not os.path.exists(cache_path):
            os.mkdir(cache_path)

        self._changes_filename = os.path.join(cache_path, 'filechanges.sql')

        # use a lockfile since the cache folder could be shared across
        # different processes.
        self._lock = LockFile(os.path.join(cache_path, 'filechange-lock'))

    @with_lock
    def get_file_changes(self, entries):
        """
        return the file-change data for each entry, in order.  cached
        values come straight from the sqlite shelf; anything missing is
        fetched from history in a single batch and written back.
        """
        results = []
        uncached = []        # entries that had no cached value
        uncached_slots = []  # positions in `results` awaiting those values
        shelf = FakeShelf(self._changes_filename)
        for entry in entries:
            hit = shelf.get(entry.revid)
            if hit is None:
                uncached.append(entry)
                uncached_slots.append(len(results))
                results.append(None)
            else:
                results.append(hit)
        if uncached:
            fetched = self.history.get_file_changes_uncached(uncached)
            new_pairs = []
            for slot, entry, changes in zip(
                uncached_slots, uncached, fetched):
                new_pairs.append((entry.revid, changes))
                results[slot] = changes
            shelf.add(new_pairs)
        return results