1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
31
from bzrlib.transport import (
38
from xdg.BaseDirectory import xdg_cache_home
40
from bzrlib.config import config_dir
41
ret = os.path.join(config_dir(), "git")
43
ret = os.path.join(xdg_cache_home, "bazaar", "git")
44
if not os.path.isdir(ret):
49
def get_remote_cache_transport():
    """Return a transport for the directory reported by get_cache_dir()."""
    cache_dir = get_cache_dir()
    return get_transport(cache_dir)
53
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    :param sqlite3: Module (or module-like object) exposing
        ``sqlite_version_info`` as a sequence of integers.
    :raises bzrlib.errors.BzrError: if the reported sqlite version is older
        than 3.3; a warning is emitted through ``trace`` first.
    """
    # Only the (major, minor) pair decides compatibility with the 3.3 floor.
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
66
check_pysqlite_version(sqlite3)
67
except (ImportError, bzrlib.errors.BzrError), e:
68
from pysqlite2 import dbapi2 as sqlite3
69
check_pysqlite_version(sqlite3)
71
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
73
raise bzrlib.errors.BzrError("missing sqlite library")
76
_mapdbs = threading.local()
78
"""Get a cache for this thread's db connections."""
81
except AttributeError:
86
class GitShaMap(object):
    """Git<->Bzr revision id mapping database.

    Abstract base: the lookup and listing methods raise NotImplementedError
    and the write-group hooks do nothing unless overridden.
    """

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha
        :return: (type, type_data) with type_data:
            revision: revid, tree sha
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Retrieve a Git blob SHA by file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Retrieve a Git tree SHA by file id.

        :param file_id: File id to look up
        :param revision: revision id to look up
        """
        raise NotImplementedError(self.lookup_tree_id)

    def revids(self):
        """List the revision ids known."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        :param revids: Iterable of revision ids; anything that is not
            already a set is converted to one first.
        :return: Set of the given revision ids not reported by revids().
        """
        present_revids = set(self.revids())
        if not isinstance(revids, set):
            # Set difference below requires a set on the left-hand side.
            revids = set(revids)
        return revids - present_revids

    def sha1s(self):
        """List the SHA1s."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Start writing changes."""

    def commit_write_group(self):
        """Commit any pending changes."""

    def abort_write_group(self):
        """Abort any pending changes."""
135
class ContentCache(object):
    """Object that can cache Git objects."""

    def __getitem__(self, sha):
        """Retrieve an item, by SHA.

        The base class has no storage, so this always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
    """Base class for bzr-git cache formats.

    Subclasses supply the format marker string and know how to open a cache
    stored on a transport.
    """

    def get_format_string(self):
        """Return the marker string written to the 'format' file."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open the cache that lives on ``transport``."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Write this format's marker string to 'format' on ``transport``."""
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_repository(cls, repository):
        """Open the cache belonging to ``repository``.

        If the repository exposes a ``_transport``, its 'git' subdirectory is
        used (and created when absent); otherwise the transport from
        get_remote_cache_transport() is used.  The 'format' file selects the
        registered format; when it does not exist, the 'default' format is
        initialized on the transport first.

        :param repository: Repository object, optionally with ``_transport``
        :return: Whatever the selected format's open() returns
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                # The cache directory already exists; nothing to create.
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)
174
class CacheUpdater(object):
    """Interface for objects that record Git objects in a cache."""

    def add_object(self, obj, ie):
        """Record ``obj`` in the cache.

        :param obj: Git object to record
        :param ie: Companion entry for ``obj``
            (NOTE(review): presumably an inventory entry - confirm)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update.

        NOTE(review): the exact signature is assumed to take no extra
        arguments - confirm against the concrete updaters.
        """
        raise NotImplementedError(self.finish)
183
class BzrGitCache(object):
    """Caching backend.

    Bundles an id map, an optional content cache and the class used to
    create updaters for it.
    """

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Create a cache.

        :param idmap: Id map object; updaters reach it through ``.idmap``
        :param content_cache: Content cache object, or None
        :param cache_updater_klass: Callable invoked as
            ``cache_updater_klass(cache, rev)`` by get_updater()
        """
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Return a new updater for ``rev`` bound to this cache."""
        return self._cache_updater_klass(self, rev)
195
# Factory for a dict-backed cache: a fresh DictGitShaMap as id map, no
# content cache, and DictCacheUpdater as the updater class.
DictBzrGitCache = lambda: BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
198
class DictCacheUpdater(CacheUpdater):
200
def __init__(self, cache, rev):
202
self.revid = rev.revision_id
203
self.parent_revids = rev.parent_ids
207
def add_object(self, obj, ie):
208
if obj.type_name == "commit":
211
type_data = (self.revid, self._commit.tree)
212
elif obj.type_name in ("blob", "tree"):
213
if obj.type_name == "blob":
214
revision = ie.revision
216
revision = self.revid
217
type_data = (ie.file_id, revision)
218
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
221
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
224
if self._commit is None:
225
raise AssertionError("No commit object added")
229
class DictGitShaMap(GitShaMap):
235
def lookup_blob_id(self, fileid, revision):
    """Return the SHA stored for ``fileid`` under ``revision``.

    A KeyError propagates when either key is unknown.
    """
    per_revision = self._by_fileid[revision]
    return per_revision[fileid]
238
def lookup_git_sha(self, sha):
    """Return the entry stored for ``sha``; KeyError when it is unknown."""
    entry = self._by_sha[sha]
    return entry
241
def lookup_tree_id(self, fileid, revision):
    """Return the tree SHA stored for ``fileid`` under ``revision``.

    Trees and blobs share the ``_by_fileid`` table, so this reads the same
    mapping as lookup_blob_id.

    :raises KeyError: if the revision or file id is unknown
    """
    return self._by_fileid[revision][fileid]
245
for key, (type, type_data) in self._by_sha.iteritems():
250
return self._by_sha.iterkeys()
253
class SqliteCacheUpdater(CacheUpdater):
255
def __init__(self, cache, rev):
257
self.db = self.cache.idmap.db
258
self.revid = rev.revision_id
263
def add_object(self, obj, ie):
264
if obj.type_name == "commit":
267
elif obj.type_name == "tree":
268
self._trees.append((obj.id, ie.file_id, self.revid))
269
elif obj.type_name == "blob":
270
self._blobs.append((obj.id, ie.file_id, ie.revision))
275
if self._commit is None:
276
raise AssertionError("No commit object added")
278
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
281
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
284
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
285
(self._commit.id, self.revid, self._commit.tree))
289
# Factory for a sqlite-backed cache: a SqliteGitShaMap opened on path p as id
# map, no content cache, and SqliteCacheUpdater as the updater class.
SqliteBzrGitCache = lambda p: BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
292
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the id map in a sqlite database file."""

    def get_format_string(self):
        """Return the marker string identifying this format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache for ``transport``.

        The database file is "idmap.db" in the transport's local directory;
        when the transport is not local, the directory from get_cache_dir()
        is used instead.
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
305
class SqliteGitShaMap(GitShaMap):
307
def __init__(self, path=None):
310
self.db = sqlite3.connect(":memory:")
312
if not mapdbs().has_key(path):
313
mapdbs()[path] = sqlite3.connect(path)
314
self.db = mapdbs()[path]
315
self.db.text_factory = str
316
self.db.executescript("""
317
create table if not exists commits(
318
sha1 text not null check(length(sha1) == 40),
320
tree_sha text not null check(length(tree_sha) == 40)
322
create index if not exists commit_sha1 on commits(sha1);
323
create unique index if not exists commit_revid on commits(revid);
324
create table if not exists blobs(
325
sha1 text not null check(length(sha1) == 40),
326
fileid text not null,
329
create index if not exists blobs_sha1 on blobs(sha1);
330
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
331
create table if not exists trees(
332
sha1 text unique not null check(length(sha1) == 40),
333
fileid text not null,
336
create unique index if not exists trees_sha1 on trees(sha1);
337
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
341
return "%s(%r)" % (self.__class__.__name__, self.path)
343
def lookup_commit(self, revid):
344
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
349
def commit_write_group(self):
352
def lookup_blob_id(self, fileid, revision):
    """Retrieve a Git blob SHA by file id.

    :param fileid: File id of the blob
    :param revision: Revision id the blob was recorded under
    :return: The ``sha1`` column of the matching ``blobs`` row
    :raises KeyError: if no row matches
    """
    row = self.db.execute(
        "select sha1 from blobs where fileid = ? and revid = ?",
        (fileid, revision)).fetchone()
    if row is not None:
        return row[0]
    raise KeyError(fileid)
358
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id.

    :param fileid: File id of the tree
    :param revision: Revision id the tree was recorded under
    :return: The ``sha1`` column of the matching ``trees`` row
    :raises KeyError: if no row matches
    """
    # Bind the caller's revision; the map object itself carries no revid.
    row = self.db.execute(
        "select sha1 from trees where fileid = ? and revid = ?",
        (fileid, revision)).fetchone()
    if row is not None:
        return row[0]
    raise KeyError(fileid)
364
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the database.

    The commits, blobs and trees tables are searched in that order and the
    first match wins.

    :param sha: Git object sha
    :return: (type, type_data) with type_data:
        revision: revid, tree sha
    :raises KeyError: if the sha is in none of the tables
    """
    row = self.db.execute(
        "select revid, tree_sha from commits where sha1 = ?",
        (sha,)).fetchone()
    if row is not None:
        return ("commit", row)
    row = self.db.execute(
        "select fileid, revid from blobs where sha1 = ?",
        (sha,)).fetchone()
    if row is not None:
        return ("blob", row)
    row = self.db.execute(
        "select fileid, revid from trees where sha1 = ?",
        (sha,)).fetchone()
    if row is not None:
        return ("tree", row)
    raise KeyError(sha)
383
"""List the revision ids known."""
384
return (row for (row,) in self.db.execute("select revid from commits"))
387
"""List the SHA1s."""
388
for table in ("blobs", "commits", "trees"):
389
for (sha,) in self.db.execute("select sha1 from %s" % table):
393
class TdbCacheUpdater(CacheUpdater):
395
def __init__(self, cache, rev):
397
self.db = cache.idmap.db
398
self.revid = rev.revision_id
399
self.parent_revids = rev.parent_ids
403
def add_object(self, obj, ie):
404
sha = obj.sha().digest()
405
if obj.type_name == "commit":
406
self.db["commit\0" + self.revid] = "\0".join((obj.id, obj.tree))
407
type_data = (self.revid, obj.tree)
410
elif obj.type_name == "blob":
411
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
412
type_data = (ie.file_id, ie.revision)
413
elif obj.type_name == "tree":
414
type_data = (ie.file_id, self.revid)
417
self.db["git\0" + sha] = "\0".join((obj.type_name,
418
type_data[0], type_data[1]))
421
if self._commit is None:
422
raise AssertionError("No commit object added")
426
# Factory for a tdb-backed cache: a TdbGitShaMap opened on path p as id map,
# no content cache, and TdbCacheUpdater as the updater class.
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
428
class TdbGitCacheFormat(BzrGitCacheFormat):
430
def get_format_string(self):
431
return 'bzr-git sha map version 3 using tdb\n'
433
def open(self, transport):
435
basepath = transport.local_abspath(".")
436
except bzrlib.errors.NotLocalUrl:
437
basepath = get_cache_dir()
439
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
442
"Unable to open existing bzr-git cache because 'tdb' is not "
446
class TdbGitShaMap(GitShaMap):
447
"""SHA Map that uses a TDB database.
451
"git <sha1>" -> "<type> <type-data1> <type-data2>"
452
"commit revid" -> "<sha1> <tree-id>"
453
"tree fileid revid" -> "<sha1>"
454
"blob fileid revid" -> "<sha1>"
458
TDB_HASH_SIZE = 50000
460
def __init__(self, path=None):
466
if not mapdbs().has_key(path):
467
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
468
os.O_RDWR|os.O_CREAT)
469
self.db = mapdbs()[path]
471
if int(self.db["version"]) not in (2, 3):
472
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
473
self.db["version"], self.TDB_MAP_VERSION)
477
self.db["version"] = str(self.TDB_MAP_VERSION)
479
def start_write_group(self):
    """Open a transaction on the underlying database."""
    database = self.db
    database.transaction_start()
483
def commit_write_group(self):
    """Commit the current transaction on the underlying database."""
    database = self.db
    database.transaction_commit()
487
def abort_write_group(self):
    """Cancel the current transaction on the underlying database."""
    database = self.db
    database.transaction_cancel()
492
return "%s(%r)" % (self.__class__.__name__, self.path)
494
def lookup_commit(self, revid):
    """Return the hex commit SHA stored for ``revid``.

    Only the first 20 bytes of the "commit\\0<revid>" record are decoded.
    """
    # NOTE(review): TdbCacheUpdater.add_object writes obj.id at the start of
    # this record - confirm that value is a 20-byte binary digest.
    record = self.db["commit\0" + revid]
    return sha_to_hex(record[:20])
497
def lookup_blob_id(self, fileid, revision):
    """Return the hex blob SHA stored under "blob\\0<fileid>\\0<revision>"."""
    key = "\0".join(("blob", fileid, revision))
    return sha_to_hex(self.db[key])
500
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the database.

    :param sha: Git object sha
    :return: (type, type_data) with type_data:
        revision: revid, tree sha
    """
    # Records are keyed by the binary form of the sha and hold three
    # NUL-separated fields: the type followed by its two data values.
    binary_sha = hex_to_sha(sha)
    fields = self.db["git\0" + binary_sha].split("\0")
    return (fields[0], (fields[1], fields[2]))
512
def missing_revisions(self, revids):
515
if self.db.get("commit\0" + revid) is None:
520
"""List the revision ids known."""
521
for key in self.db.iterkeys():
522
if key.startswith("commit\0"):
526
"""List the SHA1s."""
527
for key in self.db.iterkeys():
528
if key.startswith("git\0"):
529
yield sha_to_hex(key[4:])
532
formats = registry.Registry()
533
formats.register(TdbGitCacheFormat().get_format_string(),
535
formats.register(SqliteGitCacheFormat().get_format_string(),
536
SqliteGitCacheFormat())
540
formats.register('default', SqliteGitCacheFormat())
542
formats.register('default', TdbGitCacheFormat())
545
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the "git" subdirectory.

    A "git.tdb" file takes precedence over "git.db".  For whichever exists,
    the matching format is initialized on the "git" clone of the transport
    and the file is renamed to "git/idmap.tdb" or "git/idmap.db".  Nothing
    happens when neither file is present.
    """
    if repo_transport.has("git.tdb"):
        legacy_name, new_name = "git.tdb", "git/idmap.tdb"
        cache_format = TdbGitCacheFormat()
    elif repo_transport.has("git.db"):
        legacy_name, new_name = "git.db", "git/idmap.db"
        cache_format = SqliteGitCacheFormat()
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(legacy_name, new_name)
554
def from_repository(repository):
555
repo_transport = getattr(repository, "_transport", None)
556
if repo_transport is not None:
557
# Migrate older cache formats
559
repo_transport.mkdir("git")
560
except bzrlib.errors.FileExists:
563
migrate_ancient_formats(repo_transport)
564
return BzrGitCacheFormat.from_repository(repository)