1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
31
from bzrlib.transport import (
38
from xdg.BaseDirectory import xdg_cache_home
40
from bzrlib.config import config_dir
41
ret = os.path.join(config_dir(), "git")
43
ret = os.path.join(xdg_cache_home, "bazaar", "git")
44
if not os.path.isdir(ret):
49
def get_remote_cache_transport():
    """Return a transport for the directory reported by get_cache_dir().

    The directory path is handed to ``get_transport`` unchanged.
    """
    cache_dir = get_cache_dir()
    return get_transport(cache_dir)
53
def check_pysqlite_version(sqlite3):
    """Verify that a sqlite module wraps SQLite 3.3 or newer.

    :param sqlite3: Module-like object exposing ``sqlite_version_info``
        as a sequence of integers (major, minor, ...).
    :raises bzrlib.errors.BzrError: If the library is older than 3.3; a
        warning is emitted through ``trace`` first.
    """
    major = sqlite3.sqlite_version_info[0]
    # Anything past major version 3 is accepted without looking further.
    if major > 3:
        return
    # Within the 3.x series the minor version must be at least 3.
    if major == 3 and sqlite3.sqlite_version_info[1] >= 3:
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
66
check_pysqlite_version(sqlite3)
67
except (ImportError, bzrlib.errors.BzrError), e:
68
from pysqlite2 import dbapi2 as sqlite3
69
check_pysqlite_version(sqlite3)
71
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
73
raise bzrlib.errors.BzrError("missing sqlite library")
76
_mapdbs = threading.local()
78
"""Get a cache for this thread's db connections."""
81
except AttributeError:
86
class GitShaMap(object):
87
"""Git<->Bzr revision id mapping database."""
89
def lookup_git_sha(self, sha):
90
"""Lookup a Git sha in the database.
91
:param sha: Git object sha
92
:return: (type, type_data) with type_data:
93
revision: revid, tree sha
95
raise NotImplementedError(self.lookup_git_sha)
97
def lookup_blob_id(self, file_id, revision):
98
"""Retrieve a Git blob SHA by file id.
100
:param file_id: File id of the file/symlink
101
:param revision: revision in which the file was last changed.
103
raise NotImplementedError(self.lookup_blob_id)
105
def lookup_tree_id(self, file_id, revision):
106
"""Retrieve a Git tree SHA by file id.
108
raise NotImplementedError(self.lookup_tree_id)
111
"""List the revision ids known."""
112
raise NotImplementedError(self.revids)
114
def missing_revisions(self, revids):
115
"""Return set of all the revisions that are not present."""
116
present_revids = set(self.revids())
117
if not isinstance(revids, set):
119
return revids - present_revids
122
"""List the SHA1s."""
123
raise NotImplementedError(self.sha1s)
125
def start_write_group(self):
    """Begin a group of changes to the map.

    This base implementation has no body beyond its docstring, so it
    does nothing and returns None.
    """
128
def commit_write_group(self):
    """Commit whatever changes are pending.

    This base implementation has no body beyond its docstring, so it
    does nothing and returns None.
    """
131
def abort_write_group(self):
    """Discard whatever changes are pending.

    This base implementation has no body beyond its docstring, so it
    does nothing and returns None.
    """
135
class ContentCache(object):
    """Interface for objects able to cache Git objects."""

    def __getitem__(self, sha):
        """Fetch a cached item by its SHA.

        The base class provides no storage and always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
145
def get_format_string(self):
    """Return the string that identifies this cache format.

    The base class has no format string of its own and always raises
    NotImplementedError.
    """
    raise NotImplementedError(self.get_format_string)
148
def open(self, transport):
    """Open the cache stored at ``transport``.

    The base class cannot open anything and always raises
    NotImplementedError.
    """
    raise NotImplementedError(self.open)
151
def initialize(self, transport):
    """Record this format on ``transport``.

    Writes the value of ``get_format_string()`` to a file named
    ``format`` through ``transport.put_bytes``.
    """
    format_string = self.get_format_string()
    transport.put_bytes('format', format_string)
155
def from_repository(self, repository):
156
repo_transport = getattr(repository, "_transport", None)
157
if repo_transport is not None:
159
repo_transport.mkdir('git')
160
except bzrlib.errors.FileExists:
162
transport = repo_transport.clone('git')
164
transport = get_remote_cache_transport()
166
format_name = transport.get_bytes('format')
167
format = formats.get(format_name)
168
except bzrlib.errors.NoSuchFile:
169
format = formats.get('default')
170
format.initialize(transport)
171
return format.open(transport)
174
class CacheUpdater(object):
176
def add_object(self, obj, ie):
    """Register ``obj`` (with its accompanying ``ie``) with the updater.

    The base class records nothing and always raises
    NotImplementedError.
    """
    raise NotImplementedError(self.add_object)
180
raise NotImplementedError(self.finish)
183
class BzrGitCache(object):
184
"""Caching backend."""
186
def __init__(self, idmap, content_cache, cache_updater_klass):
188
self.content_cache = content_cache
189
self._cache_updater_klass = cache_updater_klass
191
def get_updater(self, rev):
    """Create an updater for ``rev`` using the configured updater class.

    The updater class receives this cache and ``rev`` as its two
    positional arguments.
    """
    updater_factory = self._cache_updater_klass
    return updater_factory(self, rev)
195
def DictBzrGitCache():
    """Build a BzrGitCache backed by a fresh DictGitShaMap.

    No content cache is supplied (``None``), and updates are performed
    through DictCacheUpdater.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
198
class DictCacheUpdater(CacheUpdater):
200
def __init__(self, cache, rev):
202
self.revid = rev.revision_id
203
self.parent_revids = rev.parent_ids
207
def add_object(self, obj, ie):
208
if obj.type_name == "commit":
211
type_data = (self.revid, self._commit.tree)
212
self.cache.idmap._by_revid[self.revid] = obj.id
213
elif obj.type_name in ("blob", "tree"):
214
if obj.type_name == "blob":
215
revision = ie.revision
217
revision = self.revid
218
type_data = (ie.file_id, revision)
219
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
222
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
225
if self._commit is None:
226
raise AssertionError("No commit object added")
230
class DictGitShaMap(GitShaMap):
237
def lookup_blob_id(self, fileid, revision):
    """Return the blob SHA stored for ``fileid`` under ``revision``.

    :raises KeyError: If either the revision or the file id is unknown.
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
240
def lookup_git_sha(self, sha):
    """Return the entry recorded for the Git object ``sha``.

    :raises KeyError: If the SHA has not been recorded.
    """
    entry = self._by_sha[sha]
    return entry
243
def lookup_tree_id(self, fileid, revision):
    """Return the tree SHA stored for ``fileid`` under ``revision``.

    :param fileid: File id of the tree.
    :param revision: Revision id the tree was recorded under.
    :raises KeyError: If either the revision or the file id is unknown.
    """
    # Trees are recorded in the same ``_by_fileid`` mapping as blobs, so
    # read it directly from this map rather than through ``self._base``.
    return self._by_fileid[revision][fileid]
246
def lookup_commit(self, revid):
    """Return the commit SHA recorded for the revision ``revid``.

    :raises KeyError: If the revision id has not been recorded.
    """
    commit_sha = self._by_revid[revid]
    return commit_sha
250
for key, (type, type_data) in self._by_sha.iteritems():
255
return self._by_sha.iterkeys()
258
class SqliteCacheUpdater(CacheUpdater):
260
def __init__(self, cache, rev):
262
self.db = self.cache.idmap.db
263
self.revid = rev.revision_id
268
def add_object(self, obj, ie):
269
if obj.type_name == "commit":
272
elif obj.type_name == "tree":
273
self._trees.append((obj.id, ie.file_id, self.revid))
274
elif obj.type_name == "blob":
275
self._blobs.append((obj.id, ie.file_id, ie.revision))
280
if self._commit is None:
281
raise AssertionError("No commit object added")
283
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
286
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
289
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
290
(self._commit.id, self.revid, self._commit.tree))
294
def SqliteBzrGitCache(p):
    """Build a BzrGitCache backed by a SqliteGitShaMap.

    :param p: Value passed to SqliteGitShaMap as its path argument.

    No content cache is supplied (``None``), and updates are performed
    through SqliteCacheUpdater.
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
297
class SqliteGitCacheFormat(BzrGitCacheFormat):
299
def get_format_string(self):
    """Return the string that identifies the sqlite-backed cache format."""
    format_string = 'bzr-git sha map version 1 using sqlite\n'
    return format_string
302
def open(self, transport):
304
basepath = transport.local_abspath(".")
305
except bzrlib.errors.NotLocalUrl:
306
basepath = get_cache_dir()
307
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
310
class SqliteGitShaMap(GitShaMap):
312
def __init__(self, path=None):
315
self.db = sqlite3.connect(":memory:")
317
if not mapdbs().has_key(path):
318
mapdbs()[path] = sqlite3.connect(path)
319
self.db = mapdbs()[path]
320
self.db.text_factory = str
321
self.db.executescript("""
322
create table if not exists commits(
323
sha1 text not null check(length(sha1) == 40),
325
tree_sha text not null check(length(tree_sha) == 40)
327
create index if not exists commit_sha1 on commits(sha1);
328
create unique index if not exists commit_revid on commits(revid);
329
create table if not exists blobs(
330
sha1 text not null check(length(sha1) == 40),
331
fileid text not null,
334
create index if not exists blobs_sha1 on blobs(sha1);
335
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
336
create table if not exists trees(
337
sha1 text unique not null check(length(sha1) == 40),
338
fileid text not null,
341
create unique index if not exists trees_sha1 on trees(sha1);
342
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
346
return "%s(%r)" % (self.__class__.__name__, self.path)
348
def lookup_commit(self, revid):
349
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
354
def commit_write_group(self):
357
def lookup_blob_id(self, fileid, revision):
358
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
361
raise KeyError(fileid)
363
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id and revision.

    :param fileid: File id of the tree.
    :param revision: Revision id the tree was recorded under.
    :return: The ``sha1`` column of the matching ``trees`` row.
    :raises KeyError: If no row matches the (fileid, revision) pair.
    """
    # Bind the ``revision`` argument; this class does not show a
    # ``revid`` attribute, so ``self.revid`` could not be used here.
    row = self.db.execute(
        "select sha1 from trees where fileid = ? and revid = ?",
        (fileid, revision)).fetchone()
    # fetchone() yields None when the query matched nothing.
    if row is not None:
        return row[0]
    raise KeyError(fileid)
369
def lookup_git_sha(self, sha):
370
"""Lookup a Git sha in the database.
372
:param sha: Git object sha
373
:return: (type, type_data) with type_data:
374
revision: revid, tree sha
376
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
378
return ("commit", row)
379
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
382
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
388
"""List the revision ids known."""
389
return (row for (row,) in self.db.execute("select revid from commits"))
392
"""List the SHA1s."""
393
for table in ("blobs", "commits", "trees"):
394
for (sha,) in self.db.execute("select sha1 from %s" % table):
398
class TdbCacheUpdater(CacheUpdater):
400
def __init__(self, cache, rev):
402
self.db = cache.idmap.db
403
self.revid = rev.revision_id
404
self.parent_revids = rev.parent_ids
408
def add_object(self, obj, ie):
409
sha = obj.sha().digest()
410
if obj.type_name == "commit":
411
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
412
type_data = (self.revid, obj.tree)
415
elif obj.type_name == "blob":
416
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
417
type_data = (ie.file_id, ie.revision)
418
elif obj.type_name == "tree":
419
type_data = (ie.file_id, self.revid)
422
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
425
if self._commit is None:
426
raise AssertionError("No commit object added")
430
def TdbBzrGitCache(p):
    """Build a BzrGitCache backed by a TdbGitShaMap.

    :param p: Value passed to TdbGitShaMap as its path argument.

    No content cache is supplied (``None``), and updates are performed
    through TdbCacheUpdater.
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
432
class TdbGitCacheFormat(BzrGitCacheFormat):
434
def get_format_string(self):
    """Return the string that identifies the tdb-backed cache format."""
    format_string = 'bzr-git sha map version 3 using tdb\n'
    return format_string
437
def open(self, transport):
439
basepath = transport.local_abspath(".")
440
except bzrlib.errors.NotLocalUrl:
441
basepath = get_cache_dir()
443
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
446
"Unable to open existing bzr-git cache because 'tdb' is not "
450
class TdbGitShaMap(GitShaMap):
451
"""SHA Map that uses a TDB database.
455
"git <sha1>" -> "<type> <type-data1> <type-data2>"
456
"commit revid" -> "<sha1> <tree-id>"
457
"tree fileid revid" -> "<sha1>"
458
"blob fileid revid" -> "<sha1>"
462
TDB_HASH_SIZE = 50000
464
def __init__(self, path=None):
470
if not mapdbs().has_key(path):
471
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
472
os.O_RDWR|os.O_CREAT)
473
self.db = mapdbs()[path]
475
if int(self.db["version"]) not in (2, 3):
476
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
477
self.db["version"], self.TDB_MAP_VERSION)
481
self.db["version"] = str(self.TDB_MAP_VERSION)
483
def start_write_group(self):
    """Begin a write group by starting a transaction on the database."""
    database = self.db
    database.transaction_start()
487
def commit_write_group(self):
    """Finish a write group by committing the database transaction."""
    database = self.db
    database.transaction_commit()
491
def abort_write_group(self):
    """Abandon a write group by cancelling the database transaction."""
    database = self.db
    database.transaction_cancel()
496
return "%s(%r)" % (self.__class__.__name__, self.path)
498
def lookup_commit(self, revid):
    """Return the hex commit SHA stored for the revision ``revid``.

    The record is read from the ``"commit\\0" + revid`` key; only its
    first 20 bytes are passed to ``sha_to_hex``.
    """
    record = self.db["commit\0" + revid]
    binary_sha = record[:20]
    return sha_to_hex(binary_sha)
501
def lookup_blob_id(self, fileid, revision):
    """Return the hex blob SHA stored for ``fileid`` under ``revision``.

    The database key is ``"blob"``, the file id and the revision joined
    with NUL bytes; the stored value is passed to ``sha_to_hex``.
    """
    key = "\0".join(("blob", fileid, revision))
    binary_sha = self.db[key]
    return sha_to_hex(binary_sha)
504
def lookup_git_sha(self, sha):
505
"""Lookup a Git sha in the database.
507
:param sha: Git object sha
508
:return: (type, type_data) with type_data:
509
revision: revid, tree sha
512
sha = hex_to_sha(sha)
513
data = self.db["git\0" + sha].split("\0")
514
return (data[0], (data[1], data[2]))
516
def missing_revisions(self, revids):
519
if self.db.get("commit\0" + revid) is None:
524
"""List the revision ids known."""
525
for key in self.db.iterkeys():
526
if key.startswith("commit\0"):
530
"""List the SHA1s."""
531
for key in self.db.iterkeys():
532
if key.startswith("git\0"):
533
yield sha_to_hex(key[4:])
536
formats = registry.Registry()
537
formats.register(TdbGitCacheFormat().get_format_string(),
539
formats.register(SqliteGitCacheFormat().get_format_string(),
540
SqliteGitCacheFormat())
544
formats.register('default', SqliteGitCacheFormat())
546
formats.register('default', TdbGitCacheFormat())
549
def migrate_ancient_formats(repo_transport):
    """Move a cache file from the repository root into ``git/``.

    A ``git.tdb`` file becomes ``git/idmap.tdb`` and, failing that, a
    ``git.db`` file becomes ``git/idmap.db``. Before the rename, the
    matching cache format is initialized on the ``git`` sub-transport.
    Nothing happens when neither file exists.

    :param repo_transport: Transport of the repository to inspect.
    """
    if repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        old_name, new_name = "git.tdb", "git/idmap.tdb"
    elif repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        old_name, new_name = "git.db", "git/idmap.db"
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(old_name, new_name)
558
def from_repository(repository):
559
repo_transport = getattr(repository, "_transport", None)
560
if repo_transport is not None:
561
# Migrate older cache formats
563
repo_transport.mkdir("git")
564
except bzrlib.errors.FileExists:
567
migrate_ancient_formats(repo_transport)
568
return BzrGitCacheFormat.from_repository(repository)