1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git SHAs to Bazaar objects."""
19
from dulwich.objects import (
31
from bzrlib.transport import (
38
from xdg.BaseDirectory import xdg_cache_home
40
from bzrlib.config import config_dir
41
ret = os.path.join(config_dir(), "git")
43
ret = os.path.join(xdg_cache_home, "bazaar", "git")
44
if not os.path.isdir(ret):
49
def get_remote_cache_transport():
    """Return a transport for the directory given by ``get_cache_dir()``.

    :return: The result of ``get_transport`` applied to the cache directory
    """
    cache_dir = get_cache_dir()
    return get_transport(cache_dir)
53
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raises bzrlib.errors.BzrError: if the sqlite library is older
        than 3.3
    """
    # Comparing (major, minor) as a tuple is equivalent to checking
    # "major < 3 or (major == 3 and minor < 3)".
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
66
check_pysqlite_version(sqlite3)
67
except (ImportError, bzrlib.errors.BzrError), e:
68
from pysqlite2 import dbapi2 as sqlite3
69
check_pysqlite_version(sqlite3)
71
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
73
raise bzrlib.errors.BzrError("missing sqlite library")
76
_mapdbs = threading.local()
78
"""Get a cache for this thread's db connections."""
81
except AttributeError:
86
class GitShaMap(object):
    """Git<->Bzr revision id mapping database."""

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha
        :return: (type, type_data) with type_data:
            revision: revid, tree sha
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Retrieve a Git blob SHA by file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Retrieve a Git tree SHA by file id.

        :param file_id: File id to look up
        :param revision: Revision to look the file id up in
        """
        raise NotImplementedError(self.lookup_tree_id)

    def revids(self):
        """List the revision ids known."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        :param revids: Iterable of revision ids to check
        :return: Set of the given revision ids not listed by ``revids()``
        """
        present_revids = set(self.revids())
        # Accept any iterable, but avoid copying when a set is passed in.
        if not isinstance(revids, set):
            revids = set(revids)
        return revids - present_revids

    def sha1s(self):
        """List the SHA1s."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Start writing changes."""

    def commit_write_group(self):
        """Commit any pending changes."""

    def abort_write_group(self):
        """Abort any pending changes."""
135
class ContentCache(object):
    """Object that can cache Git objects."""

    def __getitem__(self, sha):
        """Retrieve an item, by SHA.

        :param sha: SHA of the item to retrieve
        :raises NotImplementedError: always; subclasses must override
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
    """Base class for bzr-git cache formats stored on a transport."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Write this format's format string to the 'format' file.

        :param transport: Transport to write the 'format' file on
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            # No 'format' file yet: set the transport up with the default.
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the users global cache directory if the repository has no
        transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        return cls.from_transport(transport)
197
class CacheUpdater(object):
    """Base class for objects that record Git objects in a cache."""

    def add_object(self, obj, ie):
        """Record a Git object; subclasses must override.

        :param obj: Git object to record
        :param ie: Entry associated with the object
            (presumably an inventory entry; verify against callers)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Finish the update; subclasses must override."""
        raise NotImplementedError(self.finish)
206
class BzrGitCache(object):
    """Caching backend."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Create a cache backend.

        :param idmap: Sha map object, exposed as ``self.idmap``
        :param content_cache: Content cache object (may be None)
        :param cache_updater_klass: Callable taking ``(cache, rev)`` that
            builds the updater returned by ``get_updater``
        """
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Build an updater for a revision.

        :param rev: Revision passed through to the updater class
        :return: ``cache_updater_klass(self, rev)``
        """
        return self._cache_updater_klass(self, rev)
218
# Factory for a BzrGitCache built from a DictGitShaMap, with no content
# cache, that is updated through DictCacheUpdater.
DictBzrGitCache = lambda: BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
221
class DictCacheUpdater(CacheUpdater):
223
def __init__(self, cache, rev):
225
self.revid = rev.revision_id
226
self.parent_revids = rev.parent_ids
230
def add_object(self, obj, ie):
231
if obj.type_name == "commit":
234
type_data = (self.revid, self._commit.tree)
235
self.cache.idmap._by_revid[self.revid] = obj.id
236
elif obj.type_name in ("blob", "tree"):
237
if obj.type_name == "blob":
238
revision = ie.revision
240
revision = self.revid
241
type_data = (ie.file_id, revision)
242
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
245
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
248
if self._commit is None:
249
raise AssertionError("No commit object added")
253
class DictGitShaMap(GitShaMap):
260
def lookup_blob_id(self, fileid, revision):
261
return self._by_fileid[revision][fileid]
263
def lookup_git_sha(self, sha):
264
return self._by_sha[sha]
266
def lookup_tree_id(self, fileid, revision):
267
return self._by_fileid[revision][fileid]
269
def lookup_commit(self, revid):
270
return self._by_revid[revid]
273
for key, (type, type_data) in self._by_sha.iteritems():
278
return self._by_sha.iterkeys()
281
class SqliteCacheUpdater(CacheUpdater):
283
def __init__(self, cache, rev):
285
self.db = self.cache.idmap.db
286
self.revid = rev.revision_id
291
def add_object(self, obj, ie):
292
if obj.type_name == "commit":
295
elif obj.type_name == "tree":
296
self._trees.append((obj.id, ie.file_id, self.revid))
297
elif obj.type_name == "blob":
298
self._blobs.append((obj.id, ie.file_id, ie.revision))
303
if self._commit is None:
304
raise AssertionError("No commit object added")
306
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
309
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
312
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
313
(self._commit.id, self.revid, self._commit.tree))
317
# Factory for a BzrGitCache built from a SqliteGitShaMap on path ``p``, with
# no content cache, that is updated through SqliteCacheUpdater.
SqliteBzrGitCache = lambda p: BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
320
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in an "idmap.db" sqlite file."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache for a transport.

        :param transport: Transport whose local path holds "idmap.db"
        :return: The result of ``SqliteBzrGitCache`` for the database path
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            # No local path for this transport: use the cache directory.
            basepath = get_cache_dir()
        return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
333
class SqliteGitShaMap(GitShaMap):
335
def __init__(self, path=None):
338
self.db = sqlite3.connect(":memory:")
340
if not mapdbs().has_key(path):
341
mapdbs()[path] = sqlite3.connect(path)
342
self.db = mapdbs()[path]
343
self.db.text_factory = str
344
self.db.executescript("""
345
create table if not exists commits(
346
sha1 text not null check(length(sha1) == 40),
348
tree_sha text not null check(length(tree_sha) == 40)
350
create index if not exists commit_sha1 on commits(sha1);
351
create unique index if not exists commit_revid on commits(revid);
352
create table if not exists blobs(
353
sha1 text not null check(length(sha1) == 40),
354
fileid text not null,
357
create index if not exists blobs_sha1 on blobs(sha1);
358
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
359
create table if not exists trees(
360
sha1 text unique not null check(length(sha1) == 40),
361
fileid text not null,
364
create unique index if not exists trees_sha1 on trees(sha1);
365
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
369
return "%s(%r)" % (self.__class__.__name__, self.path)
371
def lookup_commit(self, revid):
372
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
377
def commit_write_group(self):
380
def lookup_blob_id(self, fileid, revision):
381
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
384
raise KeyError(fileid)
386
def lookup_tree_id(self, fileid, revision):
387
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
390
raise KeyError(fileid)
392
def lookup_git_sha(self, sha):
393
"""Lookup a Git sha in the database.
395
:param sha: Git object sha
396
:return: (type, type_data) with type_data:
397
revision: revid, tree sha
399
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
401
return ("commit", row)
402
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
405
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
411
"""List the revision ids known."""
412
return (row for (row,) in self.db.execute("select revid from commits"))
415
"""List the SHA1s."""
416
for table in ("blobs", "commits", "trees"):
417
for (sha,) in self.db.execute("select sha1 from %s" % table):
421
class TdbCacheUpdater(CacheUpdater):
423
def __init__(self, cache, rev):
425
self.db = cache.idmap.db
426
self.revid = rev.revision_id
427
self.parent_revids = rev.parent_ids
431
def add_object(self, obj, ie):
432
sha = obj.sha().digest()
433
if obj.type_name == "commit":
434
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
435
type_data = (self.revid, obj.tree)
438
elif obj.type_name == "blob":
439
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
440
type_data = (ie.file_id, ie.revision)
441
elif obj.type_name == "tree":
442
type_data = (ie.file_id, self.revid)
445
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
448
if self._commit is None:
449
raise AssertionError("No commit object added")
453
# Factory for a BzrGitCache built from a TdbGitShaMap on path ``p``, with no
# content cache, that is updated through TdbCacheUpdater.
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
455
class TdbGitCacheFormat(BzrGitCacheFormat):
457
def get_format_string(self):
458
return 'bzr-git sha map version 3 using tdb\n'
460
def open(self, transport):
462
basepath = transport.local_abspath(".")
463
except bzrlib.errors.NotLocalUrl:
464
basepath = get_cache_dir()
466
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
469
"Unable to open existing bzr-git cache because 'tdb' is not "
473
class TdbGitShaMap(GitShaMap):
474
"""SHA Map that uses a TDB database.
478
"git <sha1>" -> "<type> <type-data1> <type-data2>"
479
"commit revid" -> "<sha1> <tree-id>"
480
"tree fileid revid" -> "<sha1>"
481
"blob fileid revid" -> "<sha1>"
485
TDB_HASH_SIZE = 50000
487
def __init__(self, path=None):
493
if not mapdbs().has_key(path):
494
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
495
os.O_RDWR|os.O_CREAT)
496
self.db = mapdbs()[path]
498
if int(self.db["version"]) not in (2, 3):
499
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
500
self.db["version"], self.TDB_MAP_VERSION)
504
self.db["version"] = str(self.TDB_MAP_VERSION)
506
def start_write_group(self):
507
"""Start writing changes."""
508
self.db.transaction_start()
510
def commit_write_group(self):
511
"""Commit any pending changes."""
512
self.db.transaction_commit()
514
def abort_write_group(self):
515
"""Abort any pending changes."""
516
self.db.transaction_cancel()
519
return "%s(%r)" % (self.__class__.__name__, self.path)
521
def lookup_commit(self, revid):
522
return sha_to_hex(self.db["commit\0" + revid][:20])
524
def lookup_blob_id(self, fileid, revision):
525
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
527
def lookup_git_sha(self, sha):
528
"""Lookup a Git sha in the database.
530
:param sha: Git object sha
531
:return: (type, type_data) with type_data:
532
revision: revid, tree sha
535
sha = hex_to_sha(sha)
536
data = self.db["git\0" + sha].split("\0")
537
return (data[0], (data[1], data[2]))
539
def missing_revisions(self, revids):
542
if self.db.get("commit\0" + revid) is None:
547
"""List the revision ids known."""
548
for key in self.db.iterkeys():
549
if key.startswith("commit\0"):
553
"""List the SHA1s."""
554
for key in self.db.iterkeys():
555
if key.startswith("git\0"):
556
yield sha_to_hex(key[4:])
559
formats = registry.Registry()
560
formats.register(TdbGitCacheFormat().get_format_string(),
562
formats.register(SqliteGitCacheFormat().get_format_string(),
563
SqliteGitCacheFormat())
567
formats.register('default', SqliteGitCacheFormat())
569
formats.register('default', TdbGitCacheFormat())
572
def migrate_ancient_formats(repo_transport):
    """Move an old-style cache file into the "git" subdirectory.

    A "git.db" file is renamed to "git/idmap.db", or otherwise a "git.tdb"
    file to "git/idmap.tdb"; the matching format is initialized on the
    "git" subdirectory first. Nothing happens if neither file exists.

    :param repo_transport: Transport to look for old cache files on
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif repo_transport.has("git.tdb"):
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
583
def remove_readonly_transport_decorator(transport):
    """Return a transport with any read-only wrapper removed.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` if the transport reports itself as
        read-only, otherwise the transport itself
    """
    if transport.is_readonly():
        return transport._decorated
    # Callers use the result as a transport, so hand back the original
    # rather than falling through to None.
    return transport
589
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the users cache directory
    (typically ~/.cache/bazaar/git/)

    :param repository: A repository object
    :return: The result of ``BzrGitCacheFormat.from_repository``
    """
    repo_transport = getattr(repository, "_transport", None)
    if repo_transport is not None:
        # Migrate older cache formats
        repo_transport = remove_readonly_transport_decorator(repo_transport)
        try:
            repo_transport.mkdir("git")
        except bzrlib.errors.FileExists:
            pass
        else:
            # NOTE(review): migration is assumed to run only when the "git"
            # directory was newly created -- confirm against upstream.
            migrate_ancient_formats(repo_transport)
    return BzrGitCacheFormat.from_repository(repository)