1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
31
from bzrlib.transport import (
38
from xdg.BaseDirectory import xdg_cache_home
40
from bzrlib.config import config_dir
41
ret = os.path.join(config_dir(), "git")
43
ret = os.path.join(xdg_cache_home, "bazaar", "git")
44
if not os.path.isdir(ret):
49
def get_remote_cache_transport():
    """Return a transport for the local cache directory.

    :return: The result of ``get_transport()`` applied to the path returned
        by ``get_cache_dir()``
    """
    cache_dir = get_cache_dir()
    return get_transport(cache_dir)
53
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    :param sqlite3: DB-API module exposing ``sqlite_version_info``
    :raises bzrlib.errors.BzrError: if the sqlite library is older than 3.3;
        a warning is emitted through ``trace`` first
    """
    # Tuple ordering covers both "major < 3" and "major == 3, minor < 3".
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
66
check_pysqlite_version(sqlite3)
67
except (ImportError, bzrlib.errors.BzrError), e:
68
from pysqlite2 import dbapi2 as sqlite3
69
check_pysqlite_version(sqlite3)
71
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
73
raise bzrlib.errors.BzrError("missing sqlite library")
76
_mapdbs = threading.local()
78
"""Get a cache for this thread's db connections."""
81
except AttributeError:
86
class GitShaMap(object):
    """Git<->Bzr revision id mapping database."""

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha
        :return: (type, type_data) with type_data:
            revision: revid, tree sha
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Retrieve a Git blob SHA by file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Retrieve a Git tree SHA by file id.

        :param file_id: File id to look up
        :param revision: Revision id to look up
        """
        raise NotImplementedError(self.lookup_tree_id)

    def revids(self):
        """List the revision ids known."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        :param revids: Iterable of revision ids to check; it is not modified
        :return: A new set holding the given revision ids that
            ``self.revids()`` does not list
        """
        return set(revids).difference(self.revids())

    def sha1s(self):
        """List the SHA1s."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Start writing changes."""

    def commit_write_group(self):
        """Commit any pending changes."""

    def abort_write_group(self):
        """Abort any pending changes."""
135
class ContentCache(object):
    """Object that can cache Git objects."""

    def __getitem__(self, sha):
        """Retrieve an item, by SHA.

        :param sha: SHA of the item to fetch
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
    """Description of how a bzr-git cache is stored on a transport."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Write this format's format string to the ``format`` file.

        :param transport: Transport to write to
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        The ``format`` file on the transport selects the format from the
        ``formats`` registry. If that file does not exist, the registry's
        ``'default'`` format is initialized on the transport and used.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the user's global cache directory if the repository has no
        transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                # The cache directory is already there; nothing to create.
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        return cls.from_transport(transport)
197
class CacheUpdater(object):
    """Base class for cache updaters.

    Subclasses implement ``add_object`` and ``finish``.
    """

    def add_object(self, obj, ie):
        """Record an object in the cache.

        :param obj: Object to record; the subclasses in this file read its
            ``type_name`` and ``id``
        :param ie: Entry the subclasses in this file read ``file_id`` and
            ``revision`` from when recording blobs and trees
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update."""
        raise NotImplementedError(self.finish)
206
class BzrGitCache(object):
    """Caching backend."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Bundle the pieces that make up a cache.

        :param idmap: SHA map, exposed as ``self.idmap``
        :param content_cache: Content cache, exposed as ``self.content_cache``
        :param cache_updater_klass: Callable taking ``(cache, rev)`` that
            builds the updater returned by ``get_updater``
        """
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Create an updater for this cache and the given revision.

        :param rev: Revision handed to the updater class
        :return: ``cache_updater_klass(self, rev)``
        """
        return self._cache_updater_klass(self, rev)
218
def DictBzrGitCache():
    """Build a BzrGitCache around a new DictGitShaMap, with no content cache."""
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
221
class DictCacheUpdater(CacheUpdater):
223
def __init__(self, cache, rev):
225
self.revid = rev.revision_id
226
self.parent_revids = rev.parent_ids
230
def add_object(self, obj, ie):
231
if obj.type_name == "commit":
234
type_data = (self.revid, self._commit.tree)
235
self.cache.idmap._by_revid[self.revid] = obj.id
236
elif obj.type_name in ("blob", "tree"):
238
if obj.type_name == "blob":
239
revision = ie.revision
241
revision = self.revid
242
type_data = (ie.file_id, revision)
243
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
247
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
250
if self._commit is None:
251
raise AssertionError("No commit object added")
255
class DictGitShaMap(GitShaMap):
262
def lookup_blob_id(self, fileid, revision):
263
return self._by_fileid[revision][fileid]
265
def lookup_git_sha(self, sha):
266
return self._by_sha[sha]
268
def lookup_tree_id(self, fileid, revision):
269
return self._by_fileid[revision][fileid]
271
def lookup_commit(self, revid):
272
return self._by_revid[revid]
275
for key, (type, type_data) in self._by_sha.iteritems():
280
return self._by_sha.iterkeys()
283
class SqliteCacheUpdater(CacheUpdater):
285
def __init__(self, cache, rev):
287
self.db = self.cache.idmap.db
288
self.revid = rev.revision_id
293
def add_object(self, obj, ie):
294
if obj.type_name == "commit":
297
elif obj.type_name == "tree":
299
self._trees.append((obj.id, ie.file_id, self.revid))
300
elif obj.type_name == "blob":
302
self._blobs.append((obj.id, ie.file_id, ie.revision))
307
if self._commit is None:
308
raise AssertionError("No commit object added")
310
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
313
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
316
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
317
(self._commit.id, self.revid, self._commit.tree))
321
def SqliteBzrGitCache(p):
    """Build a BzrGitCache around SqliteGitShaMap(p), with no content cache.

    :param p: Path passed through to ``SqliteGitShaMap``
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
324
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that stores the SHA map in ``idmap.db`` via sqlite."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache for a transport.

        :param transport: Transport whose local path holds ``idmap.db``;
            when it has no local path, the directory returned by
            ``get_cache_dir()`` is used instead
        :return: The result of ``SqliteBzrGitCache`` for that ``idmap.db``
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
337
class SqliteGitShaMap(GitShaMap):
339
def __init__(self, path=None):
342
self.db = sqlite3.connect(":memory:")
344
if not mapdbs().has_key(path):
345
mapdbs()[path] = sqlite3.connect(path)
346
self.db = mapdbs()[path]
347
self.db.text_factory = str
348
self.db.executescript("""
349
create table if not exists commits(
350
sha1 text not null check(length(sha1) == 40),
352
tree_sha text not null check(length(tree_sha) == 40)
354
create index if not exists commit_sha1 on commits(sha1);
355
create unique index if not exists commit_revid on commits(revid);
356
create table if not exists blobs(
357
sha1 text not null check(length(sha1) == 40),
358
fileid text not null,
361
create index if not exists blobs_sha1 on blobs(sha1);
362
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
363
create table if not exists trees(
364
sha1 text unique not null check(length(sha1) == 40),
365
fileid text not null,
368
create unique index if not exists trees_sha1 on trees(sha1);
369
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
373
return "%s(%r)" % (self.__class__.__name__, self.path)
375
def lookup_commit(self, revid):
376
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
381
def commit_write_group(self):
384
def lookup_blob_id(self, fileid, revision):
385
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
388
raise KeyError(fileid)
390
def lookup_tree_id(self, fileid, revision):
391
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
394
raise KeyError(fileid)
396
def lookup_git_sha(self, sha):
397
"""Lookup a Git sha in the database.
399
:param sha: Git object sha
400
:return: (type, type_data) with type_data:
401
revision: revid, tree sha
403
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
405
return ("commit", row)
406
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
409
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
415
"""List the revision ids known."""
416
return (row for (row,) in self.db.execute("select revid from commits"))
419
"""List the SHA1s."""
420
for table in ("blobs", "commits", "trees"):
421
for (sha,) in self.db.execute("select sha1 from %s" % table):
425
class TdbCacheUpdater(CacheUpdater):
427
def __init__(self, cache, rev):
429
self.db = cache.idmap.db
430
self.revid = rev.revision_id
431
self.parent_revids = rev.parent_ids
435
def add_object(self, obj, ie):
436
sha = obj.sha().digest()
437
if obj.type_name == "commit":
438
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
439
type_data = (self.revid, obj.tree)
442
elif obj.type_name == "blob":
445
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
446
type_data = (ie.file_id, ie.revision)
447
elif obj.type_name == "tree":
450
type_data = (ie.file_id, self.revid)
453
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
456
if self._commit is None:
457
raise AssertionError("No commit object added")
461
def TdbBzrGitCache(p):
    """Build a BzrGitCache around TdbGitShaMap(p), with no content cache.

    :param p: Path passed through to ``TdbGitShaMap``
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
463
class TdbGitCacheFormat(BzrGitCacheFormat):
465
def get_format_string(self):
466
return 'bzr-git sha map version 3 using tdb\n'
468
def open(self, transport):
470
basepath = transport.local_abspath(".")
471
except bzrlib.errors.NotLocalUrl:
472
basepath = get_cache_dir()
474
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
477
"Unable to open existing bzr-git cache because 'tdb' is not "
481
class TdbGitShaMap(GitShaMap):
482
"""SHA Map that uses a TDB database.
486
"git <sha1>" -> "<type> <type-data1> <type-data2>"
487
"commit revid" -> "<sha1> <tree-id>"
488
"tree fileid revid" -> "<sha1>"
489
"blob fileid revid" -> "<sha1>"
493
TDB_HASH_SIZE = 50000
495
def __init__(self, path=None):
501
if not mapdbs().has_key(path):
502
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
503
os.O_RDWR|os.O_CREAT)
504
self.db = mapdbs()[path]
506
if int(self.db["version"]) not in (2, 3):
507
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
508
self.db["version"], self.TDB_MAP_VERSION)
512
self.db["version"] = str(self.TDB_MAP_VERSION)
514
def start_write_group(self):
515
"""Start writing changes."""
516
self.db.transaction_start()
518
def commit_write_group(self):
519
"""Commit any pending changes."""
520
self.db.transaction_commit()
522
def abort_write_group(self):
523
"""Abort any pending changes."""
524
self.db.transaction_cancel()
527
return "%s(%r)" % (self.__class__.__name__, self.path)
529
def lookup_commit(self, revid):
530
return sha_to_hex(self.db["commit\0" + revid][:20])
532
def lookup_blob_id(self, fileid, revision):
533
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
535
def lookup_git_sha(self, sha):
536
"""Lookup a Git sha in the database.
538
:param sha: Git object sha
539
:return: (type, type_data) with type_data:
540
revision: revid, tree sha
543
sha = hex_to_sha(sha)
544
data = self.db["git\0" + sha].split("\0")
545
return (data[0], (data[1], data[2]))
547
def missing_revisions(self, revids):
550
if self.db.get("commit\0" + revid) is None:
555
"""List the revision ids known."""
556
for key in self.db.iterkeys():
557
if key.startswith("commit\0"):
561
"""List the SHA1s."""
562
for key in self.db.iterkeys():
563
if key.startswith("git\0"):
564
yield sha_to_hex(key[4:])
567
formats = registry.Registry()
568
formats.register(TdbGitCacheFormat().get_format_string(),
570
formats.register(SqliteGitCacheFormat().get_format_string(),
571
SqliteGitCacheFormat())
575
formats.register('default', SqliteGitCacheFormat())
577
formats.register('default', TdbGitCacheFormat())
580
def migrate_ancient_formats(repo_transport):
    """Move an old top-level cache file into the ``git`` subdirectory.

    If ``git.db`` exists it is renamed to ``git/idmap.db`` after the sqlite
    format has been initialized on the ``git`` subdirectory; otherwise the
    same is done for ``git.tdb`` with the tdb format. Nothing happens when
    neither file exists.

    :param repo_transport: Transport that may hold ``git.db`` or ``git.tdb``
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif repo_transport.has("git.tdb"):
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
591
def remove_readonly_transport_decorator(transport):
    """Return the transport wrapped by a read-only decorator.

    :param transport: Transport to inspect
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    if transport.is_readonly():
        return transport._decorated
    # Callers use the result as a transport straight away, so a transport
    # that is not read-only must be handed back unchanged.
    return transport
597
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the user's cache directory
    (typically ~/.cache/bazaar/git/)

    :param repository: A repository object
    :return: The result of ``BzrGitCacheFormat.from_repository(repository)``
    """
    repo_transport = getattr(repository, "_transport", None)
    if repo_transport is not None:
        # Migrate older cache formats
        repo_transport = remove_readonly_transport_decorator(repo_transport)
        try:
            repo_transport.mkdir("git")
        except bzrlib.errors.FileExists:
            pass
        else:
            # NOTE(review): migration runs only when "git" was just
            # created here - confirm this is the intended condition.
            migrate_ancient_formats(repo_transport)
    return BzrGitCacheFormat.from_repository(repository)