1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
31
from bzrlib.transport import (
38
from xdg.BaseDirectory import xdg_cache_home
40
from bzrlib.config import config_dir
41
ret = os.path.join(config_dir(), "git")
43
ret = os.path.join(xdg_cache_home, "bazaar", "git")
44
if not os.path.isdir(ret):
49
def get_remote_cache_transport():
    """Return a transport for the directory reported by get_cache_dir().

    :return: the result of ``get_transport`` applied to that directory
    """
    return get_transport(get_cache_dir())
53
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    Logs a warning before raising when the library is too old.

    :param sqlite3: module object exposing ``sqlite_version_info``
    :raise bzrlib.errors.BzrError: if the library version is below 3.3
    """
    # Comparing (major, minor) as a tuple covers both "major < 3" and
    # "major == 3 with minor < 3" in a single test.
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
66
check_pysqlite_version(sqlite3)
67
except (ImportError, bzrlib.errors.BzrError), e:
68
from pysqlite2 import dbapi2 as sqlite3
69
check_pysqlite_version(sqlite3)
71
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
73
raise bzrlib.errors.BzrError("missing sqlite library")
76
_mapdbs = threading.local()
78
"""Get a cache for this thread's db connections."""
81
except AttributeError:
86
class GitShaMap(object):
87
"""Git<->Bzr revision id mapping database."""
89
def _add_entry(self, sha, type, type_data):
    """Add a new entry to the database.

    This base implementation always raises.

    :param sha: hex sha of the Git object
    :param type: object kind; add_entries passes "commit" or the kind
        of each entry
    :param type_data: tuple describing the object; add_entries passes
        (revid, root_tree_sha) for commits and (fileid, revision)
        otherwise
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self._add_entry)
94
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
96
"""Add multiple new entries to the database.
98
for (fileid, kind, hexsha, revision) in entries:
99
self._add_entry(hexsha, kind, (fileid, revision))
100
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
102
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the database.

    This base implementation always raises.

    :param sha: Git object sha
    :return: (type, type_data) with type_data:
        revision: revid, tree sha
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.lookup_git_sha)
110
def lookup_blob_id(self, file_id, revision):
    """Retrieve a Git blob SHA by file id.

    This base implementation always raises.

    :param file_id: File id of the file/symlink
    :param revision: revision in which the file was last changed.
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.lookup_blob_id)
118
def lookup_tree_id(self, file_id, revision):
    """Retrieve a Git tree SHA by file id.

    This base implementation always raises.

    :param file_id: file id to look up
    :param revision: revision id the tree was recorded under
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.lookup_tree_id)
124
"""List the revision ids known."""
125
raise NotImplementedError(self.revids)
127
def missing_revisions(self, revids):
128
"""Return set of all the revisions that are not present."""
129
present_revids = set(self.revids())
130
if not isinstance(revids, set):
132
return revids - present_revids
135
"""List the SHA1s."""
136
raise NotImplementedError(self.sha1s)
138
def start_write_group(self):
    """Start writing changes.

    This implementation does nothing and returns None.
    """
141
def commit_write_group(self):
    """Commit any pending changes.

    This implementation does nothing and returns None.
    """
144
def abort_write_group(self):
    """Abort any pending changes.

    This implementation does nothing and returns None.
    """
148
class ContentCache(object):
149
"""Object that can cache Git objects."""
151
def __getitem__(self, sha):
152
"""Retrieve an item, by SHA."""
153
raise NotImplementedError(self.__getitem__)
156
"""Add an object to the cache."""
157
raise NotImplementedError(self.add)
160
class BzrGitCacheFormat(object):
162
def get_format_string(self):
    """Return the string that identifies this cache format.

    initialize() writes this value to the 'format' file.  This base
    implementation always raises.

    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.get_format_string)
165
def open(self, transport):
    """Open the cache reachable through ``transport``.

    This base implementation always raises.

    :param transport: transport for the cache directory
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.open)
168
def initialize(self, transport):
    """Write this format's identifying string to the 'format' file.

    :param transport: transport whose ``put_bytes`` receives the file
        name 'format' and the value of get_format_string()
    """
    transport.put_bytes('format', self.get_format_string())
172
def from_repository(self, repository):
173
repo_transport = getattr(repository, "_transport", None)
174
if repo_transport is not None:
176
repo_transport.mkdir('git')
177
except bzrlib.errors.FileExists:
179
transport = repo_transport.clone('git')
181
transport = get_remote_cache_transport()
183
format_name = transport.get_bytes('format')
184
format = formats.get(format_name)
185
except bzrlib.errors.NoSuchFile:
186
format = formats.get('default')
187
format.initialize(transport)
188
return format.open(transport)
191
class CacheUpdater(object):
193
def __init__(self, cache, rev, content_cache_types):
195
self.content_cache_types = content_cache_types
196
self.revid = rev.revision_id
197
self.parent_revids = rev.parent_ids
201
def add_object(self, obj, ie):
202
if obj.type_name == "commit":
205
elif obj.type_name in ("blob", "tree"):
206
if obj.type_name == "blob":
207
revision = ie.revision
209
revision = self.revid
210
self._entries.append((ie.file_id, obj.type_name, obj.id, revision))
213
if (self.cache.content_cache and
214
obj.type_name in self.content_cache_types):
215
self.cache.content_cache.add(obj)
218
if self._commit is None:
219
raise AssertionError("No commit object added")
220
self.cache.idmap.add_entries(self.revid, self.parent_revids,
221
self._commit.id, self._commit.tree, self._entries)
225
class BzrGitCache(object):
226
"""Caching backend."""
228
def __init__(self, idmap, content_cache):
230
self.content_cache = content_cache
232
def get_updater(self, rev, content_cache_types):
    """Create a CacheUpdater bound to this cache.

    :param rev: revision object handed to the updater
    :param content_cache_types: forwarded unchanged to the updater
    :return: a new CacheUpdater
    """
    return CacheUpdater(self, rev, content_cache_types)
236
class DictGitShaMap(GitShaMap):
242
def _add_entry(self, sha, type, type_data):
    """Record one object in the in-memory maps.

    Every entry is stored in ``_by_sha`` as sha -> (type, type_data).
    Blobs and trees are additionally indexed in ``_by_fileid`` as
    type_data[1] -> {type_data[0]: sha}.

    :param sha: sha of the Git object
    :param type: object kind; "blob" and "tree" also get indexed
    :param type_data: indexable pair; for blobs/trees element 0 is the
        inner key and element 1 the outer key of ``_by_fileid``
    """
    self._by_sha[sha] = (type, type_data)
    if type in ("blob", "tree"):
        inner_key, outer_key = type_data[0], type_data[1]
        self._by_fileid.setdefault(outer_key, {})[inner_key] = sha
247
def lookup_blob_id(self, fileid, revision):
    """Return the sha stored in ``_by_fileid`` for (revision, fileid).

    :param fileid: inner key of the ``_by_fileid`` index
    :param revision: outer key of the ``_by_fileid`` index
    :raise KeyError: if either key is absent
    """
    return self._by_fileid[revision][fileid]
250
def lookup_git_sha(self, sha):
    """Return the (type, type_data) pair stored for ``sha``.

    :param sha: key previously stored in ``_by_sha``
    :raise KeyError: if ``sha`` is unknown
    """
    return self._by_sha[sha]
253
def lookup_tree_id(self, fileid, revision):
    """Return the sha stored in ``_by_fileid`` for (revision, fileid).

    :param fileid: inner key of the ``_by_fileid`` index
    :param revision: outer key of the ``_by_fileid`` index
    :raise KeyError: if either key is absent
    """
    # Trees share the ``_by_fileid`` index that _add_entry fills and
    # lookup_blob_id reads, so it is read directly from this object.
    return self._by_fileid[revision][fileid]
257
for key, (type, type_data) in self._by_sha.iteritems():
262
return self._by_sha.iterkeys()
265
class SqliteGitCacheFormat(BzrGitCacheFormat):
267
def get_format_string(self):
    """Return the string that identifies the sqlite cache format."""
    return 'bzr-git sha map version 1 using sqlite\n'
270
def open(self, transport):
272
basepath = transport.local_abspath(".")
273
except bzrlib.errors.NotLocalUrl:
274
basepath = get_cache_dir()
276
SqliteGitShaMap(os.path.join(get_cache_dir(), "idmap.db")),
280
class SqliteGitShaMap(GitShaMap):
282
def __init__(self, path=None):
285
self.db = sqlite3.connect(":memory:")
287
if not mapdbs().has_key(path):
288
mapdbs()[path] = sqlite3.connect(path)
289
self.db = mapdbs()[path]
290
self.db.text_factory = str
291
self.db.executescript("""
292
create table if not exists commits(
293
sha1 text not null check(length(sha1) == 40),
295
tree_sha text not null check(length(tree_sha) == 40)
297
create index if not exists commit_sha1 on commits(sha1);
298
create unique index if not exists commit_revid on commits(revid);
299
create table if not exists blobs(
300
sha1 text not null check(length(sha1) == 40),
301
fileid text not null,
304
create index if not exists blobs_sha1 on blobs(sha1);
305
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
306
create table if not exists trees(
307
sha1 text unique not null check(length(sha1) == 40),
308
fileid text not null,
311
create unique index if not exists trees_sha1 on trees(sha1);
312
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
316
return "%s(%r)" % (self.__class__.__name__, self.path)
318
def lookup_commit(self, revid):
319
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
324
def commit_write_group(self):
327
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
331
for (fileid, kind, hexsha, revision) in entries:
333
trees.append((hexsha, fileid, revid))
335
blobs.append((hexsha, fileid, revision))
339
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
341
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
342
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
344
def _add_entry(self, sha, type, type_data):
345
"""Add a new entry to the database.
347
assert isinstance(type_data, tuple)
350
assert isinstance(sha, str), "type was %r" % sha
352
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
353
elif type in ("blob", "tree"):
354
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
356
raise AssertionError("Unknown type %s" % type)
358
def lookup_blob_id(self, fileid, revision):
359
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
362
raise KeyError(fileid)
364
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id.

    :param fileid: file id to look up
    :param revision: revision id the tree row was stored under
    :return: the ``sha1`` column of the matching ``trees`` row
    :raise KeyError: (carrying ``fileid``) if no row matches
    """
    # Bind the caller-supplied revision, the same way lookup_blob_id
    # binds its own ``revision`` argument.
    row = self.db.execute(
        "select sha1 from trees where fileid = ? and revid = ?",
        (fileid, revision)).fetchone()
    if row is None:
        raise KeyError(fileid)
    return row[0]
370
def lookup_git_sha(self, sha):
371
"""Lookup a Git sha in the database.
373
:param sha: Git object sha
374
:return: (type, type_data) with type_data:
375
revision: revid, tree sha
377
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
379
return ("commit", row)
380
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
383
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
389
"""List the revision ids known."""
390
return (row for (row,) in self.db.execute("select revid from commits"))
393
"""List the SHA1s."""
394
for table in ("blobs", "commits", "trees"):
395
for (sha,) in self.db.execute("select sha1 from %s" % table):
400
TDB_HASH_SIZE = 50000
403
class TdbGitCacheFormat(BzrGitCacheFormat):
405
def get_format_string(self):
    """Return the string that identifies the tdb cache format."""
    return 'bzr-git sha map version 3 using tdb\n'
408
def open(self, transport):
410
basepath = transport.local_abspath(".")
411
except bzrlib.errors.NotLocalUrl:
412
basepath = get_cache_dir()
415
TdbGitShaMap(os.path.join(get_cache_dir(), "idmap.tdb")),
419
"Unable to open existing bzr-git cache because 'tdb' is not "
423
class TdbGitShaMap(GitShaMap):
424
"""SHA Map that uses a TDB database.
428
"git <sha1>" -> "<type> <type-data1> <type-data2>"
429
"commit revid" -> "<sha1> <tree-id>"
430
"tree fileid revid" -> "<sha1>"
431
"blob fileid revid" -> "<sha1>"
434
def __init__(self, path=None):
440
if not mapdbs().has_key(path):
441
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
442
os.O_RDWR|os.O_CREAT)
443
self.db = mapdbs()[path]
445
if int(self.db["version"]) not in (2, 3):
446
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
447
self.db["version"], TDB_MAP_VERSION)
451
self.db["version"] = str(TDB_MAP_VERSION)
453
def start_write_group(self):
    """Start writing changes.

    Calls ``transaction_start()`` on the database handle.
    """
    self.db.transaction_start()
457
def commit_write_group(self):
    """Commit any pending changes.

    Calls ``transaction_commit()`` on the database handle.
    """
    self.db.transaction_commit()
461
def abort_write_group(self):
    """Abort any pending changes.

    Calls ``transaction_cancel()`` on the database handle.
    """
    self.db.transaction_cancel()
466
return "%s(%r)" % (self.__class__.__name__, self.path)
468
def lookup_commit(self, revid):
    """Return the hex sha of the commit recorded for ``revid``.

    :param revid: revision id; the record is stored under the key
        "commit" + NUL + revid
    :return: ``sha_to_hex`` of the first 20 bytes of the stored value
        (presumably the binary commit sha that _add_entry writes first
        -- confirm against hex_to_sha)
    """
    record = self.db["commit\0" + revid]
    return sha_to_hex(record[:20])
471
def _add_entry(self, hexsha, type, type_data):
472
"""Add a new entry to the database.
477
sha = hex_to_sha(hexsha)
478
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
480
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
482
self.db["\0".join(("blob", type_data[0], type_data[1]))] = sha
484
def lookup_blob_id(self, fileid, revision):
    """Return the hex sha of the blob stored for (fileid, revision).

    :param fileid: file id part of the key
    :param revision: revision id part of the key
    :return: ``sha_to_hex`` of the value stored under the NUL-joined key
        ("blob", fileid, revision)
    """
    key = "\0".join(("blob", fileid, revision))
    return sha_to_hex(self.db[key])
487
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the database.

    :param sha: Git object sha; converted with ``hex_to_sha`` before
        it is used as part of the key
    :return: (type, type_data) with type_data:
        revision: revid, tree sha
    """
    # The record lives under "git" + NUL + converted sha and holds three
    # NUL-separated fields: the type and the two type_data elements.
    sha = hex_to_sha(sha)
    data = self.db["git\0" + sha].split("\0")
    return (data[0], (data[1], data[2]))
499
def missing_revisions(self, revids):
502
if self.db.get("commit\0" + revid) is None:
507
"""List the revision ids known."""
508
for key in self.db.iterkeys():
509
if key.startswith("commit\0"):
513
"""List the SHA1s."""
514
for key in self.db.iterkeys():
515
if key.startswith("git\0"):
516
yield sha_to_hex(key[4:])
519
formats = registry.Registry()
520
formats.register(TdbGitCacheFormat().get_format_string(),
522
formats.register(SqliteGitCacheFormat().get_format_string(),
523
SqliteGitCacheFormat())
527
formats.register('default', SqliteGitCacheFormat())
529
formats.register('default', TdbGitCacheFormat())
532
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the "git" subdirectory.

    "git.tdb" is checked first, then "git.db"; only the first one found
    is migrated.  For the match, the corresponding cache format is
    initialised on ``repo_transport.clone("git")`` and the file is
    renamed to "git/idmap.tdb" or "git/idmap.db" respectively.  Nothing
    happens when neither file exists.

    :param repo_transport: transport providing has(), clone() and
        rename()
    """
    if repo_transport.has("git.tdb"):
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
    elif repo_transport.has("git.db"):
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
541
def from_repository(repository):
542
repo_transport = getattr(repository, "_transport", None)
543
if repo_transport is not None:
544
# Migrate older cache formats
546
repo_transport.mkdir("git")
547
except bzrlib.errors.FileExists:
550
migrate_ancient_formats(repo_transport)
551
return BzrGitCacheFormat.from_repository(repository)