1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
34
from xdg.BaseDirectory import xdg_cache_home
36
from bzrlib.config import config_dir
37
ret = os.path.join(config_dir(), "git")
39
ret = os.path.join(xdg_cache_home, "bazaar", "git")
40
if not os.path.isdir(ret):
45
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    :param sqlite3: sqlite DB-API module exposing ``sqlite_version_info``
    :raise bzrlib.errors.BzrError: if the sqlite library is older than 3.3
    """
    # Comparing (major, minor) as a tuple checks the major number first and
    # only falls back to the minor number when the majors are equal.
    if tuple(sqlite3.sqlite_version_info[:2]) >= (3, 3):
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
58
check_pysqlite_version(sqlite3)
59
except (ImportError, bzrlib.errors.BzrError), e:
60
from pysqlite2 import dbapi2 as sqlite3
61
check_pysqlite_version(sqlite3)
63
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
65
raise bzrlib.errors.BzrError("missing sqlite library")
68
_mapdbs = threading.local()
70
"""Get a cache for this thread's db connections."""
73
except AttributeError:
78
class InventorySHAMap(object):
79
"""Maps inventory file ids to Git SHAs."""
82
class GitShaMap(object):
83
"""Git<->Bzr revision id mapping database."""
85
def _add_entry(self, sha, type, type_data):
86
"""Add a new entry to the database.
88
raise NotImplementedError(self._add_entry)
90
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
92
"""Add multiple new entries to the database.
94
for (fileid, kind, hexsha, revision) in entries:
95
self._add_entry(hexsha, kind, (fileid, revision))
96
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
98
def lookup_git_sha(self, sha):
99
"""Lookup a Git sha in the database.
100
:param sha: Git object sha
101
:return: (type, type_data) with type_data:
102
revision: revid, tree sha
104
raise NotImplementedError(self.lookup_git_sha)
106
def lookup_blob_id(self, file_id, revision):
107
"""Retrieve a Git blob SHA by file id.
109
:param file_id: File id of the file/symlink
110
:param revision: revision in which the file was last changed.
112
raise NotImplementedError(self.lookup_blob_id)
114
def lookup_tree_id(self, file_id, revision):
115
"""Retrieve a Git tree SHA by file id.
117
raise NotImplementedError(self.lookup_tree_id)
120
"""List the revision ids known."""
121
raise NotImplementedError(self.revids)
123
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    :param revids: Revision ids to check. A set is used as-is; any other
        iterable is converted to a set first so the difference below works.
    :return: Set of the given revision ids not listed by ``self.revids()``.
    """
    present_revids = set(self.revids())
    if not isinstance(revids, set):
        # Lists, tuples and generators do not support set difference.
        revids = set(revids)
    return revids - present_revids
131
"""List the SHA1s."""
132
raise NotImplementedError(self.sha1s)
134
def start_write_group(self):
135
"""Start writing changes."""
137
def commit_write_group(self):
138
"""Commit any pending changes."""
140
def abort_write_group(self):
141
"""Abort any pending changes."""
144
class DictGitShaMap(GitShaMap):
150
def _add_entry(self, sha, type, type_data):
151
self._by_sha[sha] = (type, type_data)
152
if type in ("blob", "tree"):
153
self._by_fileid.setdefault(type_data[1], {})[type_data[0]] = sha
155
def lookup_blob_id(self, fileid, revision):
    """Return the sha stored for ``fileid`` under ``revision``.

    :raise KeyError: if the revision or the file id is not recorded
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
158
def lookup_git_sha(self, sha):
    """Return the ``(type, type_data)`` pair stored for a Git sha.

    :raise KeyError: if the sha is not recorded
    """
    entry = self._by_sha[sha]
    return entry
161
def lookup_tree_id(self, fileid, revision):
    """Return the tree sha stored for ``fileid`` under ``revision``.

    Trees share the ``_by_fileid`` index with blobs, so the lookup goes
    through the same mapping that ``_add_entry`` fills.

    :raise KeyError: if the revision or the file id is not recorded
    """
    return self._by_fileid[revision][fileid]
165
for key, (type, type_data) in self._by_sha.iteritems():
170
return self._by_sha.iterkeys()
173
class SqliteGitShaMap(GitShaMap):
175
def __init__(self, path=None):
178
self.db = sqlite3.connect(":memory:")
180
if not mapdbs().has_key(path):
181
mapdbs()[path] = sqlite3.connect(path)
182
self.db = mapdbs()[path]
183
self.db.text_factory = str
184
self.db.executescript("""
185
create table if not exists commits(
186
sha1 text not null check(length(sha1) == 40),
188
tree_sha text not null check(length(tree_sha) == 40)
190
create index if not exists commit_sha1 on commits(sha1);
191
create unique index if not exists commit_revid on commits(revid);
192
create table if not exists blobs(
193
sha1 text not null check(length(sha1) == 40),
194
fileid text not null,
197
create index if not exists blobs_sha1 on blobs(sha1);
198
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
199
create table if not exists trees(
200
sha1 text unique not null check(length(sha1) == 40),
201
fileid text not null,
204
create unique index if not exists trees_sha1 on trees(sha1);
205
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
209
return "%s(%r)" % (self.__class__.__name__, self.path)
212
def from_repository(cls, repository):
214
transport = getattr(repository, "_transport", None)
215
if transport is not None:
216
return cls(os.path.join(transport.local_abspath("."), "git.db"))
217
except bzrlib.errors.NotLocalUrl:
219
return cls(os.path.join(get_cache_dir(), "remote.db"))
221
def lookup_commit(self, revid):
222
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
227
def commit_write_group(self):
230
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
234
for (fileid, kind, hexsha, revision) in entries:
238
trees.append((hexsha, fileid, revid))
240
blobs.append((hexsha, fileid, revision))
244
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
246
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
247
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
249
def _add_entry(self, sha, type, type_data):
250
"""Add a new entry to the database.
252
assert isinstance(type_data, tuple)
255
assert isinstance(sha, str), "type was %r" % sha
257
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
258
elif type in ("blob", "tree"):
259
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
261
raise AssertionError("Unknown type %s" % type)
263
def lookup_blob_id(self, fileid, revision):
264
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
267
raise KeyError(fileid)
269
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id.

    :param fileid: File id of the tree
    :param revision: Revision id the tree row is stored under
    :return: The ``sha1`` column of the matching ``trees`` row
    :raise KeyError: if no row matches the (fileid, revision) pair
    """
    # Filter on the ``revision`` argument, the same way lookup_blob_id does.
    row = self.db.execute(
        "select sha1 from trees where fileid = ? and revid = ?",
        (fileid, revision)).fetchone()
    if row is not None:
        return row[0]
    raise KeyError(fileid)
275
def lookup_git_sha(self, sha):
276
"""Lookup a Git sha in the database.
278
:param sha: Git object sha
279
:return: (type, type_data) with type_data:
280
revision: revid, tree sha
282
def format(type, row):
283
return (type, (row[0], row[1]))
284
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
286
return format("commit", row)
287
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
289
return format("blob", row)
290
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
292
return format("tree", row)
296
"""List the revision ids known."""
297
return (row for (row,) in self.db.execute("select revid from commits"))
300
"""List the SHA1s."""
301
for table in ("blobs", "commits", "trees"):
303
for (row,) in self.db.execute("select sha1 from %s" % table):
308
TDB_HASH_SIZE = 50000
311
class TdbGitShaMap(GitShaMap):
312
"""SHA Map that uses a TDB database.
316
"git <sha1>" -> "<type> <type-data1> <type-data2>"
317
"commit revid" -> "<sha1> <tree-id>"
318
"tree fileid revid" -> "<sha1>"
319
"blob fileid revid" -> "<sha1>"
322
def __init__(self, path=None):
328
if not mapdbs().has_key(path):
329
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
330
os.O_RDWR|os.O_CREAT)
331
self.db = mapdbs()[path]
333
if int(self.db["version"]) not in (2, 3):
334
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
335
self.db["version"], TDB_MAP_VERSION)
339
self.db["version"] = str(TDB_MAP_VERSION)
341
def start_write_group(self):
    """Open a transaction on the underlying database."""
    self.db.transaction_start()
345
def commit_write_group(self):
    """Commit the open transaction on the underlying database."""
    self.db.transaction_commit()
349
def abort_write_group(self):
    """Cancel the open transaction on the underlying database."""
    self.db.transaction_cancel()
354
return "%s(%r)" % (self.__class__.__name__, self.path)
357
def from_repository(cls, repository):
359
transport = getattr(repository, "_transport", None)
360
if transport is not None:
361
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
362
except bzrlib.errors.NotLocalUrl:
364
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
366
def lookup_commit(self, revid):
    """Return the hex commit sha stored for a revision id.

    :param revid: Revision id to look up
    :return: ``sha_to_hex`` of the first 20 bytes of the stored record
    """
    record = self.db["commit\0" + revid]
    # Only the leading 20 bytes are the sha; the rest of the record is
    # whatever was joined after it when the entry was written.
    return sha_to_hex(record[:20])
369
def _add_entry(self, hexsha, type, type_data):
370
"""Add a new entry to the database.
375
sha = hex_to_sha(hexsha)
376
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
378
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
380
self.db["\0".join(("blob", type_data[0], type_data[1]))] = sha
382
def lookup_blob_id(self, fileid, revision):
    """Return the hex blob sha stored for ``fileid`` in ``revision``.

    The database key is the NUL-joined triple ``("blob", fileid, revision)``.
    """
    key = "\0".join(("blob", fileid, revision))
    return sha_to_hex(self.db[key])
385
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the database.

    :param sha: Git object sha
    :return: (type, type_data) with type_data:
        revision: revid, tree sha
    """
    binsha = hex_to_sha(sha)
    # The stored value is "<type>\0<type-data1>\0<type-data2>".
    fields = self.db["git\0" + binsha].split("\0")
    kind = fields[0]
    type_data = (fields[1], fields[2])
    return (kind, type_data)
397
def missing_revisions(self, revids):
400
if self.db.get("commit\0" + revid) is None:
405
"""List the revision ids known."""
406
for key in self.db.iterkeys():
407
if key.startswith("commit\0"):
411
"""List the SHA1s."""
412
for key in self.db.iterkeys():
413
if key.startswith("git\0"):
414
yield sha_to_hex(key[4:])
417
def from_repository(repository):
419
return TdbGitShaMap.from_repository(repository)
421
return SqliteGitShaMap.from_repository(repository)