17
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
21
from bzrlib.errors import NoSuchRevision
34
from xdg.BaseDirectory import xdg_cache_home
36
from bzrlib.config import config_dir
37
ret = os.path.join(config_dir(), "git")
39
ret = os.path.join(xdg_cache_home, "bazaar", "git")
40
if not os.path.isdir(ret):
26
45
def check_pysqlite_version(sqlite3):
27
46
"""Check that sqlite library is compatible.
30
if (sqlite3.sqlite_version_info[0] < 3 or
31
(sqlite3.sqlite_version_info[0] == 3 and
49
if (sqlite3.sqlite_version_info[0] < 3 or
50
(sqlite3.sqlite_version_info[0] == 3 and
32
51
sqlite3.sqlite_version_info[1] < 3)):
33
warning('Needs at least sqlite 3.3.x')
52
trace.warning('Needs at least sqlite 3.3.x')
34
53
raise bzrlib.errors.BzrError("incompatible sqlite library")
39
58
check_pysqlite_version(sqlite3)
40
except (ImportError, bzrlib.errors.BzrError), e:
59
except (ImportError, bzrlib.errors.BzrError), e:
41
60
from pysqlite2 import dbapi2 as sqlite3
42
61
check_pysqlite_version(sqlite3)
44
warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
63
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
46
65
raise bzrlib.errors.BzrError("missing sqlite library")
68
_mapdbs = threading.local()
70
"""Get a cache for this thread's db connections."""
73
except AttributeError:
78
class InventorySHAMap(object):
79
"""Maps inventory file ids to Git SHAs."""
49
82
class GitShaMap(object):
51
def __init__(self, transport):
52
self.transport = transport
53
self.db = sqlite3.connect(
54
os.path.join(self.transport.local_abspath("."), "git.db"))
83
"""Git<->Bzr revision id mapping database."""
85
def _add_entry(self, sha, type, type_data):
86
"""Add a new entry to the database.
88
raise NotImplementedError(self._add_entry)
90
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
92
"""Add multiple new entries to the database.
94
for (fileid, kind, hexsha, revision) in entries:
95
self._add_entry(hexsha, kind, (fileid, revision))
96
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
98
def lookup_git_sha(self, sha):
99
"""Lookup a Git sha in the database.
100
:param sha: Git object sha
101
:return: (type, type_data) with type_data:
102
revision: revid, tree sha
104
raise NotImplementedError(self.lookup_git_sha)
106
def lookup_blob_id(self, file_id, revision):
107
"""Retrieve a Git blob SHA by file id.
109
:param file_id: File id of the file/symlink
110
:param revision: revision in which the file was last changed.
112
raise NotImplementedError(self.lookup_blob_id)
114
def lookup_tree_id(self, file_id, revision):
115
"""Retrieve a Git tree SHA by file id.
117
raise NotImplementedError(self.lookup_tree_id)
120
"""List the revision ids known."""
121
raise NotImplementedError(self.revids)
123
def missing_revisions(self, revids):
124
"""Return set of all the revisions that are not present."""
125
present_revids = set(self.revids())
126
if not isinstance(revids, set):
128
return revids - present_revids
131
"""List the SHA1s."""
132
raise NotImplementedError(self.sha1s)
134
def start_write_group(self):
    """Start writing changes.

    The base implementation does nothing and returns None.
    """
    return None
137
def commit_write_group(self):
    """Commit any pending changes.

    The base implementation does nothing and returns None.
    """
    return None
140
def abort_write_group(self):
    """Abort any pending changes.

    The base implementation does nothing and returns None.
    """
    return None
144
class DictGitShaMap(GitShaMap):
150
def _add_entry(self, sha, type, type_data):
151
self._by_sha[sha] = (type, type_data)
152
if type in ("blob", "tree"):
153
self._by_fileid.setdefault(type_data[1], {})[type_data[0]] = sha
155
def lookup_blob_id(self, fileid, revision):
    """Return the sha stored for ``fileid`` under ``revision``.

    :raise KeyError: if the revision or the file id is not in the index
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
158
def lookup_git_sha(self, sha):
    """Return the ``(type, type_data)`` pair stored for ``sha``.

    :raise KeyError: if ``sha`` is not in the index
    """
    entry = self._by_sha[sha]
    return entry
161
def lookup_tree_id(self, fileid, revision):
    """Return the tree sha stored for ``fileid`` under ``revision``.

    :param fileid: file id to look up
    :param revision: revision under which the entry was recorded
    :return: the sha recorded by ``_add_entry``
    :raise KeyError: if the revision or the file id is not in the index
    """
    # Trees are stored in the same ``_by_fileid`` index as blobs (see
    # ``_add_entry``); the previous ``self._base`` indirection referred to
    # an attribute this class does not appear to set.
    return self._by_fileid[revision][fileid]
165
for key, (type, type_data) in self._by_sha.iteritems():
170
return self._by_sha.iterkeys()
173
class SqliteGitShaMap(GitShaMap):
175
def __init__(self, path=None):
178
self.db = sqlite3.connect(":memory:")
180
if not mapdbs().has_key(path):
181
mapdbs()[path] = sqlite3.connect(path)
182
self.db = mapdbs()[path]
183
self.db.text_factory = str
55
184
self.db.executescript("""
56
create table if not exists commits(sha1 text, revid text, tree_sha text);
185
create table if not exists commits(
186
sha1 text not null check(length(sha1) == 40),
188
tree_sha text not null check(length(tree_sha) == 40)
57
190
create index if not exists commit_sha1 on commits(sha1);
58
create table if not exists blobs(sha1 text, fileid text, revid text);
191
create unique index if not exists commit_revid on commits(revid);
192
create table if not exists blobs(
193
sha1 text not null check(length(sha1) == 40),
194
fileid text not null,
59
197
create index if not exists blobs_sha1 on blobs(sha1);
60
create table if not exists trees(sha1 text, fileid text, revid text);
61
create index if not exists trees_sha1 on trees(sha1);
198
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
199
create table if not exists trees(
200
sha1 text unique not null check(length(sha1) == 40),
201
fileid text not null,
204
create unique index if not exists trees_sha1 on trees(sha1);
205
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
64
def _parent_lookup(self, revid):
65
return self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()[0].encode("utf-8")
67
def add_entry(self, sha, type, type_data):
209
return "%s(%r)" % (self.__class__.__name__, self.path)
212
def from_repository(cls, repository):
214
transport = getattr(repository, "_transport", None)
215
if transport is not None:
216
return cls(os.path.join(transport.local_abspath("."), "git.db"))
217
except bzrlib.errors.NotLocalUrl:
219
return cls(os.path.join(get_cache_dir(), "remote.db"))
221
def lookup_commit(self, revid):
222
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
227
def commit_write_group(self):
230
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
234
for (fileid, kind, hexsha, revision) in entries:
238
trees.append((hexsha, fileid, revid))
240
blobs.append((hexsha, fileid, revision))
244
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
246
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
247
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
249
def _add_entry(self, sha, type, type_data):
68
250
"""Add a new entry to the database.
70
252
assert isinstance(type_data, tuple)
71
255
assert isinstance(sha, str), "type was %r" % sha
72
256
if type == "commit":
73
257
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
75
self.db.execute("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
77
self.db.execute("replace into trees (sha1, fileid, revid) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
258
elif type in ("blob", "tree"):
259
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
79
261
raise AssertionError("Unknown type %s" % type)
263
def lookup_blob_id(self, fileid, revision):
264
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
267
raise KeyError(fileid)
269
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id.

    :param fileid: file id to look up
    :param revision: revision id matched against the ``revid`` column
    :return: ``sha1`` column of the matching row in the ``trees`` table
    :raise KeyError: if no row matches ``fileid`` and ``revision``
    """
    # Bind the ``revision`` argument. The previous code passed
    # ``self.revid`` and ignored the parameter entirely.
    row = self.db.execute(
        "select sha1 from trees where fileid = ? and revid = ?",
        (fileid, revision)).fetchone()
    if row is not None:
        return row[0]
    raise KeyError(fileid)
81
275
def lookup_git_sha(self, sha):
82
276
"""Lookup a Git sha in the database.
85
279
:return: (type, type_data) with type_data:
86
280
revision: revid, tree sha
282
def format(type, row):
283
return (type, (row[0], row[1]))
88
284
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
89
285
if row is not None:
90
return ("commit", row)
286
return format("commit", row)
91
287
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
92
288
if row is not None:
289
return format("blob", row)
94
290
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
95
291
if row is not None:
292
return format("tree", row)
97
293
raise KeyError(sha)
100
for row in self.db.execute("select revid from commits").fetchall():
296
"""List the revision ids known."""
297
return (row for (row,) in self.db.execute("select revid from commits"))
300
"""List the SHA1s."""
301
for table in ("blobs", "commits", "trees"):
303
for (row,) in self.db.execute("select sha1 from %s" % table):
308
TDB_HASH_SIZE = 50000
311
class TdbGitShaMap(GitShaMap):
312
"""SHA Map that uses a TDB database.
316
"git <sha1>" -> "<type> <type-data1> <type-data2>"
317
"commit revid" -> "<sha1> <tree-id>"
318
"tree fileid revid" -> "<sha1>"
319
"blob fileid revid" -> "<sha1>"
322
def __init__(self, path=None):
328
if not mapdbs().has_key(path):
329
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
330
os.O_RDWR|os.O_CREAT)
331
self.db = mapdbs()[path]
333
if int(self.db["version"]) not in (2, 3):
334
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
335
self.db["version"], TDB_MAP_VERSION)
339
self.db["version"] = str(TDB_MAP_VERSION)
341
def start_write_group(self):
    """Start writing changes by opening a transaction on the database."""
    database = self.db
    database.transaction_start()
345
def commit_write_group(self):
    """Commit any pending changes by committing the database transaction."""
    database = self.db
    database.transaction_commit()
349
def abort_write_group(self):
    """Abort any pending changes by cancelling the database transaction."""
    database = self.db
    database.transaction_cancel()
354
return "%s(%r)" % (self.__class__.__name__, self.path)
357
def from_repository(cls, repository):
359
transport = getattr(repository, "_transport", None)
360
if transport is not None:
361
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
362
except bzrlib.errors.NotLocalUrl:
364
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
366
def lookup_commit(self, revid):
    """Return the hex sha recorded for the commit of ``revid``.

    Reads the record keyed by "commit", a NUL byte and the revision id,
    and converts its first 20 bytes with ``sha_to_hex``.

    :raise KeyError: presumably, if the database has no such record --
        depends on the tdb mapping behaviour; confirm.
    """
    record = self.db["commit\0" + revid]
    binary_sha = record[:20]
    return sha_to_hex(binary_sha)
369
def _add_entry(self, hexsha, type, type_data):
370
"""Add a new entry to the database.
375
sha = hex_to_sha(hexsha)
376
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
378
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
380
self.db["\0".join(("blob", type_data[0], type_data[1]))] = sha
382
def lookup_blob_id(self, fileid, revision):
    """Return the hex sha recorded for a blob.

    The record key is "blob", the file id and the revision joined by NUL
    bytes; the stored value is converted with ``sha_to_hex``.
    """
    key = "\0".join(("blob", fileid, revision))
    binary_sha = self.db[key]
    return sha_to_hex(binary_sha)
385
def lookup_git_sha(self, sha):
386
"""Lookup a Git sha in the database.
388
:param sha: Git object sha
389
:return: (type, type_data) with type_data:
390
revision: revid, tree sha
393
sha = hex_to_sha(sha)
394
data = self.db["git\0" + sha].split("\0")
395
return (data[0], (data[1], data[2]))
397
def missing_revisions(self, revids):
400
if self.db.get("commit\0" + revid) is None:
405
"""List the revision ids known."""
406
for key in self.db.iterkeys():
407
if key.startswith("commit\0"):
411
"""List the SHA1s."""
412
for key in self.db.iterkeys():
413
if key.startswith("git\0"):
414
yield sha_to_hex(key[4:])
417
def from_repository(repository):
419
return TdbGitShaMap.from_repository(repository)
421
return SqliteGitShaMap.from_repository(repository)