78
86
class GitShaMap(object):
79
87
"""Git<->Bzr revision id mapping database."""
81
def add_entry(self, sha, type, type_data):
82
"""Add a new entry to the database.
84
raise NotImplementedError(self.add_entry)
86
def add_entries(self, entries):
87
"""Add multiple new entries to the database.
92
def lookup_tree(self, fileid, revid):
    """Lookup the SHA of a git tree.

    Abstract hook: this base implementation always raises.

    :param fileid: File id of the directory
    :param revid: Revision id the tree belongs to
    :raise NotImplementedError: always; subclasses provide the lookup
    """
    raise NotImplementedError(self.lookup_tree)
96
def lookup_blob(self, fileid, revid):
    """Lookup a blob by the fileid it has in a bzr revision.

    Abstract hook: this base implementation always raises.

    :param fileid: File id of the file
    :param revid: Revision id in which the file has that id
    :raise NotImplementedError: always; subclasses provide the lookup
    """
    raise NotImplementedError(self.lookup_blob)
100
89
def lookup_git_sha(self, sha):
101
90
"""Lookup a Git sha in the database.
103
91
:param sha: Git object sha
104
92
:return: (type, type_data) with type_data:
105
93
revision: revid, tree sha
107
95
raise NotImplementedError(self.lookup_git_sha)
97
def lookup_blob_id(self, file_id, revision):
98
"""Retrieve a Git blob SHA by file id.
100
:param file_id: File id of the file/symlink
101
:param revision: revision in which the file was last changed.
103
raise NotImplementedError(self.lookup_blob_id)
105
def lookup_tree_id(self, file_id, revision):
106
"""Retrieve a Git tree SHA by file id.
108
raise NotImplementedError(self.lookup_tree_id)
109
110
def revids(self):
    """List the revision ids known.

    Abstract hook: this base implementation always raises.

    :raise NotImplementedError: always; subclasses provide the listing
    """
    raise NotImplementedError(self.revids)
114
def missing_revisions(self, revids):
115
"""Return set of all the revisions that are not present."""
116
present_revids = set(self.revids())
117
if not isinstance(revids, set):
119
return revids - present_revids
114
122
"""List the SHA1s."""
115
123
raise NotImplementedError(self.sha1s)
125
def start_write_group(self):
    """Start writing changes.

    The base implementation is a no-op and returns None.
    """
128
def commit_write_group(self):
    """Commit any pending changes.

    The base implementation is a no-op and returns None.
    """
131
def abort_write_group(self):
    """Abort any pending changes.

    The base implementation is a no-op and returns None.
    """
135
class ContentCache(object):
    """Object that can cache Git objects."""

    def __getitem__(self, sha):
        """Retrieve an item, by SHA.

        Abstract hook: this base implementation always raises.

        :param sha: SHA of the object to retrieve
        :raise NotImplementedError: always; subclasses provide the lookup
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
145
def get_format_string(self):
    """Return a single-line unique format string for this cache format.

    Abstract hook: this base implementation always raises.

    :raise NotImplementedError: always; each concrete format overrides this
    """
    raise NotImplementedError(self.get_format_string)
149
def open(self, transport):
    """Open this format on a transport.

    Abstract hook: this base implementation always raises.

    :param transport: Transport on which the cache lives
    :raise NotImplementedError: always; each concrete format overrides this
    """
    raise NotImplementedError(self.open)
153
def initialize(self, transport):
    """Mark a transport as holding a cache of this format.

    Writes the value of ``get_format_string()`` to the file named
    'format' on the transport.

    :param transport: Transport to write the format marker to
    """
    transport.put_bytes('format', self.get_format_string())
157
def from_transport(self, transport):
158
"""Open a cache file present on a transport, or initialize one.
160
:param transport: Transport to use
161
:return: A BzrGitCache instance
164
format_name = transport.get_bytes('format')
165
format = formats.get(format_name)
166
except bzrlib.errors.NoSuchFile:
167
format = formats.get('default')
168
format.initialize(transport)
169
return format.open(transport)
172
def from_repository(cls, repository):
173
"""Open a cache file for a repository.
175
This will use the repository's transport to store the cache file, or
176
use the users global cache directory if the repository has no
177
transport associated with it.
179
:param repository: Repository to open the cache for
180
:return: A `BzrGitCache`
182
repo_transport = getattr(repository, "_transport", None)
183
if repo_transport is not None:
184
# Even if we don't write to this repo, we should be able
185
# to update its cache.
186
repo_transport = remove_readonly_transport_decorator(repo_transport)
188
repo_transport.mkdir('git')
189
except bzrlib.errors.FileExists:
191
transport = repo_transport.clone('git')
193
transport = get_remote_cache_transport()
194
return cls.from_transport(transport)
197
class CacheUpdater(object):
199
def add_object(self, obj, ie):
    """Record a Git object in the cache.

    Abstract hook: this base implementation always raises.

    :param obj: Git object to record
    :param ie: entry associated with the object
        (NOTE(review): presumably a bzr inventory entry; confirm with callers)
    :raise NotImplementedError: always; subclasses provide the storage
    """
    raise NotImplementedError(self.add_object)
203
raise NotImplementedError(self.finish)
206
class BzrGitCache(object):
207
"""Caching backend."""
209
def __init__(self, idmap, content_cache, cache_updater_klass):
211
self.content_cache = content_cache
212
self._cache_updater_klass = cache_updater_klass
214
def get_updater(self, rev):
    """Create a cache updater for a revision.

    :param rev: Revision the updater will record objects for
    :return: The result of calling the configured updater class with
        this cache and ``rev``
    """
    return self._cache_updater_klass(self, rev)
218
def DictBzrGitCache():
    """Build a BzrGitCache backed by a DictGitShaMap.

    No content cache is attached (None is passed), and DictCacheUpdater
    is used as the updater class.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
221
class DictCacheUpdater(CacheUpdater):
223
def __init__(self, cache, rev):
225
self.revid = rev.revision_id
226
self.parent_revids = rev.parent_ids
230
def add_object(self, obj, ie):
231
if obj.type_name == "commit":
234
type_data = (self.revid, self._commit.tree)
235
self.cache.idmap._by_revid[self.revid] = obj.id
236
elif obj.type_name in ("blob", "tree"):
238
if obj.type_name == "blob":
239
revision = ie.revision
241
revision = self.revid
242
type_data = (ie.file_id, revision)
243
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
247
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
250
if self._commit is None:
251
raise AssertionError("No commit object added")
121
255
class DictGitShaMap(GitShaMap):
123
257
def __init__(self):
126
def add_entry(self, sha, type, type_data):
127
self.dict[sha] = (type, type_data)
262
def lookup_blob_id(self, fileid, revision):
    """Retrieve a Git blob SHA by file id.

    :param fileid: File id of the file/symlink
    :param revision: Revision under which the entry was stored
    :return: The value stored in ``_by_fileid`` for that revision and file id
    :raise KeyError: if the revision or the file id is not in the map
    """
    return self._by_fileid[revision][fileid]
129
265
def lookup_git_sha(self, sha):
130
return self.dict[sha]
132
def lookup_tree(self, fileid, revid):
133
for k, v in self.dict.iteritems():
134
if v == ("tree", (fileid, revid)):
136
raise KeyError((fileid, revid))
138
def lookup_blob(self, fileid, revid):
139
for k, v in self.dict.iteritems():
140
if v == ("blob", (fileid, revid)):
142
raise KeyError((fileid, revid))
266
return self._by_sha[sha]
268
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id.

    :param fileid: File id of the directory
    :param revision: Revision under which the entry was stored
    :return: The value stored in ``_by_fileid`` for that revision and file id
    :raise KeyError: if the revision or the file id is not in the map
    """
    return self._by_fileid[revision][fileid]
271
def lookup_commit(self, revid):
    """Retrieve the Git commit SHA recorded for a revision id.

    :param revid: Revision id to look up
    :return: The value stored in ``_by_revid`` for that revision id
    :raise KeyError: if the revision id is not in the map
    """
    return self._by_revid[revid]
144
274
def revids(self):
145
for key, (type, type_data) in self.dict.iteritems():
275
for key, (type, type_data) in self._by_sha.iteritems():
146
276
if type == "commit":
147
277
yield type_data[0]
150
return self.dict.iterkeys()
280
return self._by_sha.iterkeys()
283
class SqliteCacheUpdater(CacheUpdater):
285
def __init__(self, cache, rev):
287
self.db = self.cache.idmap.db
288
self.revid = rev.revision_id
293
def add_object(self, obj, ie):
294
if obj.type_name == "commit":
297
elif obj.type_name == "tree":
299
self._trees.append((obj.id, ie.file_id, self.revid))
300
elif obj.type_name == "blob":
302
self._blobs.append((obj.id, ie.file_id, ie.revision))
307
if self._commit is None:
308
raise AssertionError("No commit object added")
310
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
313
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
316
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
317
(self._commit.id, self.revid, self._commit.tree))
321
def SqliteBzrGitCache(p):
    """Build a BzrGitCache backed by a SqliteGitShaMap.

    No content cache is attached (None is passed), and SqliteCacheUpdater
    is used as the updater class.

    :param p: Value passed through to SqliteGitShaMap
        (NOTE(review): the visible caller passes a database file path)
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
324
class SqliteGitCacheFormat(BzrGitCacheFormat):
326
def get_format_string(self):
    """Return the single-line format string for the sqlite cache format.

    The trailing newline is part of the returned string.
    """
    return 'bzr-git sha map version 1 using sqlite\n'
329
def open(self, transport):
331
basepath = transport.local_abspath(".")
332
except bzrlib.errors.NotLocalUrl:
333
basepath = get_cache_dir()
334
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
153
337
class SqliteGitShaMap(GitShaMap):
160
344
if not mapdbs().has_key(path):
161
345
mapdbs()[path] = sqlite3.connect(path)
162
self.db = mapdbs()[path]
346
self.db = mapdbs()[path]
347
self.db.text_factory = str
163
348
self.db.executescript("""
164
create table if not exists commits(sha1 text, revid text, tree_sha text);
349
create table if not exists commits(
350
sha1 text not null check(length(sha1) == 40),
352
tree_sha text not null check(length(tree_sha) == 40)
165
354
create index if not exists commit_sha1 on commits(sha1);
166
355
create unique index if not exists commit_revid on commits(revid);
167
create table if not exists blobs(sha1 text, fileid text, revid text);
356
create table if not exists blobs(
357
sha1 text not null check(length(sha1) == 40),
358
fileid text not null,
168
361
create index if not exists blobs_sha1 on blobs(sha1);
169
362
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
170
create table if not exists trees(sha1 text, fileid text, revid text);
171
create index if not exists trees_sha1 on trees(sha1);
363
create table if not exists trees(
364
sha1 text unique not null check(length(sha1) == 40),
365
fileid text not null,
368
create unique index if not exists trees_sha1 on trees(sha1);
172
369
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
176
def from_repository(cls, repository):
178
transport = getattr(repository, "_transport", None)
179
if transport is not None:
180
return cls(os.path.join(transport.local_abspath("."), "git.db"))
181
except bzrlib.errors.NotLocalUrl:
183
return cls(os.path.join(get_cache_dir(), "remote.db"))
373
return "%s(%r)" % (self.__class__.__name__, self.path)
185
375
def lookup_commit(self, revid):
186
376
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
187
377
if row is not None:
188
return row[0].encode("utf-8")
381
def commit_write_group(self):
194
def add_entries(self, entries):
197
for sha, type, type_data in entries:
198
assert isinstance(type_data[0], str)
199
assert isinstance(type_data[1], str)
200
entry = (sha.decode("utf-8"), type_data[0].decode("utf-8"),
201
type_data[1].decode("utf-8"))
209
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
211
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
214
def add_entry(self, sha, type, type_data):
215
"""Add a new entry to the database.
217
assert isinstance(type_data, tuple)
218
assert isinstance(sha, str), "type was %r" % sha
220
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
221
elif type in ("blob", "tree"):
222
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
224
raise AssertionError("Unknown type %s" % type)
226
def lookup_tree(self, fileid, revid):
227
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid,revid)).fetchone()
229
raise KeyError((fileid, revid))
230
return row[0].encode("utf-8")
232
def lookup_blob(self, fileid, revid):
233
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revid)).fetchone()
235
raise KeyError((fileid, revid))
236
return row[0].encode("utf-8")
384
def lookup_blob_id(self, fileid, revision):
385
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
388
raise KeyError(fileid)
390
def lookup_tree_id(self, fileid, revision):
391
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
394
raise KeyError(fileid)
238
396
def lookup_git_sha(self, sha):
239
397
"""Lookup a Git sha in the database.
242
400
:return: (type, type_data) with type_data:
243
401
revision: revid, tree sha
245
def format(type, row):
246
return (type, (row[0].encode("utf-8"), row[1].encode("utf-8")))
247
403
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
248
404
if row is not None:
249
return format("commit", row)
405
return ("commit", row)
250
406
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
251
407
if row is not None:
252
return format("blob", row)
253
409
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
254
410
if row is not None:
255
return format("tree", row)
256
412
raise KeyError(sha)
258
414
def revids(self):
259
415
"""List the revision ids known."""
260
for row in self.db.execute("select revid from commits").fetchall():
261
yield row[0].encode("utf-8")
416
return (row for (row,) in self.db.execute("select revid from commits"))
264
419
"""List the SHA1s."""
265
420
for table in ("blobs", "commits", "trees"):
266
for row in self.db.execute("select sha1 from %s" % table).fetchall():
267
yield row[0].encode("utf-8")
271
TDB_HASH_SIZE = 50000
421
for (sha,) in self.db.execute("select sha1 from %s" % table):
425
class TdbCacheUpdater(CacheUpdater):
427
def __init__(self, cache, rev):
429
self.db = cache.idmap.db
430
self.revid = rev.revision_id
431
self.parent_revids = rev.parent_ids
435
def add_object(self, obj, ie):
436
sha = obj.sha().digest()
437
if obj.type_name == "commit":
438
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
439
type_data = (self.revid, obj.tree)
442
elif obj.type_name == "blob":
445
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
446
type_data = (ie.file_id, ie.revision)
447
elif obj.type_name == "tree":
450
type_data = (ie.file_id, self.revid)
453
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
456
if self._commit is None:
457
raise AssertionError("No commit object added")
461
def TdbBzrGitCache(p):
    """Build a BzrGitCache backed by a TdbGitShaMap.

    No content cache is attached (None is passed), and TdbCacheUpdater
    is used as the updater class.

    :param p: Value passed through to TdbGitShaMap
        (NOTE(review): the visible caller passes a database file path)
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
463
class TdbGitCacheFormat(BzrGitCacheFormat):
465
def get_format_string(self):
    """Return the single-line format string for the tdb cache format.

    The trailing newline is part of the returned string.
    """
    return 'bzr-git sha map version 3 using tdb\n'
468
def open(self, transport):
470
basepath = transport.local_abspath(".")
471
except bzrlib.errors.NotLocalUrl:
472
basepath = get_cache_dir()
474
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
477
"Unable to open existing bzr-git cache because 'tdb' is not "
274
481
class TdbGitShaMap(GitShaMap):
291
501
if not mapdbs().has_key(path):
292
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
502
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
293
503
os.O_RDWR|os.O_CREAT)
294
self.db = mapdbs()[path]
295
if not "version" in self.db:
296
self.db["version"] = str(TDB_MAP_VERSION)
298
if int(self.db["version"]) != TDB_MAP_VERSION:
504
self.db = mapdbs()[path]
506
if int(self.db["version"]) not in (2, 3):
299
507
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
300
self.db["version"], TDB_MAP_VERSION)
508
self.db["version"], self.TDB_MAP_VERSION)
302
self.db["version"] = str(TDB_MAP_VERSION)
305
def from_repository(cls, repository):
307
transport = getattr(repository, "_transport", None)
308
if transport is not None:
309
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
310
except bzrlib.errors.NotLocalUrl:
312
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
512
self.db["version"] = str(self.TDB_MAP_VERSION)
514
def start_write_group(self):
    """Start writing changes.

    Delegates to ``transaction_start()`` on the underlying database.
    """
    self.db.transaction_start()
518
def commit_write_group(self):
    """Commit any pending changes.

    Delegates to ``transaction_commit()`` on the underlying database.
    """
    self.db.transaction_commit()
522
def abort_write_group(self):
    """Abort any pending changes.

    Delegates to ``transaction_cancel()`` on the underlying database.
    """
    self.db.transaction_cancel()
527
return "%s(%r)" % (self.__class__.__name__, self.path)
314
529
def lookup_commit(self, revid):
    """Retrieve the hex Git commit SHA recorded for a revision id.

    Reads the record stored under ``"commit\\0" + revid`` and converts
    its first 20 bytes with ``sha_to_hex``.

    :param revid: Revision id to look up
    :return: Result of ``sha_to_hex`` on the leading 20 bytes of the record
    """
    # NOTE(review): presumably raises KeyError when the key is absent;
    # that depends on the database object's __getitem__ -- confirm.
    return sha_to_hex(self.db["commit\0" + revid][:20])
320
def add_entry(self, hexsha, type, type_data):
321
"""Add a new entry to the database.
326
sha = hex_to_sha(hexsha)
327
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
329
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
331
self.db["\0".join((type, type_data[0], type_data[1]))] = sha
333
def lookup_tree(self, fileid, revid):
334
sha = self.db["\0".join(("tree", fileid, revid))]
338
return sha_to_hex(sha)
340
def lookup_blob(self, fileid, revid):
    """Lookup the hex SHA of a blob by file id and revision id.

    The database key is "blob", ``fileid`` and ``revid`` joined by NUL
    characters; the stored value is converted with ``sha_to_hex``.

    :param fileid: File id of the file
    :param revid: Revision id the blob was recorded under
    :return: Result of ``sha_to_hex`` on the stored value
    """
    return sha_to_hex(self.db["\0".join(("blob", fileid, revid))])
532
def lookup_blob_id(self, fileid, revision):
    """Retrieve the hex Git blob SHA by file id and revision.

    The database key is "blob", ``fileid`` and ``revision`` joined by NUL
    characters; the stored value is converted with ``sha_to_hex``.

    :param fileid: File id of the file/symlink
    :param revision: Revision the blob was recorded under
    :return: Result of ``sha_to_hex`` on the stored value
    """
    return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
343
535
def lookup_git_sha(self, sha):
344
536
"""Lookup a Git sha in the database.
363
562
for key in self.db.iterkeys():
364
563
if key.startswith("git\0"):
365
564
yield sha_to_hex(key[4:])
567
formats = registry.Registry()
568
formats.register(TdbGitCacheFormat().get_format_string(),
570
formats.register(SqliteGitCacheFormat().get_format_string(),
571
SqliteGitCacheFormat())
575
formats.register('default', SqliteGitCacheFormat())
577
formats.register('default', TdbGitCacheFormat())
580
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the "git" subdirectory.

    If "git.db" exists on the transport, the sqlite format marker is
    written into the "git" subdirectory and the file is renamed to
    "git/idmap.db". Otherwise, if "git.tdb" exists, the same is done with
    the tdb format marker and "git/idmap.tdb". If neither file exists,
    nothing happens.

    :param repo_transport: Transport to look for the legacy files on
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif repo_transport.has("git.tdb"):
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
591
def remove_readonly_transport_decorator(transport):
592
if transport.is_readonly():
593
return transport._decorated
597
def from_repository(repository):
598
"""Open a cache file for a repository.
600
If the repository is remote and there is no transport available from it
601
this will use a local file in the users cache directory
602
(typically ~/.cache/bazaar/git/)
604
:param repository: A repository object
606
repo_transport = getattr(repository, "_transport", None)
607
if repo_transport is not None:
608
# Migrate older cache formats
609
repo_transport = remove_readonly_transport_decorator(repo_transport)
611
repo_transport.mkdir("git")
612
except bzrlib.errors.FileExists:
615
migrate_ancient_formats(repo_transport)
616
return BzrGitCacheFormat.from_repository(repository)