81
86
class GitShaMap(object):
82
87
"""Git<->Bzr revision id mapping database."""
84
def add_entry(self, sha, type, type_data):
85
"""Add a new entry to the database.
87
raise NotImplementedError(self.add_entry)
89
def add_entries(self, entries):
90
"""Add multiple new entries to the database.
95
def lookup_tree(self, fileid, revid):
    """Look up the SHA of a git tree.

    Not implemented in this base class.

    :param fileid: File id used as part of the lookup key
    :param revid: Revision id used as part of the lookup key
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.lookup_tree)
99
def lookup_blob(self, fileid, revid):
    """Look up a blob by the file id it has in a bzr revision.

    Not implemented in this base class.

    :param fileid: File id used as part of the lookup key
    :param revid: Revision id used as part of the lookup key
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.lookup_blob)
103
89
def lookup_git_sha(self, sha):
104
90
"""Lookup a Git sha in the database.
106
91
:param sha: Git object sha
107
92
:return: (type, type_data) with type_data:
108
93
revision: revid, tree sha
110
95
raise NotImplementedError(self.lookup_git_sha)
97
def lookup_blob_id(self, file_id, revision):
98
"""Retrieve a Git blob SHA by file id.
100
:param file_id: File id of the file/symlink
101
:param revision: revision in which the file was last changed.
103
raise NotImplementedError(self.lookup_blob_id)
105
def lookup_tree_id(self, file_id, revision):
106
"""Retrieve a Git tree SHA by file id.
108
raise NotImplementedError(self.lookup_tree_id)
112
110
def revids(self):
    """List the revision ids known.

    Not implemented in this base class.

    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.revids)
114
def missing_revisions(self, revids):
115
"""Return set of all the revisions that are not present."""
116
present_revids = set(self.revids())
117
if not isinstance(revids, set):
119
return revids - present_revids
117
122
"""List the SHA1s."""
118
123
raise NotImplementedError(self.sha1s)
125
def start_write_group(self):
    """Start writing changes.

    The base implementation does nothing.
    """
128
def commit_write_group(self):
    """Commit any pending changes.

    The base implementation does nothing.
    """
131
def abort_write_group(self):
    """Abort any pending changes.

    The base implementation does nothing.
    """
135
class ContentCache(object):
    """Object that can cache Git objects.

    Base class: item access is not implemented here.
    """

    def __getitem__(self, sha):
        """Retrieve an item, by SHA.

        :param sha: SHA of the item to retrieve
        :raise NotImplementedError: always
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
145
def get_format_string(self):
    """Return a single-line unique format string for this cache format.

    Not implemented in this base class.

    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.get_format_string)
149
def open(self, transport):
    """Open this format on a transport.

    Not implemented in this base class.

    :param transport: Transport to open the cache on
    :raise NotImplementedError: always
    """
    raise NotImplementedError(self.open)
153
def initialize(self, transport):
    """Write this format's format string to the 'format' file.

    :param transport: Transport on which ``put_bytes`` is called
    """
    format_string = self.get_format_string()
    transport.put_bytes('format', format_string)
157
def from_transport(self, transport):
158
"""Open a cache file present on a transport, or initialize one.
160
:param transport: Transport to use
161
:return: A BzrGitCache instance
164
format_name = transport.get_bytes('format')
165
format = formats.get(format_name)
166
except bzrlib.errors.NoSuchFile:
167
format = formats.get('default')
168
format.initialize(transport)
169
return format.open(transport)
172
def from_repository(cls, repository):
173
"""Open a cache file for a repository.
175
This will use the repository's transport to store the cache file, or
176
use the users global cache directory if the repository has no
177
transport associated with it.
179
:param repository: Repository to open the cache for
180
:return: A `BzrGitCache`
182
repo_transport = getattr(repository, "_transport", None)
183
if repo_transport is not None:
184
# Even if we don't write to this repo, we should be able
185
# to update its cache.
186
repo_transport = remove_readonly_transport_decorator(repo_transport)
188
repo_transport.mkdir('git')
189
except bzrlib.errors.FileExists:
191
transport = repo_transport.clone('git')
193
transport = get_remote_cache_transport()
194
return cls.from_transport(transport)
197
class CacheUpdater(object):
199
def add_object(self, obj, ie):
200
raise NotImplementedError(self.add_object)
203
raise NotImplementedError(self.finish)
206
class BzrGitCache(object):
    """Caching backend.

    Bundles an id map, a content cache and the class used to create
    cache updaters.
    """

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Create a cache backend.

        :param idmap: Id map object; stored as ``self.idmap``
        :param content_cache: Content cache object (may be None)
        :param cache_updater_klass: Callable invoked as
            ``cache_updater_klass(cache, rev)`` by get_updater()
        """
        # The cache updaters in this module read ``cache.idmap``, so the
        # map has to be kept on the instance.
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Return a new updater for this cache.

        :param rev: Revision object handed to the updater class
        :return: ``cache_updater_klass(self, rev)``
        """
        return self._cache_updater_klass(self, rev)
218
def DictBzrGitCache():
    """Return a new BzrGitCache using a DictGitShaMap and DictCacheUpdater.

    No content cache is configured (``None`` is passed).
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
221
class DictCacheUpdater(CacheUpdater):
223
def __init__(self, cache, rev):
225
self.revid = rev.revision_id
226
self.parent_revids = rev.parent_ids
230
def add_object(self, obj, ie):
231
if obj.type_name == "commit":
234
type_data = (self.revid, self._commit.tree)
235
self.cache.idmap._by_revid[self.revid] = obj.id
236
elif obj.type_name in ("blob", "tree"):
238
if obj.type_name == "blob":
239
revision = ie.revision
241
revision = self.revid
242
type_data = (ie.file_id, revision)
243
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
247
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
250
if self._commit is None:
251
raise AssertionError("No commit object added")
124
255
class DictGitShaMap(GitShaMap):
126
257
def __init__(self):
129
def add_entry(self, sha, type, type_data):
130
self.dict[sha] = (type, type_data)
262
def lookup_blob_id(self, fileid, revision):
    """Return the entry stored for ``fileid`` under ``revision``.

    :param fileid: File id (inner key)
    :param revision: Revision id (outer key)
    :raise KeyError: if either key is missing from ``self._by_fileid``
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
132
265
def lookup_git_sha(self, sha):
133
return self.dict[sha]
135
def lookup_tree(self, fileid, revid):
136
for k, v in self.dict.iteritems():
137
if v == ("tree", (fileid, revid)):
139
raise KeyError((fileid, revid))
141
def lookup_blob(self, fileid, revid):
142
for k, v in self.dict.iteritems():
143
if v == ("blob", (fileid, revid)):
145
raise KeyError((fileid, revid))
266
return self._by_sha[sha]
268
def lookup_tree_id(self, fileid, revision):
    """Return the entry stored for ``fileid`` under ``revision``.

    :param fileid: File id (inner key)
    :param revision: Revision id (outer key)
    :raise KeyError: if either key is missing from ``self._by_fileid``
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
271
def lookup_commit(self, revid):
    """Return the entry stored for ``revid`` in ``self._by_revid``.

    :param revid: Revision id to look up
    :raise KeyError: if ``revid`` is not present
    """
    commits_by_revid = self._by_revid
    return commits_by_revid[revid]
147
274
def revids(self):
148
for key, (type, type_data) in self.dict.iteritems():
275
for key, (type, type_data) in self._by_sha.iteritems():
149
276
if type == "commit":
150
277
yield type_data[0]
153
return self.dict.iterkeys()
280
return self._by_sha.iterkeys()
283
class SqliteCacheUpdater(CacheUpdater):
285
def __init__(self, cache, rev):
287
self.db = self.cache.idmap.db
288
self.revid = rev.revision_id
293
def add_object(self, obj, ie):
294
if obj.type_name == "commit":
297
elif obj.type_name == "tree":
299
self._trees.append((obj.id, ie.file_id, self.revid))
300
elif obj.type_name == "blob":
302
self._blobs.append((obj.id, ie.file_id, ie.revision))
307
if self._commit is None:
308
raise AssertionError("No commit object added")
310
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
313
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
316
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
317
(self._commit.id, self.revid, self._commit.tree))
321
def SqliteBzrGitCache(p):
    """Return a new BzrGitCache using SqliteGitShaMap(p) and SqliteCacheUpdater.

    No content cache is configured (``None`` is passed).

    :param p: Argument passed straight to SqliteGitShaMap
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
324
class SqliteGitCacheFormat(BzrGitCacheFormat):
326
def get_format_string(self):
    """Return the format string identifying the sqlite-based cache."""
    format_string = 'bzr-git sha map version 1 using sqlite\n'
    return format_string
329
def open(self, transport):
331
basepath = transport.local_abspath(".")
332
except bzrlib.errors.NotLocalUrl:
333
basepath = get_cache_dir()
334
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
156
337
class SqliteGitShaMap(GitShaMap):
163
344
if not mapdbs().has_key(path):
164
345
mapdbs()[path] = sqlite3.connect(path)
165
self.db = mapdbs()[path]
346
self.db = mapdbs()[path]
347
self.db.text_factory = str
166
348
self.db.executescript("""
167
create table if not exists commits(sha1 text, revid text, tree_sha text);
349
create table if not exists commits(
350
sha1 text not null check(length(sha1) == 40),
352
tree_sha text not null check(length(tree_sha) == 40)
168
354
create index if not exists commit_sha1 on commits(sha1);
169
355
create unique index if not exists commit_revid on commits(revid);
170
create table if not exists blobs(sha1 text, fileid text, revid text);
356
create table if not exists blobs(
357
sha1 text not null check(length(sha1) == 40),
358
fileid text not null,
171
361
create index if not exists blobs_sha1 on blobs(sha1);
172
362
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
173
create table if not exists trees(sha1 text, fileid text, revid text);
174
create index if not exists trees_sha1 on trees(sha1);
363
create table if not exists trees(
364
sha1 text unique not null check(length(sha1) == 40),
365
fileid text not null,
368
create unique index if not exists trees_sha1 on trees(sha1);
175
369
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
179
def from_repository(cls, repository):
181
transport = getattr(repository, "_transport", None)
182
if transport is not None:
183
return cls(os.path.join(transport.local_abspath("."), "git.db"))
184
except bzrlib.errors.NotLocalUrl:
186
return cls(os.path.join(get_cache_dir(), "remote.db"))
373
return "%s(%r)" % (self.__class__.__name__, self.path)
188
375
def lookup_commit(self, revid):
189
376
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
190
377
if row is not None:
191
return row[0].encode("utf-8")
381
def commit_write_group(self):
197
def add_entries(self, entries):
200
for sha, type, type_data in entries:
201
assert isinstance(type_data[0], str)
202
assert isinstance(type_data[1], str)
203
entry = (sha.decode("utf-8"), type_data[0].decode("utf-8"),
204
type_data[1].decode("utf-8"))
212
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
214
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
217
def add_entry(self, sha, type, type_data):
218
"""Add a new entry to the database.
220
assert isinstance(type_data, tuple)
221
assert isinstance(sha, str), "type was %r" % sha
223
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
224
elif type in ("blob", "tree"):
225
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
227
raise AssertionError("Unknown type %s" % type)
229
def lookup_tree(self, fileid, revid):
230
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid,revid)).fetchone()
232
raise KeyError((fileid, revid))
233
return row[0].encode("utf-8")
235
def lookup_blob(self, fileid, revid):
236
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revid)).fetchone()
238
raise KeyError((fileid, revid))
239
return row[0].encode("utf-8")
384
def lookup_blob_id(self, fileid, revision):
385
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
388
raise KeyError(fileid)
390
def lookup_tree_id(self, fileid, revision):
391
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
394
raise KeyError(fileid)
241
396
def lookup_git_sha(self, sha):
242
397
"""Lookup a Git sha in the database.
245
400
:return: (type, type_data) with type_data:
246
401
revision: revid, tree sha
248
def format(type, row):
249
return (type, (row[0].encode("utf-8"), row[1].encode("utf-8")))
250
403
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
251
404
if row is not None:
252
return format("commit", row)
405
return ("commit", row)
253
406
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
254
407
if row is not None:
255
return format("blob", row)
256
409
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
257
410
if row is not None:
258
return format("tree", row)
259
412
raise KeyError(sha)
261
414
def revids(self):
262
415
"""List the revision ids known."""
263
for row in self.db.execute("select revid from commits").fetchall():
264
yield row[0].encode("utf-8")
416
return (row for (row,) in self.db.execute("select revid from commits"))
267
419
"""List the SHA1s."""
268
420
for table in ("blobs", "commits", "trees"):
269
for row in self.db.execute("select sha1 from %s" % table).fetchall():
270
yield row[0].encode("utf-8")
274
TDB_HASH_SIZE = 10000
421
for (sha,) in self.db.execute("select sha1 from %s" % table):
425
class TdbCacheUpdater(CacheUpdater):
427
def __init__(self, cache, rev):
429
self.db = cache.idmap.db
430
self.revid = rev.revision_id
431
self.parent_revids = rev.parent_ids
435
def add_object(self, obj, ie):
436
sha = obj.sha().digest()
437
if obj.type_name == "commit":
438
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
439
type_data = (self.revid, obj.tree)
442
elif obj.type_name == "blob":
445
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
446
type_data = (ie.file_id, ie.revision)
447
elif obj.type_name == "tree":
450
type_data = (ie.file_id, self.revid)
453
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
456
if self._commit is None:
457
raise AssertionError("No commit object added")
461
def TdbBzrGitCache(p):
    """Return a new BzrGitCache using TdbGitShaMap(p) and TdbCacheUpdater.

    No content cache is configured (``None`` is passed).

    :param p: Argument passed straight to TdbGitShaMap
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
463
class TdbGitCacheFormat(BzrGitCacheFormat):
465
def get_format_string(self):
    """Return the format string identifying the tdb-based cache."""
    format_string = 'bzr-git sha map version 3 using tdb\n'
    return format_string
468
def open(self, transport):
470
basepath = transport.local_abspath(".")
471
except bzrlib.errors.NotLocalUrl:
472
basepath = get_cache_dir()
474
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
477
"Unable to open existing bzr-git cache because 'tdb' is not "
277
481
class TdbGitShaMap(GitShaMap):
294
501
if not mapdbs().has_key(path):
295
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
502
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
296
503
os.O_RDWR|os.O_CREAT)
297
self.db = mapdbs()[path]
298
if not "version" in self.db:
299
self.db["version"] = str(TDB_MAP_VERSION)
301
if int(self.db["version"]) != TDB_MAP_VERSION:
504
self.db = mapdbs()[path]
506
if int(self.db["version"]) not in (2, 3):
302
507
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
303
self.db["version"], TDB_MAP_VERSION)
508
self.db["version"], self.TDB_MAP_VERSION)
305
self.db["version"] = str(TDB_MAP_VERSION)
308
def from_repository(cls, repository):
310
transport = getattr(repository, "_transport", None)
311
if transport is not None:
312
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
313
except bzrlib.errors.NotLocalUrl:
315
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
512
self.db["version"] = str(self.TDB_MAP_VERSION)
514
def start_write_group(self):
    """Start writing changes by calling ``transaction_start`` on the db."""
    database = self.db
    database.transaction_start()
518
def commit_write_group(self):
    """Commit pending changes by calling ``transaction_commit`` on the db."""
    database = self.db
    database.transaction_commit()
522
def abort_write_group(self):
    """Abort pending changes by calling ``transaction_cancel`` on the db."""
    database = self.db
    database.transaction_cancel()
527
return "%s(%r)" % (self.__class__.__name__, self.path)
317
529
def lookup_commit(self, revid):
    """Return the hex SHA stored for a revision id.

    :param revid: Revision id; looked up under the key "commit\\0" + revid
    :return: ``sha_to_hex`` of the first 20 bytes of the stored record
    :raise KeyError: presumably, if the db has no such key -- depends on
        the db object's item access; confirm
    """
    record = self.db["commit\0" + revid]
    # Only the leading 20 bytes of the record are the SHA; the rest of
    # the record is ignored here.
    binary_sha = record[:20]
    return sha_to_hex(binary_sha)
323
def add_entry(self, sha, type, type_data):
324
"""Add a new entry to the database.
326
self.db["git\0" + hex_to_sha(sha)] = "\0".join((type, type_data[0], type_data[1]))
328
self.db["commit\0" + type_data[0]] = "\0".join((hex_to_sha(sha), type_data[1]))
330
self.db["\0".join((type, type_data[0], type_data[1]))] = hex_to_sha(sha)
332
def lookup_tree(self, fileid, revid):
    """Return the hex SHA stored for a tree.

    :param fileid: File id, second component of the db key
    :param revid: Revision id, third component of the db key
    :return: ``sha_to_hex`` of the value stored under
        "tree\\0<fileid>\\0<revid>"
    """
    key = "\0".join(("tree", fileid, revid))
    return sha_to_hex(self.db[key])
335
def lookup_blob(self, fileid, revid):
    """Return the hex SHA stored for a blob.

    :param fileid: File id, second component of the db key
    :param revid: Revision id, third component of the db key
    :return: ``sha_to_hex`` of the value stored under
        "blob\\0<fileid>\\0<revid>"
    """
    key = "\0".join(("blob", fileid, revid))
    return sha_to_hex(self.db[key])
532
def lookup_blob_id(self, fileid, revision):
    """Return the hex SHA stored for a blob.

    :param fileid: File id, second component of the db key
    :param revision: Revision id, third component of the db key
    :return: ``sha_to_hex`` of the value stored under
        "blob\\0<fileid>\\0<revision>"
    """
    key = "\0".join(("blob", fileid, revision))
    return sha_to_hex(self.db[key])
338
535
def lookup_git_sha(self, sha):
339
536
"""Lookup a Git sha in the database.
356
562
for key in self.db.iterkeys():
357
563
if key.startswith("git\0"):
358
564
yield sha_to_hex(key[4:])
567
formats = registry.Registry()
568
formats.register(TdbGitCacheFormat().get_format_string(),
570
formats.register(SqliteGitCacheFormat().get_format_string(),
571
SqliteGitCacheFormat())
575
formats.register('default', SqliteGitCacheFormat())
577
formats.register('default', TdbGitCacheFormat())
580
def migrate_ancient_formats(repo_transport):
    """Move an old top-level cache file into the "git" subdirectory.

    If "git.db" exists it is renamed to "git/idmap.db"; otherwise, if
    "git.tdb" exists it is renamed to "git/idmap.tdb".  In either case the
    matching cache format is first initialized on the "git" clone of the
    transport.  Nothing happens when neither file exists.

    :param repo_transport: Transport of the repository to migrate
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        sqlite_format = SqliteGitCacheFormat()
        sqlite_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
        return
    if repo_transport.has("git.tdb"):
        tdb_format = TdbGitCacheFormat()
        tdb_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
591
def remove_readonly_transport_decorator(transport):
    """Return a transport with any read-only wrapper removed.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    if transport.is_readonly():
        # NOTE(review): relies on the private ``_decorated`` attribute;
        # presumably only the read-only decorator reports is_readonly()
        # here -- confirm for transports that are read-only by nature.
        return transport._decorated
    # Callers use the result directly (mkdir/clone), so a transport must
    # always be returned, never None.
    return transport
597
def from_repository(repository):
598
"""Open a cache file for a repository.
600
If the repository is remote and there is no transport available from it
601
this will use a local file in the users cache directory
602
(typically ~/.cache/bazaar/git/)
604
:param repository: A repository object
606
repo_transport = getattr(repository, "_transport", None)
607
if repo_transport is not None:
608
# Migrate older cache formats
609
repo_transport = remove_readonly_transport_decorator(repo_transport)
611
repo_transport.mkdir("git")
612
except bzrlib.errors.FileExists:
615
migrate_ancient_formats(repo_transport)
616
return BzrGitCacheFormat.from_repository(repository)