81
86
class GitShaMap(object):
82
87
"""Git<->Bzr revision id mapping database."""
84
def add_entry(self, sha, type, type_data):
85
"""Add a new entry to the database.
87
raise NotImplementedError(self.add_entry)
89
def add_entries(self, entries):
90
"""Add multiple new entries to the database.
95
def lookup_tree(self, fileid, revid):
    """Find the SHA of a Git tree.

    Placeholder on the base class: it raises unconditionally.

    :param fileid: presumably the Bazaar file id of the tree -- confirm
    :param revid: presumably the Bazaar revision id -- confirm
    :raise NotImplementedError: always, carrying the bound method
    """
    unimplemented = self.lookup_tree
    raise NotImplementedError(unimplemented)
99
def lookup_blob(self, fileid, revid):
    """Find a blob by the file id it has in a bzr revision.

    Placeholder on the base class: it raises unconditionally.

    :param fileid: file id the blob has in the revision
    :param revid: the bzr revision (presumably its revision id -- confirm)
    :raise NotImplementedError: always, carrying the bound method
    """
    unimplemented = self.lookup_blob
    raise NotImplementedError(unimplemented)
103
89
def lookup_git_sha(self, sha):
104
90
"""Lookup a Git sha in the database.
106
91
:param sha: Git object sha
107
92
:return: (type, type_data) with type_data:
108
93
revision: revid, tree sha
110
95
raise NotImplementedError(self.lookup_git_sha)
97
def lookup_blob_id(self, file_id, revision):
98
"""Retrieve a Git blob SHA by file id.
100
:param file_id: File id of the file/symlink
101
:param revision: revision in which the file was last changed.
103
raise NotImplementedError(self.lookup_blob_id)
105
def lookup_tree_id(self, file_id, revision):
106
"""Retrieve a Git tree SHA by file id.
108
raise NotImplementedError(self.lookup_tree_id)
112
110
def revids(self):
    """Enumerate the known revision ids.

    Placeholder on the base class: it raises unconditionally.

    :raise NotImplementedError: always, carrying the bound method
    """
    unimplemented = self.revids
    raise NotImplementedError(unimplemented)
114
def missing_revisions(self, revids):
115
"""Return set of all the revisions that are not present."""
116
present_revids = set(self.revids())
117
if not isinstance(revids, set):
119
return revids - present_revids
117
122
"""List the SHA1s."""
118
123
raise NotImplementedError(self.sha1s)
125
def start_write_group(self):
    """Begin writing changes.

    The base implementation has no body beyond this docstring, so it does
    nothing and returns None.
    """
    return None
128
def commit_write_group(self):
    """Commit whatever changes are pending.

    The base implementation has no body beyond this docstring, so it does
    nothing and returns None.
    """
    return None
131
def abort_write_group(self):
    """Abandon whatever changes are pending.

    The base implementation has no body beyond this docstring, so it does
    nothing and returns None.
    """
    return None
135
class ContentCache(object):
    """Interface for an object that can cache Git objects."""

    def __getitem__(self, sha):
        """Fetch a cached item by its SHA.

        Placeholder on the base class: it raises unconditionally.

        :param sha: SHA identifying the item
        :raise NotImplementedError: always, carrying the bound method
        """
        unimplemented = self.__getitem__
        raise NotImplementedError(unimplemented)
143
class BzrGitCacheFormat(object):
145
def get_format_string(self):
    """Give the single-line string that uniquely names this cache format.

    Placeholder on the base class: it raises unconditionally.

    :raise NotImplementedError: always, carrying the bound method
    """
    unimplemented = self.get_format_string
    raise NotImplementedError(unimplemented)
149
def open(self, transport):
    """Open a cache of this format on the given transport.

    Placeholder on the base class: it raises unconditionally.

    :param transport: transport the cache lives on
    :raise NotImplementedError: always, carrying the bound method
    """
    unimplemented = self.open
    raise NotImplementedError(unimplemented)
153
def initialize(self, transport):
    """Record this format's identifying string on the transport.

    Writes the value of ``get_format_string()`` under the name ``format``.

    :param transport: object providing ``put_bytes(name, data)``
    """
    write_bytes = transport.put_bytes
    write_bytes('format', self.get_format_string())
157
def from_transport(self, transport):
158
"""Open a cache file present on a transport, or initialize one.
160
:param transport: Transport to use
161
:return: A BzrGitCache instance
164
format_name = transport.get_bytes('format')
165
format = formats.get(format_name)
166
except bzrlib.errors.NoSuchFile:
167
format = formats.get('default')
168
format.initialize(transport)
169
return format.open(transport)
172
def from_repository(cls, repository):
173
"""Open a cache file for a repository.
175
This will use the repository's transport to store the cache file, or
176
use the users global cache directory if the repository has no
177
transport associated with it.
179
:param repository: Repository to open the cache for
180
:return: A `BzrGitCache`
182
repo_transport = getattr(repository, "_transport", None)
183
if repo_transport is not None:
184
# Even if we don't write to this repo, we should be able
185
# to update its cache.
186
repo_transport = remove_readonly_transport_decorator(repo_transport)
188
repo_transport.mkdir('git')
189
except bzrlib.errors.FileExists:
191
transport = repo_transport.clone('git')
193
transport = get_remote_cache_transport()
194
return cls.from_transport(transport)
197
class CacheUpdater(object):
199
def add_object(self, obj, ie):
    """Record an object in the cache being updated.

    Placeholder on the base class: it raises unconditionally.

    :param obj: object to add (presumably a Git object -- confirm)
    :param ie: presumably the matching inventory entry -- confirm
    :raise NotImplementedError: always, carrying the bound method
    """
    unimplemented = self.add_object
    raise NotImplementedError(unimplemented)
203
raise NotImplementedError(self.finish)
206
class BzrGitCache(object):
207
"""Caching backend."""
209
def __init__(self, idmap, content_cache, cache_updater_klass):
211
self.content_cache = content_cache
212
self._cache_updater_klass = cache_updater_klass
214
def get_updater(self, rev):
    """Create an updater for ``rev`` using the configured updater class.

    :param rev: passed through unchanged to the updater class
    :return: the result of calling ``_cache_updater_klass(self, rev)``
    """
    updater_factory = self._cache_updater_klass
    return updater_factory(self, rev)
218
def DictBzrGitCache():
    """Build a BzrGitCache backed by a fresh DictGitShaMap.

    No content cache is supplied (``None``) and DictCacheUpdater is used as
    the updater class.

    :return: a new BzrGitCache instance
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
221
class DictCacheUpdater(CacheUpdater):
223
def __init__(self, cache, rev):
225
self.revid = rev.revision_id
226
self.parent_revids = rev.parent_ids
230
def add_object(self, obj, ie):
231
if obj.type_name == "commit":
234
type_data = (self.revid, self._commit.tree)
235
self.cache.idmap._by_revid[self.revid] = obj.id
236
elif obj.type_name in ("blob", "tree"):
237
if obj.type_name == "blob":
238
revision = ie.revision
240
revision = self.revid
241
type_data = (ie.file_id, revision)
242
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
245
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
248
if self._commit is None:
249
raise AssertionError("No commit object added")
124
253
class DictGitShaMap(GitShaMap):
126
255
def __init__(self):
129
def add_entry(self, sha, type, type_data):
130
self.dict[sha] = (type, type_data)
260
def lookup_blob_id(self, fileid, revision):
    """Return the entry stored for ``fileid`` under ``revision``.

    Reads the nested ``_by_fileid`` mapping, keyed first by revision and
    then by file id.

    :param fileid: inner key of the lookup
    :param revision: outer key of the lookup
    :raise KeyError: if either key is absent
    """
    per_revision = self._by_fileid[revision]
    return per_revision[fileid]
132
263
def lookup_git_sha(self, sha):
133
return self.dict[sha]
135
def lookup_tree(self, fileid, revid):
136
for k, v in self.dict.iteritems():
137
if v == ("tree", (fileid, revid)):
139
raise KeyError((fileid, revid))
141
def lookup_blob(self, fileid, revid):
142
for k, v in self.dict.iteritems():
143
if v == ("blob", (fileid, revid)):
145
raise KeyError((fileid, revid))
264
return self._by_sha[sha]
266
def lookup_tree_id(self, fileid, revision):
    """Return the entry stored for ``fileid`` under ``revision``.

    Uses the same nested ``_by_fileid`` mapping as the blob lookup: the
    outer key is the revision and the inner key is the file id.

    :param fileid: inner key of the lookup
    :param revision: outer key of the lookup
    :raise KeyError: if either key is absent
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
269
def lookup_commit(self, revid):
    """Return the value recorded in ``_by_revid`` for ``revid``.

    :param revid: key into the ``_by_revid`` mapping
    :raise KeyError: if ``revid`` has no entry
    """
    commits_by_revid = self._by_revid
    return commits_by_revid[revid]
147
272
def revids(self):
148
for key, (type, type_data) in self.dict.iteritems():
273
for key, (type, type_data) in self._by_sha.iteritems():
149
274
if type == "commit":
150
275
yield type_data[0]
153
return self.dict.iterkeys()
278
return self._by_sha.iterkeys()
281
class SqliteCacheUpdater(CacheUpdater):
283
def __init__(self, cache, rev):
285
self.db = self.cache.idmap.db
286
self.revid = rev.revision_id
291
def add_object(self, obj, ie):
292
if obj.type_name == "commit":
295
elif obj.type_name == "tree":
296
self._trees.append((obj.id, ie.file_id, self.revid))
297
elif obj.type_name == "blob":
298
self._blobs.append((obj.id, ie.file_id, ie.revision))
303
if self._commit is None:
304
raise AssertionError("No commit object added")
306
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
309
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
312
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
313
(self._commit.id, self.revid, self._commit.tree))
317
def SqliteBzrGitCache(p):
    """Build a BzrGitCache backed by a SqliteGitShaMap.

    No content cache is supplied (``None``) and SqliteCacheUpdater is used
    as the updater class.

    :param p: argument handed to ``SqliteGitShaMap`` (a database path at the
        visible call site)
    :return: a new BzrGitCache instance
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
320
class SqliteGitCacheFormat(BzrGitCacheFormat):
322
def get_format_string(self):
    """Return the format string naming the sqlite-based SHA map format."""
    format_marker = 'bzr-git sha map version 1 using sqlite\n'
    return format_marker
325
def open(self, transport):
327
basepath = transport.local_abspath(".")
328
except bzrlib.errors.NotLocalUrl:
329
basepath = get_cache_dir()
330
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
156
333
class SqliteGitShaMap(GitShaMap):
163
340
if not mapdbs().has_key(path):
164
341
mapdbs()[path] = sqlite3.connect(path)
165
self.db = mapdbs()[path]
342
self.db = mapdbs()[path]
343
self.db.text_factory = str
166
344
self.db.executescript("""
167
create table if not exists commits(sha1 text, revid text, tree_sha text);
345
create table if not exists commits(
346
sha1 text not null check(length(sha1) == 40),
348
tree_sha text not null check(length(tree_sha) == 40)
168
350
create index if not exists commit_sha1 on commits(sha1);
169
351
create unique index if not exists commit_revid on commits(revid);
170
create table if not exists blobs(sha1 text, fileid text, revid text);
352
create table if not exists blobs(
353
sha1 text not null check(length(sha1) == 40),
354
fileid text not null,
171
357
create index if not exists blobs_sha1 on blobs(sha1);
172
358
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
173
create table if not exists trees(sha1 text, fileid text, revid text);
174
create index if not exists trees_sha1 on trees(sha1);
359
create table if not exists trees(
360
sha1 text unique not null check(length(sha1) == 40),
361
fileid text not null,
364
create unique index if not exists trees_sha1 on trees(sha1);
175
365
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
179
def from_repository(cls, repository):
181
transport = getattr(repository, "_transport", None)
182
if transport is not None:
183
return cls(os.path.join(transport.local_abspath("."), "git.db"))
184
except bzrlib.errors.NotLocalUrl:
186
return cls(os.path.join(get_cache_dir(), "remote.db"))
369
return "%s(%r)" % (self.__class__.__name__, self.path)
188
371
def lookup_commit(self, revid):
189
372
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
190
373
if row is not None:
191
return row[0].encode("utf-8")
377
def commit_write_group(self):
197
def add_entries(self, entries):
200
for sha, type, type_data in entries:
201
assert isinstance(type_data[0], str)
202
assert isinstance(type_data[1], str)
203
entry = (sha.decode("utf-8"), type_data[0].decode("utf-8"),
204
type_data[1].decode("utf-8"))
212
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
214
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
217
def add_entry(self, sha, type, type_data):
218
"""Add a new entry to the database.
220
assert isinstance(type_data, tuple)
221
assert isinstance(sha, str), "type was %r" % sha
223
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
224
elif type in ("blob", "tree"):
225
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
227
raise AssertionError("Unknown type %s" % type)
229
def lookup_tree(self, fileid, revid):
230
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid,revid)).fetchone()
232
raise KeyError((fileid, revid))
233
return row[0].encode("utf-8")
235
def lookup_blob(self, fileid, revid):
236
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revid)).fetchone()
238
raise KeyError((fileid, revid))
239
return row[0].encode("utf-8")
380
def lookup_blob_id(self, fileid, revision):
381
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
384
raise KeyError(fileid)
386
def lookup_tree_id(self, fileid, revision):
387
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
390
raise KeyError(fileid)
241
392
def lookup_git_sha(self, sha):
242
393
"""Lookup a Git sha in the database.
245
396
:return: (type, type_data) with type_data:
246
397
revision: revid, tree sha
248
def format(type, row):
249
return (type, (row[0].encode("utf-8"), row[1].encode("utf-8")))
250
399
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
251
400
if row is not None:
252
return format("commit", row)
401
return ("commit", row)
253
402
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
254
403
if row is not None:
255
return format("blob", row)
256
405
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
257
406
if row is not None:
258
return format("tree", row)
259
408
raise KeyError(sha)
261
410
def revids(self):
262
411
"""List the revision ids known."""
263
for row in self.db.execute("select revid from commits").fetchall():
264
yield row[0].encode("utf-8")
412
return (row for (row,) in self.db.execute("select revid from commits"))
267
415
"""List the SHA1s."""
268
416
for table in ("blobs", "commits", "trees"):
269
for row in self.db.execute("select sha1 from %s" % table).fetchall():
270
yield row[0].encode("utf-8")
274
TDB_HASH_SIZE = 10000
417
for (sha,) in self.db.execute("select sha1 from %s" % table):
421
class TdbCacheUpdater(CacheUpdater):
423
def __init__(self, cache, rev):
425
self.db = cache.idmap.db
426
self.revid = rev.revision_id
427
self.parent_revids = rev.parent_ids
431
def add_object(self, obj, ie):
432
sha = obj.sha().digest()
433
if obj.type_name == "commit":
434
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
435
type_data = (self.revid, obj.tree)
438
elif obj.type_name == "blob":
439
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
440
type_data = (ie.file_id, ie.revision)
441
elif obj.type_name == "tree":
442
type_data = (ie.file_id, self.revid)
445
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
448
if self._commit is None:
449
raise AssertionError("No commit object added")
453
def TdbBzrGitCache(p):
    """Build a BzrGitCache backed by a TdbGitShaMap.

    No content cache is supplied (``None``) and TdbCacheUpdater is used as
    the updater class.

    :param p: argument handed to ``TdbGitShaMap`` (a database path at the
        visible call site)
    :return: a new BzrGitCache instance
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
455
class TdbGitCacheFormat(BzrGitCacheFormat):
457
def get_format_string(self):
    """Return the format string naming the tdb-based SHA map format."""
    format_marker = 'bzr-git sha map version 3 using tdb\n'
    return format_marker
460
def open(self, transport):
462
basepath = transport.local_abspath(".")
463
except bzrlib.errors.NotLocalUrl:
464
basepath = get_cache_dir()
466
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
469
"Unable to open existing bzr-git cache because 'tdb' is not "
277
473
class TdbGitShaMap(GitShaMap):
294
493
if not mapdbs().has_key(path):
295
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
494
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
296
495
os.O_RDWR|os.O_CREAT)
297
self.db = mapdbs()[path]
298
if not "version" in self.db:
299
self.db["version"] = str(TDB_MAP_VERSION)
301
if int(self.db["version"]) != TDB_MAP_VERSION:
496
self.db = mapdbs()[path]
498
if int(self.db["version"]) not in (2, 3):
302
499
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
303
self.db["version"], TDB_MAP_VERSION)
500
self.db["version"], self.TDB_MAP_VERSION)
305
self.db["version"] = str(TDB_MAP_VERSION)
308
def from_repository(cls, repository):
310
transport = getattr(repository, "_transport", None)
311
if transport is not None:
312
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
313
except bzrlib.errors.NotLocalUrl:
315
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
504
self.db["version"] = str(self.TDB_MAP_VERSION)
506
def start_write_group(self):
    """Begin writing changes by starting a transaction on ``self.db``."""
    database = self.db
    database.transaction_start()
510
def commit_write_group(self):
    """Commit pending changes by committing the transaction on ``self.db``."""
    database = self.db
    database.transaction_commit()
514
def abort_write_group(self):
    """Abort pending changes by cancelling the transaction on ``self.db``."""
    database = self.db
    database.transaction_cancel()
519
return "%s(%r)" % (self.__class__.__name__, self.path)
317
521
def lookup_commit(self, revid):
    """Return the hex SHA stored for the commit of ``revid``.

    The record is read from ``self.db`` under ``"commit\\0" + revid``; only
    its first 20 bytes are converted with ``sha_to_hex``.

    :param revid: revision id used to build the database key
    :return: whatever ``sha_to_hex`` returns for the leading 20 bytes
    """
    key = "commit\0" + revid
    record = self.db[key]
    return sha_to_hex(record[:20])
323
def add_entry(self, sha, type, type_data):
324
"""Add a new entry to the database.
326
self.db["git\0" + hex_to_sha(sha)] = "\0".join((type, type_data[0], type_data[1]))
328
self.db["commit\0" + type_data[0]] = "\0".join((hex_to_sha(sha), type_data[1]))
330
self.db["\0".join((type, type_data[0], type_data[1]))] = hex_to_sha(sha)
332
def lookup_tree(self, fileid, revid):
    """Return the hex SHA stored for a tree.

    The database key is ``"tree"``, ``fileid`` and ``revid`` joined with NUL
    bytes; the stored value is converted with ``sha_to_hex``.

    :param fileid: file id component of the key
    :param revid: revision id component of the key
    """
    key = "\0".join(("tree", fileid, revid))
    stored_sha = self.db[key]
    return sha_to_hex(stored_sha)
335
def lookup_blob(self, fileid, revid):
    """Return the hex SHA stored for a blob.

    The database key is ``"blob"``, ``fileid`` and ``revid`` joined with NUL
    bytes; the stored value is converted with ``sha_to_hex``.

    :param fileid: file id component of the key
    :param revid: revision id component of the key
    """
    key = "\0".join(("blob", fileid, revid))
    stored_sha = self.db[key]
    return sha_to_hex(stored_sha)
524
def lookup_blob_id(self, fileid, revision):
    """Return the hex SHA stored for a blob.

    The database key is ``"blob"``, ``fileid`` and ``revision`` joined with
    NUL bytes; the stored value is converted with ``sha_to_hex``.

    :param fileid: file id component of the key
    :param revision: revision component of the key
    """
    key = "\0".join(("blob", fileid, revision))
    stored_sha = self.db[key]
    return sha_to_hex(stored_sha)
338
527
def lookup_git_sha(self, sha):
339
528
"""Lookup a Git sha in the database.
356
554
for key in self.db.iterkeys():
357
555
if key.startswith("git\0"):
358
556
yield sha_to_hex(key[4:])
559
formats = registry.Registry()
560
formats.register(TdbGitCacheFormat().get_format_string(),
562
formats.register(SqliteGitCacheFormat().get_format_string(),
563
SqliteGitCacheFormat())
567
formats.register('default', SqliteGitCacheFormat())
569
formats.register('default', TdbGitCacheFormat())
572
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the ``git`` subdirectory.

    If ``git.db`` exists, the sqlite cache format is initialized on the
    ``git`` clone of the transport and the file is renamed to
    ``git/idmap.db``. Otherwise, if ``git.tdb`` exists, the tdb cache format
    is initialized there and the file is renamed to ``git/idmap.tdb``. At
    most one file is migrated per call; nothing happens if neither exists.

    :param repo_transport: transport providing ``has``, ``clone`` and
        ``rename``
    """
    # git.db wins over git.tdb, since the latter may not be openable on
    # some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        cache_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
        return
    if repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        cache_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
583
def remove_readonly_transport_decorator(transport):
584
if transport.is_readonly():
585
return transport._decorated
589
def from_repository(repository):
590
"""Open a cache file for a repository.
592
If the repository is remote and there is no transport available from it
593
this will use a local file in the users cache directory
594
(typically ~/.cache/bazaar/git/)
596
:param repository: A repository object
598
repo_transport = getattr(repository, "_transport", None)
599
if repo_transport is not None:
600
# Migrate older cache formats
601
repo_transport = remove_readonly_transport_decorator(repo_transport)
603
repo_transport.mkdir("git")
604
except bzrlib.errors.FileExists:
607
migrate_ancient_formats(repo_transport)
608
return BzrGitCacheFormat.from_repository(repository)