61
86
class GitShaMap(object):
62
87
"""Git<->Bzr revision id mapping database."""
64
def add_entry(self, sha, type, type_data):
65
"""Add a new entry to the database.
67
raise NotImplementedError(self.add_entry)
69
def add_entries(self, entries):
70
"""Add multiple new entries to the database.
75
def lookup_tree(self, fileid, revid):
76
"""Lookup the SHA of a git tree."""
77
raise NotImplementedError(self.lookup_tree)
79
def lookup_blob(self, fileid, revid):
80
raise NotImplementedError(self.lookup_blob)
82
89
def lookup_git_sha(self, sha):
83
90
"""Lookup a Git sha in the database.
85
91
:param sha: Git object sha
86
92
:return: (type, type_data) with type_data:
87
93
revision: revid, tree sha
89
95
raise NotImplementedError(self.lookup_git_sha)
97
def lookup_blob_id(self, file_id, revision):
98
"""Retrieve a Git blob SHA by file id.
100
:param file_id: File id of the file/symlink
101
:param revision: revision in which the file was last changed.
103
raise NotImplementedError(self.lookup_blob_id)
105
def lookup_tree_id(self, file_id, revision):
106
"""Retrieve a Git tree SHA by file id.
108
raise NotImplementedError(self.lookup_tree_id)
92
111
"""List the revision ids known."""
93
112
raise NotImplementedError(self.revids)
114
def missing_revisions(self, revids):
115
"""Return set of all the revisions that are not present."""
116
present_revids = set(self.revids())
117
if not isinstance(revids, set):
119
return revids - present_revids
122
"""List the SHA1s."""
123
raise NotImplementedError(self.sha1s)
125
def start_write_group(self):
126
"""Start writing changes."""
128
def commit_write_group(self):
96
129
"""Commit any pending changes."""
131
def abort_write_group(self):
132
"""Abort any pending changes."""
135
class ContentCache(object):
    """Object that can cache Git objects."""

    def __getitem__(self, sha):
        """Retrieve an item, by SHA.

        :param sha: SHA of the object to retrieve
        :raise NotImplementedError: always; this base class provides no
            lookup of its own.
        """
        raise NotImplementedError(self.__getitem__)
143
class BzrGitCacheFormat(object):
    """Base class for bzr-git cache formats.

    A format is identified by the string returned from
    ``get_format_string`` and knows how to open a cache on a transport.
    """

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Write this format's format string to the 'format' file.

        :param transport: Transport on which to create the 'format' file
        """
        transport.put_bytes('format', self.get_format_string())

    # NOTE(review): the decorator is reconstructed; the method is invoked
    # on the class itself (``cls.from_transport(transport)`` below).
    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            # No 'format' file yet: set the transport up with the
            # registered default format before opening it.
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the users global cache directory if the repository has no
        transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                # The cache directory is already there; nothing to create.
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        return cls.from_transport(transport)
197
class CacheUpdater(object):
    """Interface for recording newly created Git objects in a cache."""

    def add_object(self, obj, ie):
        """Record a Git object.

        :param obj: Git object to record
        :param ie: Entry describing the file the object was made from;
            presumably an inventory entry, since implementations in this
            file read ``file_id`` and ``revision`` from it.
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; must be provided by subclasses."""
        raise NotImplementedError(self.finish)
206
class BzrGitCache(object):
    """Caching backend."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Bundle the pieces that make up a cache.

        :param idmap: SHA map object; cache updaters reach it through
            ``cache.idmap``
        :param content_cache: Content cache object, or None
        :param cache_updater_klass: Callable invoked as
            ``cache_updater_klass(cache, rev)`` to build an updater
        """
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Return a new cache updater for ``rev``, bound to this cache."""
        return self._cache_updater_klass(self, rev)
218
def DictBzrGitCache():
    """Create a BzrGitCache backed by a fresh DictGitShaMap.

    No content cache is supplied (None is passed); updaters are created
    with DictCacheUpdater.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
221
class DictCacheUpdater(CacheUpdater):
223
def __init__(self, cache, rev):
225
self.revid = rev.revision_id
226
self.parent_revids = rev.parent_ids
230
def add_object(self, obj, ie):
231
if obj.type_name == "commit":
234
type_data = (self.revid, self._commit.tree)
235
self.cache.idmap._by_revid[self.revid] = obj.id
236
elif obj.type_name in ("blob", "tree"):
237
if obj.type_name == "blob":
238
revision = ie.revision
240
revision = self.revid
241
type_data = (ie.file_id, revision)
242
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
245
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
248
if self._commit is None:
249
raise AssertionError("No commit object added")
99
253
class DictGitShaMap(GitShaMap):
101
255
def __init__(self):
104
def add_entry(self, sha, type, type_data):
105
self.dict[sha] = (type, type_data)
260
def lookup_blob_id(self, fileid, revision):
261
return self._by_fileid[revision][fileid]
107
263
def lookup_git_sha(self, sha):
108
return self.dict[sha]
110
def lookup_tree(self, fileid, revid):
111
for k, v in self.dict.iteritems():
112
if v == ("tree", (fileid, revid)):
114
raise KeyError((fileid, revid))
116
def lookup_blob(self, fileid, revid):
117
for k, v in self.dict.iteritems():
118
if v == ("blob", (fileid, revid)):
120
raise KeyError((fileid, revid))
264
return self._by_sha[sha]
266
def lookup_tree_id(self, fileid, revision):
267
return self._by_fileid[revision][fileid]
269
def lookup_commit(self, revid):
270
return self._by_revid[revid]
122
272
def revids(self):
123
for key, (type, type_data) in self.dict.iteritems():
273
for key, (type, type_data) in self._by_sha.iteritems():
124
274
if type == "commit":
125
275
yield type_data[0]
278
return self._by_sha.iterkeys()
281
class SqliteCacheUpdater(CacheUpdater):
283
def __init__(self, cache, rev):
285
self.db = self.cache.idmap.db
286
self.revid = rev.revision_id
291
def add_object(self, obj, ie):
292
if obj.type_name == "commit":
295
elif obj.type_name == "tree":
296
self._trees.append((obj.id, ie.file_id, self.revid))
297
elif obj.type_name == "blob":
298
self._blobs.append((obj.id, ie.file_id, ie.revision))
303
if self._commit is None:
304
raise AssertionError("No commit object added")
306
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
309
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
312
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
313
(self._commit.id, self.revid, self._commit.tree))
317
def SqliteBzrGitCache(p):
    """Create a BzrGitCache backed by a SqliteGitShaMap.

    :param p: Value passed straight to SqliteGitShaMap (callers in this
        file pass the path of the database file)

    No content cache is supplied (None is passed); updaters are created
    with SqliteCacheUpdater.
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
320
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that stores the SHA map in an 'idmap.db' file."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the cache stored on ``transport``.

        :param transport: Transport holding the cache
        :return: The object built by SqliteBzrGitCache for 'idmap.db'
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            # The transport has no local path, so keep the database in
            # the directory returned by get_cache_dir() instead.
            basepath = get_cache_dir()
        return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
128
333
class SqliteGitShaMap(GitShaMap):
135
340
if not mapdbs().has_key(path):
136
341
mapdbs()[path] = sqlite3.connect(path)
137
self.db = mapdbs()[path]
342
self.db = mapdbs()[path]
343
self.db.text_factory = str
138
344
self.db.executescript("""
139
create table if not exists commits(sha1 text, revid text, tree_sha text);
345
create table if not exists commits(
346
sha1 text not null check(length(sha1) == 40),
348
tree_sha text not null check(length(tree_sha) == 40)
140
350
create index if not exists commit_sha1 on commits(sha1);
141
351
create unique index if not exists commit_revid on commits(revid);
142
create table if not exists blobs(sha1 text, fileid text, revid text);
352
create table if not exists blobs(
353
sha1 text not null check(length(sha1) == 40),
354
fileid text not null,
143
357
create index if not exists blobs_sha1 on blobs(sha1);
144
358
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
145
create table if not exists trees(sha1 text, fileid text, revid text);
146
create index if not exists trees_sha1 on trees(sha1);
359
create table if not exists trees(
360
sha1 text unique not null check(length(sha1) == 40),
361
fileid text not null,
364
create unique index if not exists trees_sha1 on trees(sha1);
147
365
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
151
def from_repository(cls, repository):
152
return cls(os.path.join(repository._transport.local_abspath("."), "git.db"))
154
def _parent_lookup(self, revid):
369
return "%s(%r)" % (self.__class__.__name__, self.path)
371
def lookup_commit(self, revid):
155
372
row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
156
373
if row is not None:
157
return row[0].encode("utf-8")
377
def commit_write_group(self):
163
def add_entries(self, entries):
166
for sha, type, type_data in entries:
168
trees.append((sha, type_data[0], type_data[1]))
170
blobs.append((sha, type_data[0], type_data[1]))
174
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
176
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
179
def add_entry(self, sha, type, type_data):
180
"""Add a new entry to the database.
182
assert isinstance(type_data, tuple)
183
assert isinstance(sha, str), "type was %r" % sha
185
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
186
elif type in ("blob", "tree"):
187
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
189
raise AssertionError("Unknown type %s" % type)
191
def lookup_tree(self, fileid, revid):
192
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid,revid)).fetchone()
194
raise KeyError((fileid, revid))
195
return row[0].encode("utf-8")
197
def lookup_blob(self, fileid, revid):
198
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revid)).fetchone()
200
raise KeyError((fileid, revid))
201
return row[0].encode("utf-8")
380
def lookup_blob_id(self, fileid, revision):
381
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
384
raise KeyError(fileid)
386
def lookup_tree_id(self, fileid, revision):
387
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
390
raise KeyError(fileid)
203
392
def lookup_git_sha(self, sha):
204
393
"""Lookup a Git sha in the database.
207
396
:return: (type, type_data) with type_data:
208
397
revision: revid, tree sha
210
def format(type, row):
211
return (type, (row[0].encode("utf-8"), row[1].encode("utf-8")))
212
399
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
213
400
if row is not None:
214
return format("commit", row)
401
return ("commit", row)
215
402
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
216
403
if row is not None:
217
return format("blob", row)
218
405
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
219
406
if row is not None:
220
return format("tree", row)
221
408
raise KeyError(sha)
223
410
def revids(self):
224
411
"""List the revision ids known."""
225
for row in self.db.execute("select revid from commits").fetchall():
226
yield row[0].encode("utf-8")
412
return (row for (row,) in self.db.execute("select revid from commits"))
415
"""List the SHA1s."""
416
for table in ("blobs", "commits", "trees"):
417
for (sha,) in self.db.execute("select sha1 from %s" % table):
421
class TdbCacheUpdater(CacheUpdater):
423
def __init__(self, cache, rev):
425
self.db = cache.idmap.db
426
self.revid = rev.revision_id
427
self.parent_revids = rev.parent_ids
431
def add_object(self, obj, ie):
432
sha = obj.sha().digest()
433
if obj.type_name == "commit":
434
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
435
type_data = (self.revid, obj.tree)
438
elif obj.type_name == "blob":
439
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
440
type_data = (ie.file_id, ie.revision)
441
elif obj.type_name == "tree":
442
type_data = (ie.file_id, self.revid)
445
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
448
if self._commit is None:
449
raise AssertionError("No commit object added")
453
def TdbBzrGitCache(p):
    """Create a BzrGitCache backed by a TdbGitShaMap.

    :param p: Value passed straight to TdbGitShaMap (callers in this file
        pass the path of the database file)

    No content cache is supplied (None is passed); updaters are created
    with TdbCacheUpdater.
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
455
class TdbGitCacheFormat(BzrGitCacheFormat):
457
def get_format_string(self):
458
return 'bzr-git sha map version 3 using tdb\n'
460
def open(self, transport):
462
basepath = transport.local_abspath(".")
463
except bzrlib.errors.NotLocalUrl:
464
basepath = get_cache_dir()
466
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
469
"Unable to open existing bzr-git cache because 'tdb' is not "
473
class TdbGitShaMap(GitShaMap):
474
"""SHA Map that uses a TDB database.
478
"git <sha1>" -> "<type> <type-data1> <type-data2>"
479
"commit revid" -> "<sha1> <tree-id>"
480
"tree fileid revid" -> "<sha1>"
481
"blob fileid revid" -> "<sha1>"
485
TDB_HASH_SIZE = 50000
487
def __init__(self, path=None):
493
if not mapdbs().has_key(path):
494
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
495
os.O_RDWR|os.O_CREAT)
496
self.db = mapdbs()[path]
498
if int(self.db["version"]) not in (2, 3):
499
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
500
self.db["version"], self.TDB_MAP_VERSION)
504
self.db["version"] = str(self.TDB_MAP_VERSION)
506
def start_write_group(self):
507
"""Start writing changes."""
508
self.db.transaction_start()
510
def commit_write_group(self):
511
"""Commit any pending changes."""
512
self.db.transaction_commit()
514
def abort_write_group(self):
515
"""Abort any pending changes."""
516
self.db.transaction_cancel()
519
return "%s(%r)" % (self.__class__.__name__, self.path)
521
def lookup_commit(self, revid):
522
return sha_to_hex(self.db["commit\0" + revid][:20])
524
def lookup_blob_id(self, fileid, revision):
525
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
527
def lookup_git_sha(self, sha):
528
"""Lookup a Git sha in the database.
530
:param sha: Git object sha
531
:return: (type, type_data) with type_data:
532
revision: revid, tree sha
535
sha = hex_to_sha(sha)
536
data = self.db["git\0" + sha].split("\0")
537
return (data[0], (data[1], data[2]))
539
def missing_revisions(self, revids):
542
if self.db.get("commit\0" + revid) is None:
547
"""List the revision ids known."""
548
for key in self.db.iterkeys():
549
if key.startswith("commit\0"):
553
"""List the SHA1s."""
554
for key in self.db.iterkeys():
555
if key.startswith("git\0"):
556
yield sha_to_hex(key[4:])
559
formats = registry.Registry()
560
formats.register(TdbGitCacheFormat().get_format_string(),
562
formats.register(SqliteGitCacheFormat().get_format_string(),
563
SqliteGitCacheFormat())
567
formats.register('default', SqliteGitCacheFormat())
569
formats.register('default', TdbGitCacheFormat())
572
def migrate_ancient_formats(repo_transport):
    """Move a legacy cache database into the 'git' subdirectory.

    If 'git.db' (or, failing that, 'git.tdb') exists directly on
    ``repo_transport``, the matching cache format is initialized on the
    'git' clone of the transport and the database file is renamed to
    'git/idmap.db' or 'git/idmap.tdb'. If neither file exists, nothing
    is done.

    :param repo_transport: Transport of the repository to migrate
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif repo_transport.has("git.tdb"):
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
583
def remove_readonly_transport_decorator(transport):
    """Return a transport with any read-only decorator stripped.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    if transport.is_readonly():
        return transport._decorated
    # Callers use the result as a transport unconditionally (they call
    # mkdir/clone on it), so a writable transport is handed back as-is
    # rather than falling through to an implicit None.
    return transport
589
def from_repository(repository):
590
"""Open a cache file for a repository.
592
If the repository is remote and there is no transport available from it
593
this will use a local file in the users cache directory
594
(typically ~/.cache/bazaar/git/)
596
:param repository: A repository object
598
repo_transport = getattr(repository, "_transport", None)
599
if repo_transport is not None:
600
# Migrate older cache formats
601
repo_transport = remove_readonly_transport_decorator(repo_transport)
603
repo_transport.mkdir("git")
604
except bzrlib.errors.FileExists:
607
migrate_ancient_formats(repo_transport)
608
return BzrGitCacheFormat.from_repository(repository)