94
75
return _mapdbs.cache
97
class GitShaMap(object):
98
"""Git<->Bzr revision id mapping database."""
100
def lookup_git_sha(self, sha):
101
"""Lookup a Git sha in the database.
102
:param sha: Git object sha
103
:return: (type, type_data) with type_data:
104
commit: revid, tree_sha, verifiers
108
raise NotImplementedError(self.lookup_git_sha)
110
def lookup_blob_id(self, file_id, revision):
78
class InventorySHAMap(object):
79
"""Maps inventory file ids to Git SHAs."""
81
def lookup_blob(self, file_id, revision):
111
82
"""Retrieve a Git blob SHA by file id.
113
84
:param file_id: File id of the file/symlink
114
85
:param revision: revision in which the file was last changed.
116
raise NotImplementedError(self.lookup_blob_id)
87
raise NotImplementedError(self.lookup_blob)
118
def lookup_tree_id(self, file_id, revision):
89
def lookup_tree(self, file_id):
119
90
"""Retrieve a Git tree SHA by file id.
121
raise NotImplementedError(self.lookup_tree_id)
92
raise NotImplementedError(self.lookup_tree)
95
class GitShaMap(object):
96
"""Git<->Bzr revision id mapping database."""
98
def _add_entry(self, sha, type, type_data):
99
"""Add a new entry to the database.
101
raise NotImplementedError(self._add_entry)
103
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
105
"""Add multiple new entries to the database.
107
for (fileid, kind, hexsha, revision) in entries:
108
self._add_entry(hexsha, kind, (fileid, revision))
109
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
111
def get_inventory_sha_map(self, revid):
112
"""Return the inventory SHA map for a revision.
114
:param revid: Revision to fetch the map for
115
:return: A `InventorySHAMap`
117
raise NotImplementedError(self.get_inventory_sha_map)
119
def lookup_git_sha(self, sha):
120
"""Lookup a Git sha in the database.
121
:param sha: Git object sha
122
:return: (type, type_data) with type_data:
123
revision: revid, tree sha
125
raise NotImplementedError(self.lookup_git_sha)
123
127
def revids(self):
124
128
"""List the revision ids known."""
145
149
"""Abort any pending changes."""
148
class ContentCache(object):
149
"""Object that can cache Git objects."""
151
def add(self, object):
153
raise NotImplementedError(self.add)
155
def add_multi(self, objects):
156
"""Add multiple objects."""
160
def __getitem__(self, sha):
161
"""Retrieve an item, by SHA."""
162
raise NotImplementedError(self.__getitem__)
165
class BzrGitCacheFormat(object):
166
"""Bazaar-Git Cache Format."""
168
def get_format_string(self):
169
"""Return a single-line unique format string for this cache format."""
170
raise NotImplementedError(self.get_format_string)
172
def open(self, transport):
173
"""Open this format on a transport."""
174
raise NotImplementedError(self.open)
176
def initialize(self, transport):
    """Set up a new cache of this format on ``transport``.

    Writes this format's format string to the file named 'format'.

    :param transport: Transport on which the cache is created
    """
    format_marker = self.get_format_string()
    transport.put_bytes('format', format_marker)
181
def from_transport(self, transport):
182
"""Open a cache file present on a transport, or initialize one.
184
:param transport: Transport to use
185
:return: A BzrGitCache instance
188
format_name = transport.get_bytes('format')
189
format = formats.get(format_name)
190
except bzrlib.errors.NoSuchFile:
191
format = formats.get('default')
192
format.initialize(transport)
193
return format.open(transport)
196
def from_repository(cls, repository):
197
"""Open a cache file for a repository.
199
This will use the repository's transport to store the cache file, or
200
use the users global cache directory if the repository has no
201
transport associated with it.
203
:param repository: Repository to open the cache for
204
:return: A `BzrGitCache`
206
repo_transport = getattr(repository, "_transport", None)
207
if repo_transport is not None:
208
# Even if we don't write to this repo, we should be able
209
# to update its cache.
210
repo_transport = remove_readonly_transport_decorator(repo_transport)
212
repo_transport.mkdir('git')
213
except bzrlib.errors.FileExists:
215
transport = repo_transport.clone('git')
217
transport = get_remote_cache_transport()
218
return cls.from_transport(transport)
221
class CacheUpdater(object):
222
"""Base class for objects that can update a bzr-git cache."""
224
def add_object(self, obj, ie, path):
227
:param obj: Object type ("commit", "blob" or "tree")
228
:param ie: Inventory entry (for blob/tree) or testament_sha in case
230
:param path: Path of the object (optional)
232
raise NotImplementedError(self.add_object)
235
raise NotImplementedError(self.finish)
238
class BzrGitCache(object):
239
"""Caching backend."""
241
def __init__(self, idmap, content_cache, cache_updater_klass):
243
self.content_cache = content_cache
244
self._cache_updater_klass = cache_updater_klass
246
def get_updater(self, rev):
    """Create a cache updater for ``rev``.

    :param rev: Revision handed to the updater class
    :return: Whatever the configured updater class returns when called
        with this cache and ``rev``
    """
    updater_factory = self._cache_updater_klass
    return updater_factory(self, rev)
253
def DictBzrGitCache():
    """Build a `BzrGitCache` on top of a dictionary-based sha map.

    The cache gets a fresh `DictGitShaMap` as its id map, no content
    cache (``None``) and `DictCacheUpdater` as its updater class.

    :return: A new `BzrGitCache`
    """
    # A named function instead of a lambda bound to a name: identical call
    # interface, but it carries a real name and a docstring.
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
256
class DictCacheUpdater(CacheUpdater):
257
"""Cache updater for dict-based caches."""
259
def __init__(self, cache, rev):
261
self.revid = rev.revision_id
262
self.parent_revids = rev.parent_ids
266
def add_object(self, obj, ie, path):
267
if obj.type_name == "commit":
269
assert type(ie) is dict
270
type_data = (self.revid, self._commit.tree, ie)
271
self.cache.idmap._by_revid[self.revid] = obj.id
272
elif obj.type_name in ("blob", "tree"):
274
if obj.type_name == "blob":
275
revision = ie.revision
277
revision = self.revid
278
type_data = (ie.file_id, revision)
279
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
282
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
285
if self._commit is None:
286
raise AssertionError("No commit object added")
290
152
class DictGitShaMap(GitShaMap):
291
"""Git SHA map that uses a dictionary."""
293
154
def __init__(self):
294
155
self._by_sha = {}
295
156
self._by_fileid = {}
298
def lookup_blob_id(self, fileid, revision):
    """Return the SHA recorded for ``fileid`` in ``revision``.

    :param fileid: File id to look up
    :param revision: Revision under which the file id was recorded
    :raises KeyError: if the revision or the file id is not recorded
    """
    shas_for_revision = self._by_fileid[revision]
    return shas_for_revision[fileid]
158
def _add_entry(self, sha, type, type_data):
159
self._by_sha[sha] = (type, type_data)
160
if type in ("blob", "tree"):
161
self._by_fileid.setdefault(type_data[1], {})[type_data[0]] = sha
163
def get_inventory_sha_map(self, revid):
165
class DictInventorySHAMap(InventorySHAMap):
167
def __init__(self, base, revid):
171
def lookup_blob(self, fileid, revision):
172
return self._base._by_fileid[revision][fileid]
174
def lookup_tree(self, fileid):
175
return self._base._by_fileid[self.revid][fileid]
177
return DictInventorySHAMap(self, revid)
301
179
def lookup_git_sha(self, sha):
    """Look up the entry stored for a Git SHA.

    :param sha: Git object sha
    :return: The value stored in ``_by_sha`` for ``sha``
    :raises KeyError: if ``sha`` is not present
    """
    entry = self._by_sha[sha]
    return entry
304
def lookup_tree_id(self, fileid, revision):
    """Return the SHA recorded for ``fileid`` in ``revision``.

    :param fileid: File id to look up
    :param revision: Revision under which the file id was recorded
    :raises KeyError: if the revision or the file id is not recorded
    """
    shas_for_revision = self._by_fileid[revision]
    return shas_for_revision[fileid]
307
def lookup_commit(self, revid):
    """Return the value stored in ``_by_revid`` for a revision id.

    :param revid: Revision id to look up
    :raises KeyError: if ``revid`` is not recorded
    """
    commit_sha = self._by_revid[revid]
    return commit_sha
310
182
def revids(self):
311
183
for key, (type, type_data) in self._by_sha.iteritems():
312
184
if type == "commit":
316
188
return self._by_sha.iterkeys()
319
class SqliteCacheUpdater(CacheUpdater):
321
def __init__(self, cache, rev):
323
self.db = self.cache.idmap.db
324
self.revid = rev.revision_id
329
def add_object(self, obj, ie, path):
330
if obj.type_name == "commit":
332
self._testament3_sha1 = ie["testament3-sha1"]
333
assert type(ie) is dict
334
elif obj.type_name == "tree":
336
self._trees.append((obj.id, ie.file_id, self.revid))
337
elif obj.type_name == "blob":
339
self._blobs.append((obj.id, ie.file_id, ie.revision))
344
if self._commit is None:
345
raise AssertionError("No commit object added")
347
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
350
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
353
"replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
354
(self._commit.id, self.revid, self._commit.tree, self._testament3_sha1))
358
def SqliteBzrGitCache(p):
    """Build a `BzrGitCache` on top of a sqlite-based sha map.

    The cache gets ``SqliteGitShaMap(p)`` as its id map, no content cache
    (``None``) and `SqliteCacheUpdater` as its updater class.

    :param p: Argument forwarded unchanged to `SqliteGitShaMap`
    :return: A new `BzrGitCache`
    """
    # A named function instead of a lambda bound to a name: identical call
    # interface, but it carries a real name and a docstring.
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
361
class SqliteGitCacheFormat(BzrGitCacheFormat):
363
def get_format_string(self):
    """Return the format string identifying the sqlite-based sha map."""
    format_string = 'bzr-git sha map version 1 using sqlite\n'
    return format_string
366
def open(self, transport):
368
basepath = transport.local_abspath(".")
369
except bzrlib.errors.NotLocalUrl:
370
basepath = get_cache_dir()
371
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
374
191
class SqliteGitShaMap(GitShaMap):
375
"""Bazaar GIT Sha map that uses a sqlite database for storage."""
377
193
def __init__(self, path=None):
426
245
def commit_write_group(self):
429
def lookup_blob_id(self, fileid, revision):
430
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
433
raise KeyError(fileid)
435
def lookup_tree_id(self, fileid, revision):
436
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
439
raise KeyError(fileid)
248
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
252
for (fileid, kind, hexsha, revision) in entries:
256
trees.append((hexsha, fileid, revid))
258
blobs.append((hexsha, fileid, revision))
262
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
264
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
265
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
267
def _add_entry(self, sha, type, type_data):
268
"""Add a new entry to the database.
270
assert isinstance(type_data, tuple)
273
assert isinstance(sha, str), "type was %r" % sha
275
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
276
elif type in ("blob", "tree"):
277
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
279
raise AssertionError("Unknown type %s" % type)
281
def get_inventory_sha_map(self, revid):
282
class SqliteInventorySHAMap(InventorySHAMap):
284
def __init__(self, db, revid):
288
def lookup_blob(self, fileid, revision):
289
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
292
raise KeyError(fileid)
294
def lookup_tree(self, fileid):
295
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, self.revid)).fetchone()
298
raise KeyError(fileid)
300
return SqliteInventorySHAMap(self.db, revid)
441
302
def lookup_git_sha(self, sha):
442
303
"""Lookup a Git sha in the database.
444
305
:param sha: Git object sha
445
306
:return: (type, type_data) with type_data:
446
commit: revid, tree sha, verifiers
307
revision: revid, tree sha
450
row = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,)).fetchone()
309
def format(type, row):
310
return (type, (row[0], row[1]))
311
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
451
312
if row is not None:
452
return ("commit", (row[0], row[1], {"testament3-sha1": row[2]}))
313
return format("commit", row)
453
314
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
454
315
if row is not None:
316
return format("blob", row)
456
317
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
457
318
if row is not None:
319
return format("tree", row)
459
320
raise KeyError(sha)
461
322
def revids(self):
575
380
def __repr__(self):
576
381
return "%s(%r)" % (self.__class__.__name__, self.path)
384
def from_repository(cls, repository):
386
transport = getattr(repository, "_transport", None)
387
if transport is not None:
388
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
389
except bzrlib.errors.NotLocalUrl:
391
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
578
393
def lookup_commit(self, revid):
    """Return the hex form of the SHA stored for a revision id.

    The record is read from ``self.db`` under ``"commit\\0" + revid``;
    only its first 20 characters are converted with ``sha_to_hex``.

    :param revid: Revision id to look up
    """
    record = self.db["commit\0" + revid]
    binary_sha = record[:20]
    return sha_to_hex(binary_sha)
581
def lookup_blob_id(self, fileid, revision):
    """Return the hex form of the SHA stored for a file id and revision.

    The database key is "blob", ``fileid`` and ``revision`` joined with
    NUL characters.

    :param fileid: File id to look up
    :param revision: Revision under which the blob was recorded
    """
    db_key = "\0".join(("blob", fileid, revision))
    stored_sha = self.db[db_key]
    return sha_to_hex(stored_sha)
396
def _add_entry(self, hexsha, type, type_data):
397
"""Add a new entry to the database.
402
sha = hex_to_sha(hexsha)
403
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
405
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
407
self.db["\0".join(("blob", type_data[0], type_data[1]))] = sha
409
def get_inventory_sha_map(self, revid):
411
class TdbInventorySHAMap(InventorySHAMap):
413
def __init__(self, db, revid):
417
def lookup_blob(self, fileid, revision):
418
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
420
return TdbInventorySHAMap(self.db, revid)
584
422
def lookup_git_sha(self, sha):
585
423
"""Lookup a Git sha in the database.
587
425
:param sha: Git object sha
588
426
:return: (type, type_data) with type_data:
589
commit: revid, tree sha
427
revision: revid, tree sha
593
429
if len(sha) == 40:
594
430
sha = hex_to_sha(sha)
595
431
data = self.db["git\0" + sha].split("\0")
596
if data[0] == "commit":
598
return (data[0], (data[1], data[2], {}))
600
return (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
602
return (data[0], tuple(data[1:]))
432
return (data[0], (data[1], data[2]))
604
434
def missing_revisions(self, revids):
621
451
yield sha_to_hex(key[4:])
624
class VersionedFilesContentCache(ContentCache):
626
def __init__(self, vf):
630
self._vf.insert_record_stream(
631
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
632
obj.as_legacy_object_chunks())])
634
def __getitem__(self, sha):
635
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
636
entry = stream.next()
637
if entry.storage_kind == 'absent':
639
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
642
class GitObjectStoreContentCache(ContentCache):
644
def __init__(self, store):
647
def add_multi(self, objs):
    """Add several objects to the underlying store in one call.

    :param objs: Passed through unchanged to the store's ``add_objects``
    """
    backing_store = self.store
    backing_store.add_objects(objs)
650
def add(self, obj, path):
    """Add a single object to the underlying store.

    :param obj: Object handed to the store's ``add_object``
    :param path: Accepted but not used by this implementation
    """
    backing_store = self.store
    backing_store.add_object(obj)
653
def __getitem__(self, sha):
654
return self.store[sha]
657
class IndexCacheUpdater(CacheUpdater):
659
def __init__(self, cache, rev):
661
self.revid = rev.revision_id
662
self.parent_revids = rev.parent_ids
665
self._cache_objs = set()
667
def add_object(self, obj, ie, path):
668
if obj.type_name == "commit":
670
assert type(ie) is dict
671
self.cache.idmap._add_git_sha(obj.id, "commit",
672
(self.revid, obj.tree, ie))
673
self.cache.idmap._add_node(("commit", self.revid, "X"),
674
" ".join((obj.id, obj.tree)))
675
self._cache_objs.add((obj, path))
676
elif obj.type_name == "blob":
677
self.cache.idmap._add_git_sha(obj.id, "blob",
678
(ie.file_id, ie.revision))
679
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
680
if ie.kind == "symlink":
681
self._cache_objs.add((obj, path))
682
elif obj.type_name == "tree":
683
self.cache.idmap._add_git_sha(obj.id, "tree",
684
(ie.file_id, self.revid))
685
self._cache_objs.add((obj, path))
690
self.cache.content_cache.add_multi(self._cache_objs)
694
class IndexBzrGitCache(BzrGitCache):
696
def __init__(self, transport=None):
697
mapper = versionedfile.ConstantMapper("trees")
698
shamap = IndexGitShaMap(transport.clone('index'))
699
#trees_store = knit.make_file_factory(True, mapper)(transport)
700
#content_cache = VersionedFilesContentCache(trees_store)
701
from bzrlib.plugins.git.transportgit import TransportObjectStore
702
store = TransportObjectStore(transport.clone('objects'))
703
content_cache = GitObjectStoreContentCache(store)
704
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
708
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format that opens as an `IndexBzrGitCache`.

    A new cache of this format has an 'index' and an 'objects'
    directory, the latter initialised as a `TransportObjectStore`.
    """

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        format_string = 'bzr-git sha map with git object cache version 1\n'
        return format_string

    def initialize(self, transport):
        """Create the layout of a new cache at ``transport``.

        Delegates to the parent class first, then creates the 'index' and
        'objects' directories and initialises an object store in
        'objects'.

        :param transport: Transport on which the cache is created
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for dirname in ('index', 'objects'):
            transport.mkdir(dirname)
        # Kept as a function-level import, as in the original code.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store_init = TransportObjectStore.init
        store_init(transport.clone('objects'))

    def open(self, transport):
        """Open the cache stored at ``transport``.

        :param transport: Transport holding the cache
        :return: An `IndexBzrGitCache`
        """
        return IndexBzrGitCache(transport)
724
class IndexGitShaMap(GitShaMap):
725
"""SHA Map that uses the Bazaar APIs to store a cache.
727
BTree Index file with the following contents:
729
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
730
("commit", <revid>) -> "<sha1> <tree-id>"
731
("blob", <fileid>, <revid>) -> <sha1>
735
def __init__(self, transport=None):
736
if transport is None:
737
self._transport = None
738
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
739
self._builder = self._index
742
self._transport = transport
743
self._index = _mod_index.CombinedGraphIndex([])
744
for name in self._transport.list_dir("."):
745
if not name.endswith(".rix"):
747
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
748
self._transport.stat(name).st_size)
749
self._index.insert_index(0, x)
752
def from_repository(cls, repository):
753
transport = getattr(repository, "_transport", None)
754
if transport is not None:
756
transport.mkdir('git')
757
except bzrlib.errors.FileExists:
759
return cls(transport.clone('git'))
760
from bzrlib.transport import get_transport
761
return cls(get_transport(get_cache_dir()))
764
if self._transport is not None:
765
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
767
return "%s()" % (self.__class__.__name__)
770
assert self._builder is None
771
self.start_write_group()
772
for _, key, value in self._index.iter_all_entries():
773
self._builder.add_node(key, value)
775
for name in self._transport.list_dir('.'):
776
if name.endswith('.rix'):
777
to_remove.append(name)
778
self.commit_write_group()
779
del self._index.indices[1:]
780
for name in to_remove:
781
self._transport.rename(name, name + '.old')
783
def start_write_group(self):
784
assert self._builder is None
785
self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
786
self._name = osutils.sha()
788
def commit_write_group(self):
789
assert self._builder is not None
790
stream = self._builder.finish()
791
name = self._name.hexdigest() + ".rix"
792
size = self._transport.put_file(name, stream)
793
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
794
self._index.insert_index(0, index)
798
def abort_write_group(self):
799
assert self._builder is not None
803
def _add_node(self, key, value):
805
self._builder.add_node(key, value)
806
except bzrlib.errors.BadIndexDuplicateKey:
807
# Multiple bzr objects can have the same contents
812
def _get_entry(self, key):
813
entries = self._index.iter_entries([key])
815
return entries.next()[2]
816
except StopIteration:
817
if self._builder is None:
819
entries = self._builder.iter_entries([key])
821
return entries.next()[2]
822
except StopIteration:
825
def _iter_keys_prefix(self, prefix):
826
for entry in self._index.iter_entries_prefix([prefix]):
828
if self._builder is not None:
829
for entry in self._builder.iter_entries_prefix([prefix]):
832
def lookup_commit(self, revid):
    """Return the commit SHA stored for a revision id.

    The entry is looked up under the key ``("commit", revid, "X")`` and
    its first 40 characters are returned.

    :param revid: Revision id to look up
    """
    entry = self._get_entry(("commit", revid, "X"))
    return entry[:40]
835
def _add_git_sha(self, hexsha, type, type_data):
836
if hexsha is not None:
837
self._name.update(hexsha)
839
td = (type_data[0], type_data[1], type_data[2]["testament3-sha1"])
842
self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
844
# This object is not represented in Git - perhaps an empty
846
self._name.update(type + " ".join(type_data))
848
def lookup_blob_id(self, fileid, revision):
    """Return the entry stored under ``("blob", fileid, revision)``.

    :param fileid: File id to look up
    :param revision: Revision under which the blob was recorded
    """
    index_key = ("blob", fileid, revision)
    return self._get_entry(index_key)
851
def lookup_git_sha(self, sha):
853
sha = sha_to_hex(sha)
854
data = self._get_entry(("git", sha, "X")).split(" ", 3)
855
if data[0] == "commit":
856
return ("commit", (data[1], data[2], {"testament3-sha1": data[3]}))
858
return (data[0], tuple(data[1:]))
861
"""List the revision ids known."""
862
for key in self._iter_keys_prefix(("commit", None, None)):
865
def missing_revisions(self, revids):
    """Return the set of revisions that are not present in the index.

    :param revids: Iterable of revision ids to check; may be a one-shot
        iterator, since it is consumed exactly once
    :return: Set of those revision ids for which the index has no
        ``("commit", revid, "X")`` entry
    """
    missing_revids = set(revids)
    # Build the query from the materialised set rather than from
    # ``revids`` again: a generator argument is already exhausted at
    # this point and would make every revision look missing.
    wanted_keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, _ in self._index.iter_entries(wanted_keys):
        # discard() tolerates the same key being reported more than once.
        missing_revids.discard(key[1])
    return missing_revids
874
"""List the SHA1s."""
875
for key in self._iter_keys_prefix(("git", None, None)):
879
formats = registry.Registry()
880
formats.register(TdbGitCacheFormat().get_format_string(),
882
formats.register(SqliteGitCacheFormat().get_format_string(),
883
SqliteGitCacheFormat())
884
formats.register(IndexGitCacheFormat().get_format_string(),
885
IndexGitCacheFormat())
886
# In the future, this will become the default:
887
# formats.register('default', IndexGitCacheFormat())
891
formats.register('default', SqliteGitCacheFormat())
893
formats.register('default', TdbGitCacheFormat())
897
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the 'git' subdirectory.

    Looks for ``git.db`` first and only then for ``git.tdb``; at most one
    of them is migrated per call.  The matching cache format is
    initialised on the 'git' clone of the transport before the file is
    renamed to ``git/idmap.db`` or ``git/idmap.tdb``.

    :param repo_transport: Transport that may hold a legacy cache file
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        legacy_name, migrated_name = "git.db", "git/idmap.db"
    elif repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        legacy_name, migrated_name = "git.tdb", "git/idmap.tdb"
    else:
        # Neither legacy file exists: nothing to migrate.
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(legacy_name, migrated_name)
908
def remove_readonly_transport_decorator(transport):
    """Return a transport with any read-only wrapper stripped.

    :param transport: Transport to inspect
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true (presumably the writable transport wrapped by a read-only
        decorator -- confirm against the transport implementation);
        otherwise ``transport`` itself
    """
    if transport.is_readonly():
        return transport._decorated
    # Callers rebind their transport to the return value and keep using
    # it, so a writable transport must be handed back unchanged rather
    # than falling off the end and yielding None.
    return transport
914
454
def from_repository(repository):
915
"""Open a cache file for a repository.
917
If the repository is remote and there is no transport available from it
918
this will use a local file in the users cache directory
919
(typically ~/.cache/bazaar/git/)
921
:param repository: A repository object
923
repo_transport = getattr(repository, "_transport", None)
924
if repo_transport is not None:
925
# Migrate older cache formats
926
repo_transport = remove_readonly_transport_decorator(repo_transport)
928
repo_transport.mkdir("git")
929
except bzrlib.errors.FileExists:
932
migrate_ancient_formats(repo_transport)
933
return BzrGitCacheFormat.from_repository(repository)
456
return TdbGitShaMap.from_repository(repository)
458
return SqliteGitShaMap.from_repository(repository)