148
148
class ContentCache(object):
149
149
"""Object that can cache Git objects."""
151
def add(self, object):
153
raise NotImplementedError(self.add)
155
def add_multi(self, objects):
156
"""Add multiple objects."""
160
151
def __getitem__(self, sha):
161
152
"""Retrieve an item, by SHA."""
162
153
raise NotImplementedError(self.__getitem__)
156
"""Add an object to the cache."""
157
raise NotImplementedError(self.add)
165
160
class BzrGitCacheFormat(object):
166
"""Bazaar-Git Cache Format."""
168
162
def get_format_string(self):
    """Return a single-line unique format string for this cache format.

    The base implementation is a placeholder that subclasses override.

    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError(self.get_format_string)
172
165
def open(self, transport):
    """Open this format on a transport.

    The base implementation is a placeholder that subclasses override.

    :param transport: Transport holding the cache
    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError(self.open)
176
168
def initialize(self, transport):
    """Create a new instance of this cache format at transport.

    Writes the string returned by ``get_format_string`` to a file named
    ``format`` on the transport.

    :param transport: Transport to create the cache on
    """
    transport.put_bytes('format', self.get_format_string())
181
def from_transport(self, transport):
182
"""Open a cache file present on a transport, or initialize one.
184
:param transport: Transport to use
185
:return: A BzrGitCache instance
172
def from_repository(self, repository):
173
repo_transport = getattr(repository, "_transport", None)
174
if repo_transport is not None:
176
repo_transport.mkdir('git')
177
except bzrlib.errors.FileExists:
179
transport = repo_transport.clone('git')
181
transport = get_remote_cache_transport()
188
183
format_name = transport.get_bytes('format')
189
184
format = formats.get(format_name)
192
187
format.initialize(transport)
193
188
return format.open(transport)
196
def from_repository(cls, repository):
197
"""Open a cache file for a repository.
199
This will use the repository's transport to store the cache file, or
200
use the users global cache directory if the repository has no
201
transport associated with it.
203
:param repository: Repository to open the cache for
204
:return: A `BzrGitCache`
206
repo_transport = getattr(repository, "_transport", None)
207
if repo_transport is not None:
208
# Even if we don't write to this repo, we should be able
209
# to update its cache.
210
repo_transport = remove_readonly_transport_decorator(repo_transport)
212
repo_transport.mkdir('git')
213
except bzrlib.errors.FileExists:
215
transport = repo_transport.clone('git')
217
transport = get_remote_cache_transport()
218
return cls.from_transport(transport)
221
class CacheUpdater(object):
222
"""Base class for objects that can update a bzr-git cache."""
224
def add_object(self, obj, ie, path):
227
:param obj: Object type ("commit", "blob" or "tree")
228
:param ie: Inventory entry (for blob/tree) or testament_sha in case
230
:param path: Path of the object (optional)
232
raise NotImplementedError(self.add_object)
235
raise NotImplementedError(self.finish)
238
class BzrGitCache(object):
239
"""Caching backend."""
241
def __init__(self, idmap, content_cache, cache_updater_klass):
243
self.content_cache = content_cache
244
self._cache_updater_klass = cache_updater_klass
246
def get_updater(self, rev):
247
"""Update an object that implements the CacheUpdater interface for
250
return self._cache_updater_klass(self, rev)
253
def DictBzrGitCache():
    """Create a BzrGitCache that uses a dictionary-based SHA map.

    The content cache is ``None`` and updates go through
    ``DictCacheUpdater``.

    :return: A new `BzrGitCache`
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
256
class DictCacheUpdater(CacheUpdater):
257
"""Cache updater for dict-based caches."""
259
def __init__(self, cache, rev):
261
self.revid = rev.revision_id
262
self.parent_revids = rev.parent_ids
266
def add_object(self, obj, ie, path):
267
if obj.type_name == "commit":
269
assert type(ie) is dict
270
type_data = (self.revid, self._commit.tree, ie)
271
self.cache.idmap._by_revid[self.revid] = obj.id
272
elif obj.type_name in ("blob", "tree"):
274
if obj.type_name == "blob":
275
revision = ie.revision
277
revision = self.revid
278
type_data = (ie.file_id, revision)
279
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
282
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
285
if self._commit is None:
286
raise AssertionError("No commit object added")
290
191
class DictGitShaMap(GitShaMap):
291
"""Git SHA map that uses a dictionary."""
293
193
def __init__(self):
294
194
self._by_sha = {}
295
195
self._by_fileid = {}
197
def _add_entry(self, sha, type, type_data):
198
self._by_sha[sha] = (type, type_data)
199
if type in ("blob", "tree"):
200
self._by_fileid.setdefault(type_data[1], {})[type_data[0]] = sha
298
202
def lookup_blob_id(self, fileid, revision):
    """Return the SHA stored for a file id in a given revision.

    :param fileid: File id to look up
    :param revision: Revision id the file id was recorded under
    :return: The value stored in ``self._by_fileid[revision][fileid]``
    :raises KeyError: if the revision or the file id is not present
    """
    return self._by_fileid[revision][fileid]
426
277
def commit_write_group(self):
280
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
284
for (fileid, kind, hexsha, revision) in entries:
286
trees.append((hexsha, fileid, revid))
288
blobs.append((hexsha, fileid, revision))
292
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
294
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
295
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
297
def _add_entry(self, sha, type, type_data):
298
"""Add a new entry to the database.
300
assert isinstance(type_data, tuple)
303
assert isinstance(sha, str), "type was %r" % sha
305
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
306
elif type in ("blob", "tree"):
307
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
309
raise AssertionError("Unknown type %s" % type)
429
311
def lookup_blob_id(self, fileid, revision):
430
312
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
431
313
if row is not None:
575
417
def __repr__(self):
576
418
return "%s(%r)" % (self.__class__.__name__, self.path)
421
def from_repository(cls, repository):
423
transport = getattr(repository, "_transport", None)
424
if transport is not None:
425
return cls(os.path.join(transport.local_abspath("."), "shamap.tdb"))
426
except bzrlib.errors.NotLocalUrl:
428
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
578
430
def lookup_commit(self, revid):
579
431
return sha_to_hex(self.db["commit\0" + revid][:20])
433
def _add_entry(self, hexsha, type, type_data):
434
"""Add a new entry to the database.
439
sha = hex_to_sha(hexsha)
440
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
442
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
444
self.db["\0".join(("blob", type_data[0], type_data[1]))] = sha
581
446
def lookup_blob_id(self, fileid, revision):
582
447
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
584
449
def lookup_git_sha(self, sha):
585
450
"""Lookup a Git sha in the database.
587
452
:param sha: Git object sha
588
453
:return: (type, type_data) with type_data:
589
commit: revid, tree sha
454
revision: revid, tree sha
593
456
if len(sha) == 40:
594
457
sha = hex_to_sha(sha)
595
458
data = self.db["git\0" + sha].split("\0")
596
if data[0] == "commit":
598
return (data[0], (data[1], data[2], {}))
600
return (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
602
return (data[0], tuple(data[1:]))
459
return (data[0], (data[1], data[2]))
604
461
def missing_revisions(self, revids):
621
478
yield sha_to_hex(key[4:])
624
class VersionedFilesContentCache(ContentCache):
626
def __init__(self, vf):
630
self._vf.insert_record_stream(
631
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
632
obj.as_legacy_object_chunks())])
634
def __getitem__(self, sha):
635
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
636
entry = stream.next()
637
if entry.storage_kind == 'absent':
639
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
642
class GitObjectStoreContentCache(ContentCache):
644
def __init__(self, store):
647
def add_multi(self, objs):
648
self.store.add_objects(objs)
650
def add(self, obj, path):
651
self.store.add_object(obj)
653
def __getitem__(self, sha):
654
return self.store[sha]
657
class IndexCacheUpdater(CacheUpdater):
659
def __init__(self, cache, rev):
661
self.revid = rev.revision_id
662
self.parent_revids = rev.parent_ids
665
self._cache_objs = set()
667
def add_object(self, obj, ie, path):
668
if obj.type_name == "commit":
670
assert type(ie) is dict
671
self.cache.idmap._add_git_sha(obj.id, "commit",
672
(self.revid, obj.tree, ie))
673
self.cache.idmap._add_node(("commit", self.revid, "X"),
674
" ".join((obj.id, obj.tree)))
675
self._cache_objs.add((obj, path))
676
elif obj.type_name == "blob":
677
self.cache.idmap._add_git_sha(obj.id, "blob",
678
(ie.file_id, ie.revision))
679
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
680
if ie.kind == "symlink":
681
self._cache_objs.add((obj, path))
682
elif obj.type_name == "tree":
683
self.cache.idmap._add_git_sha(obj.id, "tree",
684
(ie.file_id, self.revid))
685
self._cache_objs.add((obj, path))
690
self.cache.content_cache.add_multi(self._cache_objs)
694
class IndexBzrGitCache(BzrGitCache):
696
def __init__(self, transport=None):
697
mapper = versionedfile.ConstantMapper("trees")
698
shamap = IndexGitShaMap(transport.clone('index'))
699
#trees_store = knit.make_file_factory(True, mapper)(transport)
700
#content_cache = VersionedFilesContentCache(trees_store)
701
from bzrlib.plugins.git.transportgit import TransportObjectStore
702
store = TransportObjectStore(transport.clone('objects'))
703
content_cache = GitObjectStoreContentCache(store)
704
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
708
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format that opens as an `IndexBzrGitCache`.

    On the transport it uses an ``index`` directory and an ``objects``
    directory, the latter initialised as a `TransportObjectStore`.
    """

    def get_format_string(self):
        """Return the single-line string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create a new, empty cache of this format at transport.

        After the base class ``initialize`` has run, this creates the
        ``index`` and ``objects`` directories and initialises an object
        store inside ``objects``.

        :param transport: Transport to create the cache on
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        transport.mkdir('index')
        transport.mkdir('objects')
        # Kept as a function-level import, as in the original code.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        TransportObjectStore.init(transport.clone('objects'))

    def open(self, transport):
        """Open the cache stored at transport.

        :param transport: Transport holding the cache
        :return: An `IndexBzrGitCache` for that transport
        """
        return IndexBzrGitCache(transport)
724
class IndexGitShaMap(GitShaMap):
725
"""SHA Map that uses the Bazaar APIs to store a cache.
727
BTree Index file with the following contents:
729
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
730
("commit", <revid>) -> "<sha1> <tree-id>"
731
("blob", <fileid>, <revid>) -> <sha1>
735
def __init__(self, transport=None):
736
if transport is None:
737
self._transport = None
738
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
739
self._builder = self._index
742
self._transport = transport
743
self._index = _mod_index.CombinedGraphIndex([])
744
for name in self._transport.list_dir("."):
745
if not name.endswith(".rix"):
747
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
748
self._transport.stat(name).st_size)
749
self._index.insert_index(0, x)
752
def from_repository(cls, repository):
753
transport = getattr(repository, "_transport", None)
754
if transport is not None:
756
transport.mkdir('git')
757
except bzrlib.errors.FileExists:
759
return cls(transport.clone('git'))
760
from bzrlib.transport import get_transport
761
return cls(get_transport(get_cache_dir()))
764
if self._transport is not None:
765
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
767
return "%s()" % (self.__class__.__name__)
770
assert self._builder is None
771
self.start_write_group()
772
for _, key, value in self._index.iter_all_entries():
773
self._builder.add_node(key, value)
775
for name in self._transport.list_dir('.'):
776
if name.endswith('.rix'):
777
to_remove.append(name)
778
self.commit_write_group()
779
del self._index.indices[1:]
780
for name in to_remove:
781
self._transport.rename(name, name + '.old')
783
def start_write_group(self):
    """Start a write group by creating a fresh in-memory index builder.

    :raises AssertionError: if a builder is already set, i.e. a write
        group is already in progress
    """
    # Raised explicitly instead of via ``assert`` so the guard still runs
    # under ``python -O``; the exception type callers see is unchanged.
    if self._builder is not None:
        raise AssertionError("write group already in progress")
    self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
    # Running hash whose hex digest names the ".rix" file written by
    # commit_write_group.
    self._name = osutils.sha()
788
def commit_write_group(self):
789
assert self._builder is not None
790
stream = self._builder.finish()
791
name = self._name.hexdigest() + ".rix"
792
size = self._transport.put_file(name, stream)
793
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
794
self._index.insert_index(0, index)
798
def abort_write_group(self):
799
assert self._builder is not None
803
def _add_node(self, key, value):
805
self._builder.add_node(key, value)
806
except bzrlib.errors.BadIndexDuplicateKey:
807
# Multiple bzr objects can have the same contents
812
def _get_entry(self, key):
813
entries = self._index.iter_entries([key])
815
return entries.next()[2]
816
except StopIteration:
817
if self._builder is None:
819
entries = self._builder.iter_entries([key])
821
return entries.next()[2]
822
except StopIteration:
825
def _iter_keys_prefix(self, prefix):
826
for entry in self._index.iter_entries_prefix([prefix]):
828
if self._builder is not None:
829
for entry in self._builder.iter_entries_prefix([prefix]):
832
def lookup_commit(self, revid):
    """Return the Git commit SHA recorded for a revision id.

    The entry stored under ``("commit", revid, "X")`` has the form
    ``"<sha1> <tree-id>"``; only its first 40 characters are returned.

    :param revid: Revision id to look up
    :return: The first 40 characters of the stored entry
    """
    return self._get_entry(("commit", revid, "X"))[:40]
835
def _add_git_sha(self, hexsha, type, type_data):
836
if hexsha is not None:
837
self._name.update(hexsha)
839
td = (type_data[0], type_data[1], type_data[2]["testament3-sha1"])
842
self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
844
# This object is not represented in Git - perhaps an empty
846
self._name.update(type + " ".join(type_data))
848
def lookup_blob_id(self, fileid, revision):
    """Return the blob SHA recorded for a file id in a given revision.

    :param fileid: File id of the blob
    :param revision: Revision id the blob was recorded under
    :return: The entry stored under ``("blob", fileid, revision)``
    """
    return self._get_entry(("blob", fileid, revision))
851
def lookup_git_sha(self, sha):
853
sha = sha_to_hex(sha)
854
data = self._get_entry(("git", sha, "X")).split(" ", 3)
855
if data[0] == "commit":
856
return ("commit", (data[1], data[2], {"testament3-sha1": data[3]}))
858
return (data[0], tuple(data[1:]))
861
"""List the revision ids known."""
862
for key in self._iter_keys_prefix(("commit", None, None)):
865
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    Only ``self._index`` is consulted.

    :param revids: Iterable of revision ids to check; a one-shot iterator
        is fine because it is materialised once up front
    :return: Set of the revision ids that have no
        ``("commit", revid, "X")`` entry in the index
    """
    missing_revids = set(revids)
    # Build the keys from the materialised set, not from ``revids``:
    # iterating an already-exhausted iterator a second time would look
    # nothing up and report every revision as missing.
    keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, _value in self._index.iter_entries(keys):
        # discard() tolerates the index yielding the same key twice.
        missing_revids.discard(key[1])
    return missing_revids
874
"""List the SHA1s."""
875
for key in self._iter_keys_prefix(("git", None, None)):
879
481
# Registry of the known cache formats, keyed by their format string.
formats = registry.Registry()
# Build each format once and register that same instance.
for _cache_format in (TdbGitCacheFormat(), SqliteGitCacheFormat(),
                      IndexGitCacheFormat()):
    formats.register(_cache_format.get_format_string(), _cache_format)
del _cache_format
# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())
890
488
except ImportError: