148
148
class ContentCache(object):
149
149
"""Object that can cache Git objects."""
151
def add(self, object):
153
raise NotImplementedError(self.add)
155
def add_multi(self, objects):
156
"""Add multiple objects."""
160
151
def __getitem__(self, sha):
161
152
"""Retrieve an item, by SHA."""
162
153
raise NotImplementedError(self.__getitem__)
156
"""Add an object to the cache."""
157
raise NotImplementedError(self.add)
165
160
class BzrGitCacheFormat(object):
166
"""Bazaar-Git Cache Format."""
168
162
def get_format_string(self):
    """Return a single-line unique format string for this cache format.

    This base implementation is abstract: it always raises
    NotImplementedError, carrying the bound method as its argument.

    :raises NotImplementedError: always
    """
    raise NotImplementedError(self.get_format_string)
172
165
def open(self, transport):
    """Open this format on a transport.

    This base implementation is abstract and never touches ``transport``.

    :param transport: Transport on which the cache lives
    :raises NotImplementedError: always
    """
    raise NotImplementedError(self.open)
176
168
def initialize(self, transport):
    """Create a new instance of this cache format at transport.

    Writes the value of ``self.get_format_string()`` to a file named
    ``format`` on the transport.

    :param transport: Transport to create the cache on
    """
    transport.put_bytes('format', self.get_format_string())
181
def from_transport(self, transport):
182
"""Open a cache file present on a transport, or initialize one.
184
:param transport: Transport to use
185
:return: A BzrGitCache instance
172
def from_repository(self, repository):
173
repo_transport = getattr(repository, "_transport", None)
174
if repo_transport is not None:
176
repo_transport.mkdir('git')
177
except bzrlib.errors.FileExists:
179
transport = repo_transport.clone('git')
181
transport = get_remote_cache_transport()
188
183
format_name = transport.get_bytes('format')
189
184
format = formats.get(format_name)
192
187
format.initialize(transport)
193
188
return format.open(transport)
196
def from_repository(cls, repository):
197
"""Open a cache file for a repository.
199
This will use the repository's transport to store the cache file, or
200
use the users global cache directory if the repository has no
201
transport associated with it.
203
:param repository: Repository to open the cache for
204
:return: A `BzrGitCache`
206
repo_transport = getattr(repository, "_transport", None)
207
if repo_transport is not None:
208
# Even if we don't write to this repo, we should be able
209
# to update its cache.
210
repo_transport = remove_readonly_transport_decorator(repo_transport)
212
repo_transport.mkdir('git')
213
except bzrlib.errors.FileExists:
215
transport = repo_transport.clone('git')
217
transport = get_remote_cache_transport()
218
return cls.from_transport(transport)
221
191
class CacheUpdater(object):
222
"""Base class for objects that can update a bzr-git cache."""
224
def add_object(self, obj, ie, path):
227
:param obj: Object type ("commit", "blob" or "tree")
228
:param ie: Inventory entry (for blob/tree) or testament_sha in case
230
:param path: Path of the object (optional)
232
raise NotImplementedError(self.add_object)
193
def __init__(self, cache, rev, content_cache_types):
195
self.content_cache_types = content_cache_types
196
self.revid = rev.revision_id
197
self.parent_revids = rev.parent_ids
201
def add_object(self, obj, ie):
202
if obj.type_name == "commit":
205
elif obj.type_name in ("blob", "tree"):
206
if obj.type_name == "blob":
207
revision = ie.revision
209
revision = self.revid
210
self._entries.append((ie.file_id, obj.type_name, obj.id, revision))
213
if (self.cache.content_cache and
214
obj.type_name in self.content_cache_types):
215
self.cache.content_cache.add(obj)
234
217
def finish(self):
235
raise NotImplementedError(self.finish)
218
if self._commit is None:
219
raise AssertionError("No commit object added")
220
self.cache.idmap.add_entries(self.revid, self.parent_revids,
221
self._commit.id, self._commit.tree, self._entries)
238
225
class BzrGitCache(object):
239
226
"""Caching backend."""
241
def __init__(self, idmap, content_cache, cache_updater_klass):
228
def __init__(self, idmap, content_cache):
242
229
self.idmap = idmap
243
230
self.content_cache = content_cache
244
self._cache_updater_klass = cache_updater_klass
246
def get_updater(self, rev):
247
"""Update an object that implements the CacheUpdater interface for
250
return self._cache_updater_klass(self, rev)
253
def DictBzrGitCache():
    """Return a new BzrGitCache backed by a DictGitShaMap.

    The cache has no content cache (``None``) and uses DictCacheUpdater
    as its updater class.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
256
class DictCacheUpdater(CacheUpdater):
257
"""Cache updater for dict-based caches."""
259
def __init__(self, cache, rev):
261
self.revid = rev.revision_id
262
self.parent_revids = rev.parent_ids
266
def add_object(self, obj, ie, path):
267
if obj.type_name == "commit":
269
assert type(ie) is dict
270
type_data = (self.revid, self._commit.tree, ie)
271
self.cache.idmap._by_revid[self.revid] = obj.id
272
elif obj.type_name in ("blob", "tree"):
274
if obj.type_name == "blob":
275
revision = ie.revision
277
revision = self.revid
278
type_data = (ie.file_id, revision)
279
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
282
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
285
if self._commit is None:
286
raise AssertionError("No commit object added")
232
def get_updater(self, rev, content_cache_types):
233
return CacheUpdater(self, rev, content_cache_types)
290
236
class DictGitShaMap(GitShaMap):
291
"""Git SHA map that uses a dictionary."""
293
238
def __init__(self):
294
239
self._by_sha = {}
295
240
self._by_fileid = {}
242
def _add_entry(self, sha, type, type_data):
243
self._by_sha[sha] = (type, type_data)
244
if type in ("blob", "tree"):
245
self._by_fileid.setdefault(type_data[1], {})[type_data[0]] = sha
298
247
def lookup_blob_id(self, fileid, revision):
    """Return the SHA recorded for a file id in a given revision.

    ``self._by_fileid`` is indexed by revision first and file id second.

    :param fileid: File id to look up
    :param revision: Revision id the file text belongs to
    :raises KeyError: if no entry exists for the revision or file id
    """
    return self._by_fileid[revision][fileid]
426
324
def commit_write_group(self):
327
def add_entries(self, revid, parent_revids, commit_sha, root_tree_sha,
331
for (fileid, kind, hexsha, revision) in entries:
333
trees.append((hexsha, fileid, revid))
335
blobs.append((hexsha, fileid, revision))
339
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
341
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
342
self._add_entry(commit_sha, "commit", (revid, root_tree_sha))
344
def _add_entry(self, sha, type, type_data):
345
"""Add a new entry to the database.
347
assert isinstance(type_data, tuple)
350
assert isinstance(sha, str), "type was %r" % sha
352
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
353
elif type in ("blob", "tree"):
354
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
356
raise AssertionError("Unknown type %s" % type)
429
358
def lookup_blob_id(self, fileid, revision):
430
359
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
431
360
if row is not None:
621
516
yield sha_to_hex(key[4:])
624
class VersionedFilesContentCache(ContentCache):
626
def __init__(self, vf):
630
self._vf.insert_record_stream(
631
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
632
obj.as_legacy_object_chunks())])
634
def __getitem__(self, sha):
635
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
636
entry = stream.next()
637
if entry.storage_kind == 'absent':
639
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
642
class GitObjectStoreContentCache(ContentCache):
644
def __init__(self, store):
647
def add_multi(self, objs):
    """Add multiple objects to the underlying object store.

    ``objs`` is handed unchanged to ``self.store.add_objects``.

    :param objs: Iterable of objects to add, in whatever form the store's
        ``add_objects`` expects
    """
    self.store.add_objects(objs)
650
def add(self, obj, path=None):
    """Add a single object to the underlying object store.

    :param obj: Object to add; passed to ``self.store.add_object``
    :param path: Path of the object. It is accepted but not used, and
        defaults to None so single-argument calls also work.
    """
    self.store.add_object(obj)
653
def __getitem__(self, sha):
654
return self.store[sha]
657
class IndexCacheUpdater(CacheUpdater):
659
def __init__(self, cache, rev):
661
self.revid = rev.revision_id
662
self.parent_revids = rev.parent_ids
665
self._cache_objs = set()
667
def add_object(self, obj, ie, path):
668
if obj.type_name == "commit":
670
assert type(ie) is dict
671
self.cache.idmap._add_git_sha(obj.id, "commit",
672
(self.revid, obj.tree, ie))
673
self.cache.idmap._add_node(("commit", self.revid, "X"),
674
" ".join((obj.id, obj.tree)))
675
self._cache_objs.add((obj, path))
676
elif obj.type_name == "blob":
677
self.cache.idmap._add_git_sha(obj.id, "blob",
678
(ie.file_id, ie.revision))
679
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
680
if ie.kind == "symlink":
681
self._cache_objs.add((obj, path))
682
elif obj.type_name == "tree":
683
self.cache.idmap._add_git_sha(obj.id, "tree",
684
(ie.file_id, self.revid))
685
self._cache_objs.add((obj, path))
690
self.cache.content_cache.add_multi(self._cache_objs)
694
class IndexBzrGitCache(BzrGitCache):
696
def __init__(self, transport=None):
697
mapper = versionedfile.ConstantMapper("trees")
698
shamap = IndexGitShaMap(transport.clone('index'))
699
#trees_store = knit.make_file_factory(True, mapper)(transport)
700
#content_cache = VersionedFilesContentCache(trees_store)
701
from bzrlib.plugins.git.transportgit import TransportObjectStore
702
store = TransportObjectStore(transport.clone('objects'))
703
content_cache = GitObjectStoreContentCache(store)
704
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
708
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format that opens as an IndexBzrGitCache."""

    def get_format_string(self):
        """Return the single-line format string identifying this format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create a new, empty cache of this format at transport.

        After the base-class initialisation this creates the ``index`` and
        ``objects`` directories and initialises a TransportObjectStore in
        ``objects``.

        :param transport: Transport to create the cache on
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        transport.mkdir('index')
        transport.mkdir('objects')
        # Kept as a function-level import, as in the original code.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        TransportObjectStore.init(transport.clone('objects'))

    def open(self, transport):
        """Open the cache stored at transport.

        :param transport: Transport holding an initialised cache
        :return: An IndexBzrGitCache
        """
        return IndexBzrGitCache(transport)
724
class IndexGitShaMap(GitShaMap):
725
"""SHA Map that uses the Bazaar APIs to store a cache.
727
BTree Index file with the following contents:
729
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
730
("commit", <revid>) -> "<sha1> <tree-id>"
731
("blob", <fileid>, <revid>) -> <sha1>
735
def __init__(self, transport=None):
736
if transport is None:
737
self._transport = None
738
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
739
self._builder = self._index
742
self._transport = transport
743
self._index = _mod_index.CombinedGraphIndex([])
744
for name in self._transport.list_dir("."):
745
if not name.endswith(".rix"):
747
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
748
self._transport.stat(name).st_size)
749
self._index.insert_index(0, x)
752
def from_repository(cls, repository):
753
transport = getattr(repository, "_transport", None)
754
if transport is not None:
756
transport.mkdir('git')
757
except bzrlib.errors.FileExists:
759
return cls(transport.clone('git'))
760
from bzrlib.transport import get_transport
761
return cls(get_transport(get_cache_dir()))
764
if self._transport is not None:
765
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
767
return "%s()" % (self.__class__.__name__)
770
assert self._builder is None
771
self.start_write_group()
772
for _, key, value in self._index.iter_all_entries():
773
self._builder.add_node(key, value)
775
for name in self._transport.list_dir('.'):
776
if name.endswith('.rix'):
777
to_remove.append(name)
778
self.commit_write_group()
779
del self._index.indices[1:]
780
for name in to_remove:
781
self._transport.rename(name, name + '.old')
783
def start_write_group(self):
    """Begin a write group by creating a fresh index builder.

    Also starts the running hash in ``self._name``; its hex digest is used
    as the index file name when the write group is committed.

    :raises AssertionError: if a write group is already in progress
    """
    # Explicit raise instead of ``assert`` so the guard survives ``-O``.
    if self._builder is not None:
        raise AssertionError("write group already in progress")
    self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
    self._name = osutils.sha()
788
def commit_write_group(self):
789
assert self._builder is not None
790
stream = self._builder.finish()
791
name = self._name.hexdigest() + ".rix"
792
size = self._transport.put_file(name, stream)
793
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
794
self._index.insert_index(0, index)
798
def abort_write_group(self):
799
assert self._builder is not None
803
def _add_node(self, key, value):
805
self._builder.add_node(key, value)
806
except bzrlib.errors.BadIndexDuplicateKey:
807
# Multiple bzr objects can have the same contents
812
def _get_entry(self, key):
813
entries = self._index.iter_entries([key])
815
return entries.next()[2]
816
except StopIteration:
817
if self._builder is None:
819
entries = self._builder.iter_entries([key])
821
return entries.next()[2]
822
except StopIteration:
825
def _iter_keys_prefix(self, prefix):
826
for entry in self._index.iter_entries_prefix([prefix]):
828
if self._builder is not None:
829
for entry in self._builder.iter_entries_prefix([prefix]):
832
def lookup_commit(self, revid):
    """Return the commit SHA stored for a revision id.

    The value stored under ``("commit", revid, "X")`` has the form
    ``"<sha1> <tree-id>"``; only its first 40 characters are returned.

    :param revid: Revision id to look up
    :return: The first 40 characters of the stored entry
    """
    entry = self._get_entry(("commit", revid, "X"))
    return entry[:40]
835
def _add_git_sha(self, hexsha, type, type_data):
836
if hexsha is not None:
837
self._name.update(hexsha)
839
td = (type_data[0], type_data[1], type_data[2]["testament3-sha1"])
842
self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
844
# This object is not represented in Git - perhaps an empty
846
self._name.update(type + " ".join(type_data))
848
def lookup_blob_id(self, fileid, revision):
    """Return the blob SHA stored for a file id in a given revision.

    Looks up the entry stored under ``("blob", fileid, revision)``.

    :param fileid: File id to look up
    :param revision: Revision id the file text belongs to
    :return: Whatever ``self._get_entry`` returns for that key
    """
    return self._get_entry(("blob", fileid, revision))
851
def lookup_git_sha(self, sha):
853
sha = sha_to_hex(sha)
854
data = self._get_entry(("git", sha, "X")).split(" ", 3)
855
if data[0] == "commit":
856
return ("commit", (data[1], data[2], {"testament3-sha1": data[3]}))
858
return (data[0], tuple(data[1:]))
861
"""List the revision ids known."""
862
for key in self._iter_keys_prefix(("commit", None, None)):
865
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    :param revids: Iterable of revision ids to check; it is read once, so
        one-shot iterators are supported
    :return: Set of the revision ids with no "commit" entry in
        ``self._index``
    """
    missing_revids = set(revids)
    # Build the keys from the materialised set: ``revids`` may be an
    # iterator that set() has already exhausted.
    keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, _value in self._index.iter_entries(keys):
        # discard() tolerates the same key being yielded more than once.
        missing_revids.discard(key[1])
    return missing_revids
874
"""List the SHA1s."""
875
for key in self._iter_keys_prefix(("git", None, None)):
879
519
# Registry of the known cache formats, keyed by their format string.
formats = registry.Registry()
# Instantiate each format once and register that instance under its own
# format string.
for _cache_format in (TdbGitCacheFormat(), SqliteGitCacheFormat(),
                      IndexGitCacheFormat()):
    formats.register(_cache_format.get_format_string(), _cache_format)
del _cache_format
886
# In the future, this will become the default:
887
# formats.register('default', IndexGitCacheFormat())
890
526
except ImportError: