145
131
"""Abort any pending changes."""
148
class ContentCache(object):
149
"""Object that can cache Git objects."""
151
def add(self, object):
153
raise NotImplementedError(self.add)
155
def add_multi(self, objects):
156
"""Add multiple objects."""
160
def __getitem__(self, sha):
161
"""Retrieve an item, by SHA."""
162
raise NotImplementedError(self.__getitem__)
165
class BzrGitCacheFormat(object):
166
"""Bazaar-Git Cache Format."""
168
def get_format_string(self):
169
"""Return a single-line unique format string for this cache format."""
170
raise NotImplementedError(self.get_format_string)
172
def open(self, transport):
173
"""Open this format on a transport."""
174
raise NotImplementedError(self.open)
176
def initialize(self, transport):
177
"""Create a new instance of this cache format at transport."""
178
transport.put_bytes('format', self.get_format_string())
181
def from_transport(self, transport):
182
"""Open a cache file present on a transport, or initialize one.
184
:param transport: Transport to use
185
:return: A BzrGitCache instance
188
format_name = transport.get_bytes('format')
189
format = formats.get(format_name)
190
except bzrlib.errors.NoSuchFile:
191
format = formats.get('default')
192
format.initialize(transport)
193
return format.open(transport)
196
def from_repository(cls, repository):
197
"""Open a cache file for a repository.
199
This will use the repository's transport to store the cache file, or
200
use the users global cache directory if the repository has no
201
transport associated with it.
203
:param repository: Repository to open the cache for
204
:return: A `BzrGitCache`
206
repo_transport = getattr(repository, "_transport", None)
207
if repo_transport is not None:
208
# Even if we don't write to this repo, we should be able
209
# to update its cache.
210
repo_transport = remove_readonly_transport_decorator(repo_transport)
212
repo_transport.mkdir('git')
213
except bzrlib.errors.FileExists:
215
transport = repo_transport.clone('git')
217
transport = get_remote_cache_transport()
218
return cls.from_transport(transport)
221
class CacheUpdater(object):
222
"""Base class for objects that can update a bzr-git cache."""
224
def add_object(self, obj, ie, path):
227
:param obj: Object type ("commit", "blob" or "tree")
228
:param ie: Inventory entry (for blob/tree) or testament_sha in case
230
:param path: Path of the object (optional)
232
raise NotImplementedError(self.add_object)
235
raise NotImplementedError(self.finish)
238
class BzrGitCache(object):
239
"""Caching backend."""
241
def __init__(self, idmap, content_cache, cache_updater_klass):
243
self.content_cache = content_cache
244
self._cache_updater_klass = cache_updater_klass
246
def get_updater(self, rev):
247
"""Update an object that implements the CacheUpdater interface for
250
return self._cache_updater_klass(self, rev)
253
DictBzrGitCache = lambda: BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
256
class DictCacheUpdater(CacheUpdater):
257
"""Cache updater for dict-based caches."""
259
def __init__(self, cache, rev):
261
self.revid = rev.revision_id
262
self.parent_revids = rev.parent_ids
266
def add_object(self, obj, ie, path):
267
if obj.type_name == "commit":
269
assert type(ie) is dict
270
type_data = (self.revid, self._commit.tree, ie)
271
self.cache.idmap._by_revid[self.revid] = obj.id
272
elif obj.type_name in ("blob", "tree"):
274
if obj.type_name == "blob":
275
revision = ie.revision
277
revision = self.revid
278
type_data = (ie.file_id, revision)
279
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
282
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
285
if self._commit is None:
286
raise AssertionError("No commit object added")
290
134
class DictGitShaMap(GitShaMap):
291
"""Git SHA map that uses a dictionary."""
293
136
def __init__(self):
298
def lookup_blob_id(self, fileid, revision):
299
return self._by_fileid[revision][fileid]
139
def add_entry(self, sha, type, type_data):
140
self.dict[sha] = (type, type_data)
301
142
def lookup_git_sha(self, sha):
302
return self._by_sha[sha]
304
def lookup_tree_id(self, fileid, revision):
305
return self._by_fileid[revision][fileid]
307
def lookup_commit(self, revid):
308
return self._by_revid[revid]
143
return self.dict[sha]
145
def lookup_tree(self, fileid, revid):
146
for k, v in self.dict.iteritems():
147
if v == ("tree", (fileid, revid)):
149
raise KeyError((fileid, revid))
151
def lookup_blob(self, fileid, revid):
152
for k, v in self.dict.iteritems():
153
if v == ("blob", (fileid, revid)):
155
raise KeyError((fileid, revid))
310
157
def revids(self):
311
for key, (type, type_data) in self._by_sha.iteritems():
158
for key, (type, type_data) in self.dict.iteritems():
312
159
if type == "commit":
313
160
yield type_data[0]
316
return self._by_sha.iterkeys()
319
class SqliteCacheUpdater(CacheUpdater):
321
def __init__(self, cache, rev):
323
self.db = self.cache.idmap.db
324
self.revid = rev.revision_id
329
def add_object(self, obj, ie, path):
330
if obj.type_name == "commit":
332
self._testament3_sha1 = ie["testament3-sha1"]
333
assert type(ie) is dict
334
elif obj.type_name == "tree":
336
self._trees.append((obj.id, ie.file_id, self.revid))
337
elif obj.type_name == "blob":
339
self._blobs.append((obj.id, ie.file_id, ie.revision))
344
if self._commit is None:
345
raise AssertionError("No commit object added")
347
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
350
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
353
"replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
354
(self._commit.id, self.revid, self._commit.tree, self._testament3_sha1))
358
SqliteBzrGitCache = lambda p: BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
361
class SqliteGitCacheFormat(BzrGitCacheFormat):
363
def get_format_string(self):
364
return 'bzr-git sha map version 1 using sqlite\n'
366
def open(self, transport):
368
basepath = transport.local_abspath(".")
369
except bzrlib.errors.NotLocalUrl:
370
basepath = get_cache_dir()
371
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
163
return self.dict.iterkeys()
374
166
class SqliteGitShaMap(GitShaMap):
375
"""Bazaar GIT Sha map that uses a sqlite database for storage."""
377
168
def __init__(self, path=None):
426
217
def commit_write_group(self):
429
def lookup_blob_id(self, fileid, revision):
430
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
433
raise KeyError(fileid)
435
def lookup_tree_id(self, fileid, revision):
436
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
439
raise KeyError(fileid)
220
def add_entries(self, entries):
223
for sha, type, type_data in entries:
224
assert isinstance(type_data[0], str)
225
assert isinstance(type_data[1], str)
226
entry = (sha, type_data[0], type_data[1])
234
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
236
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
239
def add_entry(self, sha, type, type_data):
240
"""Add a new entry to the database.
242
assert isinstance(type_data, tuple)
245
assert isinstance(sha, str), "type was %r" % sha
247
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
248
elif type in ("blob", "tree"):
249
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
251
raise AssertionError("Unknown type %s" % type)
253
def lookup_tree(self, fileid, revid):
254
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid,revid)).fetchone()
256
raise KeyError((fileid, revid))
259
def lookup_blob(self, fileid, revid):
260
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revid)).fetchone()
262
raise KeyError((fileid, revid))
441
265
def lookup_git_sha(self, sha):
442
266
"""Lookup a Git sha in the database.
444
268
:param sha: Git object sha
445
269
:return: (type, type_data) with type_data:
446
commit: revid, tree sha, verifiers
270
revision: revid, tree sha
450
row = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,)).fetchone()
272
def format(type, row):
273
return (type, (row[0], row[1]))
274
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
451
275
if row is not None:
452
return ("commit", (row[0], row[1], {"testament3-sha1": row[2]}))
276
return format("commit", row)
453
277
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
454
278
if row is not None:
279
return format("blob", row)
456
280
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
457
281
if row is not None:
282
return format("tree", row)
459
283
raise KeyError(sha)
461
285
def revids(self):
462
286
"""List the revision ids known."""
463
return (row for (row,) in self.db.execute("select revid from commits"))
287
for row in self.db.execute("select revid from commits").fetchall():
466
291
"""List the SHA1s."""
467
292
for table in ("blobs", "commits", "trees"):
468
for (sha,) in self.db.execute("select sha1 from %s" % table):
472
class TdbCacheUpdater(CacheUpdater):
473
"""Cache updater for tdb-based caches."""
475
def __init__(self, cache, rev):
477
self.db = cache.idmap.db
478
self.revid = rev.revision_id
479
self.parent_revids = rev.parent_ids
483
def add_object(self, obj, ie, path):
484
sha = obj.sha().digest()
485
if obj.type_name == "commit":
486
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
487
assert type(ie) is dict, "was %r" % ie
488
type_data = (self.revid, obj.tree, ie["testament3-sha1"])
490
elif obj.type_name == "blob":
493
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
494
type_data = (ie.file_id, ie.revision)
495
elif obj.type_name == "tree":
498
type_data = (ie.file_id, self.revid)
501
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
504
if self._commit is None:
505
raise AssertionError("No commit object added")
509
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
511
class TdbGitCacheFormat(BzrGitCacheFormat):
512
"""Cache format for tdb-based caches."""
514
def get_format_string(self):
515
return 'bzr-git sha map version 3 using tdb\n'
517
def open(self, transport):
519
basepath = transport.local_abspath(".")
520
except bzrlib.errors.NotLocalUrl:
521
basepath = get_cache_dir()
523
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
526
"Unable to open existing bzr-git cache because 'tdb' is not "
293
for row in self.db.execute("select sha1 from %s" % table).fetchall():
298
TDB_HASH_SIZE = 50000
530
301
class TdbGitShaMap(GitShaMap):
550
318
if not mapdbs().has_key(path):
551
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
319
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
552
320
os.O_RDWR|os.O_CREAT)
553
321
self.db = mapdbs()[path]
555
if int(self.db["version"]) not in (2, 3):
323
if int(self.db["version"]) != TDB_MAP_VERSION:
556
324
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
557
self.db["version"], self.TDB_MAP_VERSION)
325
self.db["version"], TDB_MAP_VERSION)
327
self.db["version"] = str(TDB_MAP_VERSION)
329
self.db["version"] = str(TDB_MAP_VERSION)
332
def from_repository(cls, repository):
334
transport = getattr(repository, "_transport", None)
335
if transport is not None:
336
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
337
except bzrlib.errors.NotLocalUrl:
561
self.db["version"] = str(self.TDB_MAP_VERSION)
563
def start_write_group(self):
564
"""Start writing changes."""
565
self.db.transaction_start()
567
def commit_write_group(self):
568
"""Commit any pending changes."""
569
self.db.transaction_commit()
571
def abort_write_group(self):
572
"""Abort any pending changes."""
573
self.db.transaction_cancel()
576
return "%s(%r)" % (self.__class__.__name__, self.path)
339
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
578
341
def lookup_commit(self, revid):
579
342
return sha_to_hex(self.db["commit\0" + revid][:20])
581
def lookup_blob_id(self, fileid, revision):
582
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
344
def add_entry(self, hexsha, type, type_data):
345
"""Add a new entry to the database.
350
sha = hex_to_sha(hexsha)
351
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
353
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
355
self.db["\0".join((type, type_data[0], type_data[1]))] = sha
357
def lookup_tree(self, fileid, revid):
358
sha = self.db["\0".join(("tree", fileid, revid))]
362
return sha_to_hex(sha)
364
def lookup_blob(self, fileid, revid):
365
return sha_to_hex(self.db["\0".join(("blob", fileid, revid))])
584
367
def lookup_git_sha(self, sha):
585
368
"""Lookup a Git sha in the database.
587
370
:param sha: Git object sha
588
371
:return: (type, type_data) with type_data:
589
commit: revid, tree sha
372
revision: revid, tree sha
593
374
if len(sha) == 40:
594
375
sha = hex_to_sha(sha)
595
376
data = self.db["git\0" + sha].split("\0")
596
if data[0] == "commit":
598
return (data[0], (data[1], data[2], {}))
600
return (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
602
return (data[0], tuple(data[1:]))
377
return (data[0], (data[1], data[2]))
604
379
def missing_revisions(self, revids):
619
394
for key in self.db.iterkeys():
620
395
if key.startswith("git\0"):
621
396
yield sha_to_hex(key[4:])
624
class VersionedFilesContentCache(ContentCache):
626
def __init__(self, vf):
630
self._vf.insert_record_stream(
631
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
632
obj.as_legacy_object_chunks())])
634
def __getitem__(self, sha):
635
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
636
entry = stream.next()
637
if entry.storage_kind == 'absent':
639
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
642
class GitObjectStoreContentCache(ContentCache):
644
def __init__(self, store):
647
def add_multi(self, objs):
648
self.store.add_objects(objs)
650
def add(self, obj, path):
651
self.store.add_object(obj)
653
def __getitem__(self, sha):
654
return self.store[sha]
657
class IndexCacheUpdater(CacheUpdater):
659
def __init__(self, cache, rev):
661
self.revid = rev.revision_id
662
self.parent_revids = rev.parent_ids
665
self._cache_objs = set()
667
def add_object(self, obj, ie, path):
668
if obj.type_name == "commit":
670
assert type(ie) is dict
671
self.cache.idmap._add_git_sha(obj.id, "commit",
672
(self.revid, obj.tree, ie))
673
self.cache.idmap._add_node(("commit", self.revid, "X"),
674
" ".join((obj.id, obj.tree)))
675
self._cache_objs.add((obj, path))
676
elif obj.type_name == "blob":
677
self.cache.idmap._add_git_sha(obj.id, "blob",
678
(ie.file_id, ie.revision))
679
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
680
if ie.kind == "symlink":
681
self._cache_objs.add((obj, path))
682
elif obj.type_name == "tree":
683
self.cache.idmap._add_git_sha(obj.id, "tree",
684
(ie.file_id, self.revid))
685
self._cache_objs.add((obj, path))
690
self.cache.content_cache.add_multi(self._cache_objs)
694
class IndexBzrGitCache(BzrGitCache):
696
def __init__(self, transport=None):
697
mapper = versionedfile.ConstantMapper("trees")
698
shamap = IndexGitShaMap(transport.clone('index'))
699
#trees_store = knit.make_file_factory(True, mapper)(transport)
700
#content_cache = VersionedFilesContentCache(trees_store)
701
from bzrlib.plugins.git.transportgit import TransportObjectStore
702
store = TransportObjectStore(transport.clone('objects'))
703
content_cache = GitObjectStoreContentCache(store)
704
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
708
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format that pairs a SHA map index with a Git object cache."""

    def get_format_string(self):
        """Return the single-line string that identifies this format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create a new, empty cache of this format at ``transport``.

        After the base class initialisation this creates the ``index`` and
        ``objects`` directories and initialises an object store inside
        ``objects``.

        :param transport: Transport at which the cache is created
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for dirname in ('index', 'objects'):
            transport.mkdir(dirname)
        # Imported at call time rather than at module load.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Open the cache stored at ``transport``.

        :param transport: Transport holding an initialised cache
        :return: An `IndexBzrGitCache` bound to ``transport``
        """
        return IndexBzrGitCache(transport)
724
class IndexGitShaMap(GitShaMap):
725
"""SHA Map that uses the Bazaar APIs to store a cache.
727
BTree Index file with the following contents:
729
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
730
("commit", <revid>) -> "<sha1> <tree-id>"
731
("blob", <fileid>, <revid>) -> <sha1>
735
def __init__(self, transport=None):
736
if transport is None:
737
self._transport = None
738
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
739
self._builder = self._index
742
self._transport = transport
743
self._index = _mod_index.CombinedGraphIndex([])
744
for name in self._transport.list_dir("."):
745
if not name.endswith(".rix"):
747
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
748
self._transport.stat(name).st_size)
749
self._index.insert_index(0, x)
752
def from_repository(cls, repository):
753
transport = getattr(repository, "_transport", None)
754
if transport is not None:
756
transport.mkdir('git')
757
except bzrlib.errors.FileExists:
759
return cls(transport.clone('git'))
760
from bzrlib.transport import get_transport
761
return cls(get_transport(get_cache_dir()))
764
if self._transport is not None:
765
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
767
return "%s()" % (self.__class__.__name__)
770
assert self._builder is None
771
self.start_write_group()
772
for _, key, value in self._index.iter_all_entries():
773
self._builder.add_node(key, value)
775
for name in self._transport.list_dir('.'):
776
if name.endswith('.rix'):
777
to_remove.append(name)
778
self.commit_write_group()
779
del self._index.indices[1:]
780
for name in to_remove:
781
self._transport.rename(name, name + '.old')
783
def start_write_group(self):
784
assert self._builder is None
785
self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
786
self._name = osutils.sha()
788
def commit_write_group(self):
789
assert self._builder is not None
790
stream = self._builder.finish()
791
name = self._name.hexdigest() + ".rix"
792
size = self._transport.put_file(name, stream)
793
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
794
self._index.insert_index(0, index)
798
def abort_write_group(self):
799
assert self._builder is not None
803
def _add_node(self, key, value):
805
self._builder.add_node(key, value)
806
except bzrlib.errors.BadIndexDuplicateKey:
807
# Multiple bzr objects can have the same contents
812
def _get_entry(self, key):
813
entries = self._index.iter_entries([key])
815
return entries.next()[2]
816
except StopIteration:
817
if self._builder is None:
819
entries = self._builder.iter_entries([key])
821
return entries.next()[2]
822
except StopIteration:
825
def _iter_keys_prefix(self, prefix):
826
for entry in self._index.iter_entries_prefix([prefix]):
828
if self._builder is not None:
829
for entry in self._builder.iter_entries_prefix([prefix]):
832
def lookup_commit(self, revid):
833
return self._get_entry(("commit", revid, "X"))[:40]
835
def _add_git_sha(self, hexsha, type, type_data):
836
if hexsha is not None:
837
self._name.update(hexsha)
839
td = (type_data[0], type_data[1], type_data[2]["testament3-sha1"])
842
self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
844
# This object is not represented in Git - perhaps an empty
846
self._name.update(type + " ".join(type_data))
848
def lookup_blob_id(self, fileid, revision):
849
return self._get_entry(("blob", fileid, revision))
851
def lookup_git_sha(self, sha):
853
sha = sha_to_hex(sha)
854
data = self._get_entry(("git", sha, "X")).split(" ", 3)
855
if data[0] == "commit":
856
return ("commit", (data[1], data[2], {"testament3-sha1": data[3]}))
858
return (data[0], tuple(data[1:]))
861
"""List the revision ids known."""
862
for key in self._iter_keys_prefix(("commit", None, None)):
865
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    :param revids: Iterable of revision ids to check; it is consumed
        only once, so a one-shot iterator is acceptable
    :return: Set of the revision ids for which ``self._index`` has no
        ``("commit", revid, "X")`` entry
    """
    missing_revids = set(revids)
    # Build the lookup keys from the set, not from ``revids`` again: if
    # ``revids`` is an iterator it is already exhausted at this point and
    # every revision would wrongly be reported as missing.
    keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, value in self._index.iter_entries(keys):
        # discard() rather than remove(): tolerate an index that reports
        # the same key more than once.
        missing_revids.discard(key[1])
    return missing_revids
874
"""List the SHA1s."""
875
for key in self._iter_keys_prefix(("git", None, None)):
879
formats = registry.Registry()
880
formats.register(TdbGitCacheFormat().get_format_string(),
882
formats.register(SqliteGitCacheFormat().get_format_string(),
883
SqliteGitCacheFormat())
884
formats.register(IndexGitCacheFormat().get_format_string(),
885
IndexGitCacheFormat())
886
# In the future, this will become the default:
887
# formats.register('default', IndexGitCacheFormat())
891
formats.register('default', SqliteGitCacheFormat())
893
formats.register('default', TdbGitCacheFormat())
897
def migrate_ancient_formats(repo_transport):
    """Move a legacy ``git.db`` / ``git.tdb`` cache file into ``git/``.

    If ``repo_transport`` has a ``git.db`` file, a sqlite-format cache is
    initialised in the ``git`` subdirectory and the file is renamed to
    ``git/idmap.db``. Otherwise, if it has ``git.tdb``, the same is done
    with the tdb format and ``git/idmap.tdb``. If neither file is present
    nothing happens.

    :param repo_transport: Transport of the repository control directory
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        old_name, new_name = "git.db", "git/idmap.db"
    elif repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        old_name, new_name = "git.tdb", "git/idmap.tdb"
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(old_name, new_name)
908
def remove_readonly_transport_decorator(transport):
    """Return a transport that is not wrapped in a read-only decorator.

    :param transport: Transport that may report itself as read-only
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    if transport.is_readonly():
        return transport._decorated
    # Callers use the result directly (for example calling mkdir() on it),
    # so an already-writable transport must be handed back, not None.
    return transport
914
def from_repository(repository):
915
"""Open a cache file for a repository.
917
If the repository is remote and there is no transport available from it
918
this will use a local file in the users cache directory
919
(typically ~/.cache/bazaar/git/)
921
:param repository: A repository object
923
repo_transport = getattr(repository, "_transport", None)
924
if repo_transport is not None:
925
# Migrate older cache formats
926
repo_transport = remove_readonly_transport_decorator(repo_transport)
928
repo_transport.mkdir("git")
929
except bzrlib.errors.FileExists:
932
migrate_ancient_formats(repo_transport)
933
return BzrGitCacheFormat.from_repository(repository)