131
140
"""Abort any pending changes."""
143
class ContentCache(object):
144
"""Object that can cache Git objects."""
146
def add(self, object):
148
raise NotImplementedError(self.add)
150
def add_multi(self, objects):
151
"""Add multiple objects."""
155
def __getitem__(self, sha):
156
"""Retrieve an item, by SHA."""
157
raise NotImplementedError(self.__getitem__)
160
class BzrGitCacheFormat(object):
161
"""Bazaar-Git Cache Format."""
163
def get_format_string(self):
164
"""Return a single-line unique format string for this cache format."""
165
raise NotImplementedError(self.get_format_string)
167
def open(self, transport):
168
"""Open this format on a transport."""
169
raise NotImplementedError(self.open)
171
def initialize(self, transport):
    """Set up a new cache of this format on ``transport``.

    The format's identifying string (from ``get_format_string``) is
    stored in a file called 'format' on the transport.

    :param transport: Transport to create the cache on
    """
    marker = self.get_format_string()
    transport.put_bytes('format', marker)
176
def from_transport(self, transport):
177
"""Open a cache file present on a transport, or initialize one.
179
:param transport: Transport to use
180
:return: A BzrGitCache instance
183
format_name = transport.get_bytes('format')
184
format = formats.get(format_name)
185
except bzrlib.errors.NoSuchFile:
186
format = formats.get('default')
187
format.initialize(transport)
188
return format.open(transport)
191
def from_repository(cls, repository):
192
"""Open a cache file for a repository.
194
This will use the repository's transport to store the cache file, or
195
use the users global cache directory if the repository has no
196
transport associated with it.
198
:param repository: Repository to open the cache for
199
:return: A `BzrGitCache`
201
repo_transport = getattr(repository, "_transport", None)
202
if repo_transport is not None:
203
# Even if we don't write to this repo, we should be able
204
# to update its cache.
205
repo_transport = remove_readonly_transport_decorator(repo_transport)
207
repo_transport.mkdir('git')
208
except bzrlib.errors.FileExists:
210
transport = repo_transport.clone('git')
212
transport = get_remote_cache_transport()
213
return cls.from_transport(transport)
216
class CacheUpdater(object):
217
"""Base class for objects that can update a bzr-git cache."""
219
def add_object(self, obj, ie, path):
220
raise NotImplementedError(self.add_object)
223
raise NotImplementedError(self.finish)
226
class BzrGitCache(object):
227
"""Caching backend."""
229
def __init__(self, idmap, content_cache, cache_updater_klass):
231
self.content_cache = content_cache
232
self._cache_updater_klass = cache_updater_klass
234
def get_updater(self, rev):
235
"""Update an object that implements the CacheUpdater interface for
238
return self._cache_updater_klass(self, rev)
241
def DictBzrGitCache():
    """Build a BzrGitCache that keeps its SHA map in dictionaries.

    :return: A `BzrGitCache` using a new DictGitShaMap as id map, no
        content cache (None) and DictCacheUpdater as updater class
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
244
class DictCacheUpdater(CacheUpdater):
245
"""Cache updater for dict-based caches."""
247
def __init__(self, cache, rev):
249
self.revid = rev.revision_id
250
self.parent_revids = rev.parent_ids
254
def add_object(self, obj, ie, path):
255
if obj.type_name == "commit":
258
type_data = (self.revid, self._commit.tree)
259
self.cache.idmap._by_revid[self.revid] = obj.id
260
elif obj.type_name in ("blob", "tree"):
262
if obj.type_name == "blob":
263
revision = ie.revision
265
revision = self.revid
266
type_data = (ie.file_id, revision)
267
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
271
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
274
if self._commit is None:
275
raise AssertionError("No commit object added")
134
279
class DictGitShaMap(GitShaMap):
280
"""Git SHA map that uses a dictionary."""
136
282
def __init__(self):
139
def add_entry(self, sha, type, type_data):
140
self.dict[sha] = (type, type_data)
287
def lookup_blob_id(self, fileid, revision):
288
return self._by_fileid[revision][fileid]
142
290
def lookup_git_sha(self, sha):
143
return self.dict[sha]
145
def lookup_tree(self, fileid, revid):
146
for k, v in self.dict.iteritems():
147
if v == ("tree", (fileid, revid)):
149
raise KeyError((fileid, revid))
151
def lookup_blob(self, fileid, revid):
152
for k, v in self.dict.iteritems():
153
if v == ("blob", (fileid, revid)):
155
raise KeyError((fileid, revid))
291
return self._by_sha[sha]
293
def lookup_tree_id(self, fileid, revision):
294
return self._by_fileid[revision][fileid]
296
def lookup_commit(self, revid):
297
return self._by_revid[revid]
157
299
def revids(self):
158
for key, (type, type_data) in self.dict.iteritems():
300
for key, (type, type_data) in self._by_sha.iteritems():
159
301
if type == "commit":
160
302
yield type_data[0]
163
return self.dict.iterkeys()
305
return self._by_sha.iterkeys()
308
class SqliteCacheUpdater(CacheUpdater):
310
def __init__(self, cache, rev):
312
self.db = self.cache.idmap.db
313
self.revid = rev.revision_id
318
def add_object(self, obj, ie, path):
319
if obj.type_name == "commit":
322
elif obj.type_name == "tree":
324
self._trees.append((obj.id, ie.file_id, self.revid))
325
elif obj.type_name == "blob":
327
self._blobs.append((obj.id, ie.file_id, ie.revision))
332
if self._commit is None:
333
raise AssertionError("No commit object added")
335
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
338
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
341
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
342
(self._commit.id, self.revid, self._commit.tree))
346
def SqliteBzrGitCache(p):
    """Build a BzrGitCache whose SHA map is a SqliteGitShaMap.

    :param p: Passed to SqliteGitShaMap as its path argument
    :return: A `BzrGitCache` using that map, no content cache (None) and
        SqliteCacheUpdater as updater class
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
349
class SqliteGitCacheFormat(BzrGitCacheFormat):
351
def get_format_string(self):
352
return 'bzr-git sha map version 1 using sqlite\n'
354
def open(self, transport):
356
basepath = transport.local_abspath(".")
357
except bzrlib.errors.NotLocalUrl:
358
basepath = get_cache_dir()
359
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
166
362
class SqliteGitShaMap(GitShaMap):
363
"""Bazaar GIT Sha map that uses a sqlite database for storage."""
168
365
def __init__(self, path=None):
217
409
def commit_write_group(self):
220
def add_entries(self, entries):
223
for sha, type, type_data in entries:
224
assert isinstance(type_data[0], str)
225
assert isinstance(type_data[1], str)
226
entry = (sha, type_data[0], type_data[1])
234
self.db.executemany("replace into trees (sha1, fileid, revid) values (?, ?, ?)", trees)
236
self.db.executemany("replace into blobs (sha1, fileid, revid) values (?, ?, ?)", blobs)
239
def add_entry(self, sha, type, type_data):
240
"""Add a new entry to the database.
242
assert isinstance(type_data, tuple)
243
assert isinstance(sha, str), "type was %r" % sha
245
self.db.execute("replace into commits (sha1, revid, tree_sha) values (?, ?, ?)", (sha, type_data[0], type_data[1]))
246
elif type in ("blob", "tree"):
247
self.db.execute("replace into %ss (sha1, fileid, revid) values (?, ?, ?)" % type, (sha, type_data[0], type_data[1]))
249
raise AssertionError("Unknown type %s" % type)
251
def lookup_tree(self, fileid, revid):
252
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid,revid)).fetchone()
254
raise KeyError((fileid, revid))
257
def lookup_blob(self, fileid, revid):
258
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revid)).fetchone()
260
raise KeyError((fileid, revid))
412
def lookup_blob_id(self, fileid, revision):
413
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
416
raise KeyError(fileid)
418
def lookup_tree_id(self, fileid, revision):
419
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
422
raise KeyError(fileid)
263
424
def lookup_git_sha(self, sha):
264
425
"""Lookup a Git sha in the database.
267
428
:return: (type, type_data) with type_data:
268
429
revision: revid, tree sha
270
def format(type, row):
271
return (type, (row[0], row[1]))
272
431
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
273
432
if row is not None:
274
return format("commit", row)
433
return ("commit", row)
275
434
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
276
435
if row is not None:
277
return format("blob", row)
278
437
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
279
438
if row is not None:
280
return format("tree", row)
281
440
raise KeyError(sha)
283
442
def revids(self):
284
443
"""List the revision ids known."""
285
for row in self.db.execute("select revid from commits").fetchall():
444
return (row for (row,) in self.db.execute("select revid from commits"))
289
447
"""List the SHA1s."""
290
448
for table in ("blobs", "commits", "trees"):
291
for row in self.db.execute("select sha1 from %s" % table).fetchall():
296
TDB_HASH_SIZE = 50000
449
for (sha,) in self.db.execute("select sha1 from %s" % table):
453
class TdbCacheUpdater(CacheUpdater):
454
"""Cache updater for tdb-based caches."""
456
def __init__(self, cache, rev):
458
self.db = cache.idmap.db
459
self.revid = rev.revision_id
460
self.parent_revids = rev.parent_ids
464
def add_object(self, obj, ie, path):
465
sha = obj.sha().digest()
466
if obj.type_name == "commit":
467
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
468
type_data = (self.revid, obj.tree)
471
elif obj.type_name == "blob":
474
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
475
type_data = (ie.file_id, ie.revision)
476
elif obj.type_name == "tree":
479
type_data = (ie.file_id, self.revid)
482
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
485
if self._commit is None:
486
raise AssertionError("No commit object added")
490
def TdbBzrGitCache(p):
    """Build a BzrGitCache whose SHA map is a TdbGitShaMap.

    :param p: Passed unchanged to the TdbGitShaMap constructor
    :return: A `BzrGitCache` using that map, no content cache (None) and
        TdbCacheUpdater as updater class
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
492
class TdbGitCacheFormat(BzrGitCacheFormat):
493
"""Cache format for tdb-based caches."""
495
def get_format_string(self):
496
return 'bzr-git sha map version 3 using tdb\n'
498
def open(self, transport):
500
basepath = transport.local_abspath(".")
501
except bzrlib.errors.NotLocalUrl:
502
basepath = get_cache_dir()
504
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
507
"Unable to open existing bzr-git cache because 'tdb' is not "
299
511
class TdbGitShaMap(GitShaMap):
316
531
if not mapdbs().has_key(path):
317
mapdbs()[path] = tdb.Tdb(path, TDB_HASH_SIZE, tdb.DEFAULT,
532
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
318
533
os.O_RDWR|os.O_CREAT)
319
534
self.db = mapdbs()[path]
321
if int(self.db["version"]) != TDB_MAP_VERSION:
536
if int(self.db["version"]) not in (2, 3):
322
537
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
323
self.db["version"], TDB_MAP_VERSION)
538
self.db["version"], self.TDB_MAP_VERSION)
325
self.db["version"] = str(TDB_MAP_VERSION)
327
self.db["version"] = str(TDB_MAP_VERSION)
330
def from_repository(cls, repository):
332
transport = getattr(repository, "_transport", None)
333
if transport is not None:
334
return cls(os.path.join(transport.local_abspath("."), "git.tdb"))
335
except bzrlib.errors.NotLocalUrl:
337
return cls(os.path.join(get_cache_dir(), "remote.tdb"))
542
self.db["version"] = str(self.TDB_MAP_VERSION)
544
def start_write_group(self):
545
"""Start writing changes."""
546
self.db.transaction_start()
548
def commit_write_group(self):
549
"""Commit any pending changes."""
550
self.db.transaction_commit()
552
def abort_write_group(self):
553
"""Abort any pending changes."""
554
self.db.transaction_cancel()
557
return "%s(%r)" % (self.__class__.__name__, self.path)
339
559
def lookup_commit(self, revid):
340
560
return sha_to_hex(self.db["commit\0" + revid][:20])
342
def add_entry(self, hexsha, type, type_data):
343
"""Add a new entry to the database.
348
sha = hex_to_sha(hexsha)
349
self.db["git\0" + sha] = "\0".join((type, type_data[0], type_data[1]))
351
self.db["commit\0" + type_data[0]] = "\0".join((sha, type_data[1]))
353
self.db["\0".join((type, type_data[0], type_data[1]))] = sha
355
def lookup_tree(self, fileid, revid):
356
sha = self.db["\0".join(("tree", fileid, revid))]
360
return sha_to_hex(sha)
362
def lookup_blob(self, fileid, revid):
363
return sha_to_hex(self.db["\0".join(("blob", fileid, revid))])
562
def lookup_blob_id(self, fileid, revision):
563
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
365
565
def lookup_git_sha(self, sha):
366
566
"""Lookup a Git sha in the database.
392
592
for key in self.db.iterkeys():
393
593
if key.startswith("git\0"):
394
594
yield sha_to_hex(key[4:])
597
class VersionedFilesContentCache(ContentCache):
599
def __init__(self, vf):
603
self._vf.insert_record_stream(
604
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
605
obj.as_legacy_object_chunks())])
607
def __getitem__(self, sha):
608
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
609
entry = stream.next()
610
if entry.storage_kind == 'absent':
612
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
615
class GitObjectStoreContentCache(ContentCache):
617
def __init__(self, store):
620
def add_multi(self, objs):
621
self.store.add_objects(objs)
623
def add(self, obj, path=None):
    """Add a single Git object to the underlying object store.

    :param obj: Object to add; handed unchanged to the store's
        ``add_object``
    :param path: Not used by this method. It is optional so that the
        method can also be called with just the object, as the
        ``ContentCache.add(object)`` base signature allows.
    """
    self.store.add_object(obj)
626
def __getitem__(self, sha):
627
return self.store[sha]
630
class IndexCacheUpdater(CacheUpdater):
632
def __init__(self, cache, rev):
634
self.revid = rev.revision_id
635
self.parent_revids = rev.parent_ids
638
self._cache_objs = set()
640
def add_object(self, obj, ie, path):
641
if obj.type_name == "commit":
644
self.cache.idmap._add_git_sha(obj.id, "commit",
645
(self.revid, obj.tree))
646
self.cache.idmap._add_node(("commit", self.revid, "X"),
647
" ".join((obj.id, obj.tree)))
648
self._cache_objs.add((obj, path))
649
elif obj.type_name == "blob":
650
self.cache.idmap._add_git_sha(obj.id, "blob",
651
(ie.file_id, ie.revision))
652
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
653
if ie.kind == "symlink":
654
self._cache_objs.add((obj, path))
655
elif obj.type_name == "tree":
656
self.cache.idmap._add_git_sha(obj.id, "tree",
657
(ie.file_id, self.revid))
658
self._cache_objs.add((obj, path))
663
self.cache.content_cache.add_multi(self._cache_objs)
667
class IndexBzrGitCache(BzrGitCache):
669
def __init__(self, transport=None):
670
mapper = versionedfile.ConstantMapper("trees")
671
shamap = IndexGitShaMap(transport.clone('index'))
672
#trees_store = knit.make_file_factory(True, mapper)(transport)
673
#content_cache = VersionedFilesContentCache(trees_store)
674
from bzrlib.plugins.git.transportgit import TransportObjectStore
675
store = TransportObjectStore(transport.clone('objects'))
676
content_cache = GitObjectStoreContentCache(store)
677
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
681
class IndexGitCacheFormat(BzrGitCacheFormat):
683
def get_format_string(self):
684
return 'bzr-git sha map with git object cache version 1\n'
686
def initialize(self, transport):
    """Create a new index-based cache at ``transport``.

    After the base class initialisation this creates the 'index' and
    'objects' directories and initialises a TransportObjectStore in
    the 'objects' directory.

    :param transport: Transport to create the cache on
    """
    super(IndexGitCacheFormat, self).initialize(transport)
    for subdir in ('index', 'objects'):
        transport.mkdir(subdir)
    # Imported only once the directories exist, as before.
    from bzrlib.plugins.git.transportgit import TransportObjectStore
    objects_transport = transport.clone('objects')
    TransportObjectStore.init(objects_transport)
693
def open(self, transport):
694
return IndexBzrGitCache(transport)
697
class IndexGitShaMap(GitShaMap):
698
"""SHA Map that uses the Bazaar APIs to store a cache.
700
BTree Index file with the following contents:
702
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
703
("commit", <revid>) -> "<sha1> <tree-id>"
704
("blob", <fileid>, <revid>) -> <sha1>
708
def __init__(self, transport=None):
709
if transport is None:
710
self._transport = None
711
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
712
self._builder = self._index
715
self._transport = transport
716
self._index = _mod_index.CombinedGraphIndex([])
717
for name in self._transport.list_dir("."):
718
if not name.endswith(".rix"):
720
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
721
self._transport.stat(name).st_size)
722
self._index.insert_index(0, x)
725
def from_repository(cls, repository):
726
transport = getattr(repository, "_transport", None)
727
if transport is not None:
729
transport.mkdir('git')
730
except bzrlib.errors.FileExists:
732
return cls(transport.clone('git'))
733
from bzrlib.transport import get_transport
734
return cls(get_transport(get_cache_dir()))
737
if self._transport is not None:
738
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
740
return "%s()" % (self.__class__.__name__)
743
assert self._builder is None
744
self.start_write_group()
745
for _, key, value in self._index.iter_all_entries():
746
self._builder.add_node(key, value)
748
for name in self._transport.list_dir('.'):
749
if name.endswith('.rix'):
750
to_remove.append(name)
751
self.commit_write_group()
752
del self._index.indices[1:]
753
for name in to_remove:
754
self._transport.rename(name, name + '.old')
756
def start_write_group(self):
757
assert self._builder is None
758
self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
759
self._name = osutils.sha()
761
def commit_write_group(self):
762
assert self._builder is not None
763
stream = self._builder.finish()
764
name = self._name.hexdigest() + ".rix"
765
size = self._transport.put_file(name, stream)
766
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
767
self._index.insert_index(0, index)
771
def abort_write_group(self):
772
assert self._builder is not None
776
def _add_node(self, key, value):
778
self._builder.add_node(key, value)
779
except bzrlib.errors.BadIndexDuplicateKey:
780
# Multiple bzr objects can have the same contents
785
def _get_entry(self, key):
786
entries = self._index.iter_entries([key])
788
return entries.next()[2]
789
except StopIteration:
790
if self._builder is None:
792
entries = self._builder.iter_entries([key])
794
return entries.next()[2]
795
except StopIteration:
798
def _iter_keys_prefix(self, prefix):
799
for entry in self._index.iter_entries_prefix([prefix]):
801
if self._builder is not None:
802
for entry in self._builder.iter_entries_prefix([prefix]):
805
def lookup_commit(self, revid):
806
return self._get_entry(("commit", revid, "X"))[:40]
808
def _add_git_sha(self, hexsha, type, type_data):
809
if hexsha is not None:
810
self._name.update(hexsha)
811
self._add_node(("git", hexsha, "X"),
812
" ".join((type, type_data[0], type_data[1])))
814
# This object is not represented in Git - perhaps an empty
816
self._name.update(type + " ".join(type_data))
818
def lookup_blob_id(self, fileid, revision):
819
return self._get_entry(("blob", fileid, revision))
821
def lookup_git_sha(self, sha):
823
sha = sha_to_hex(sha)
824
data = self._get_entry(("git", sha, "X")).split(" ", 2)
825
return (data[0], (data[1], data[2]))
828
"""List the revision ids known."""
829
for key in self._iter_keys_prefix(("commit", None, None)):
832
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    Only ``self._index`` is consulted.

    :param revids: Iterable of revision ids to check. It is consumed
        exactly once, so a one-shot iterator or generator is fine, and
        duplicates are allowed.
    :return: Set of those revision ids for which the index yielded no
        ("commit", revid, "X") entry
    """
    missing_revids = set(revids)
    # Build the lookup keys from a snapshot of the de-duplicated set:
    # ``revids`` may already be exhausted at this point, and the set is
    # modified below while the index results are being consumed.
    keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, _ in self._index.iter_entries(keys):
        # discard() rather than remove(): tolerate a key being yielded
        # more than once instead of failing with KeyError.
        missing_revids.discard(key[1])
    return missing_revids
841
"""List the SHA1s."""
842
for key in self._iter_keys_prefix(("git", None, None)):
846
formats = registry.Registry()
847
formats.register(TdbGitCacheFormat().get_format_string(),
849
formats.register(SqliteGitCacheFormat().get_format_string(),
850
SqliteGitCacheFormat())
851
formats.register(IndexGitCacheFormat().get_format_string(),
852
IndexGitCacheFormat())
853
# In the future, this will become the default:
854
# formats.register('default', IndexGitCacheFormat())
858
formats.register('default', SqliteGitCacheFormat())
860
formats.register('default', TdbGitCacheFormat())
864
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the 'git' directory.

    If 'git.db' exists it is renamed to 'git/idmap.db' after a
    SqliteGitCacheFormat has been initialised on the 'git' clone of the
    transport; otherwise, if 'git.tdb' exists, it is renamed to
    'git/idmap.tdb' after initialising a TdbGitCacheFormat there. When
    neither file exists nothing is done.

    :param repo_transport: Transport of the repository to migrate
    """
    # git.db wins over git.tdb, since the latter may not be openable on
    # some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        old_name, new_name = "git.db", "git/idmap.db"
    elif repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        old_name, new_name = "git.tdb", "git/idmap.tdb"
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(old_name, new_name)
875
def remove_readonly_transport_decorator(transport):
    """Return a transport without a read-only decorator around it.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` if ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    if transport.is_readonly():
        return transport._decorated
    # Callers use the result directly as a transport (e.g. calling
    # mkdir on it), so a writable transport is handed back as-is
    # rather than falling through and returning None.
    return transport
881
def from_repository(repository):
882
"""Open a cache file for a repository.
884
If the repository is remote and there is no transport available from it
885
this will use a local file in the users cache directory
886
(typically ~/.cache/bazaar/git/)
888
:param repository: A repository object
890
repo_transport = getattr(repository, "_transport", None)
891
if repo_transport is not None:
892
# Migrate older cache formats
893
repo_transport = remove_readonly_transport_decorator(repo_transport)
895
repo_transport.mkdir("git")
896
except bzrlib.errors.FileExists:
899
migrate_ancient_formats(repo_transport)
900
return BzrGitCacheFormat.from_repository(repository)