13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
17
"""Map from Git sha's to Bazaar objects."""
19
from __future__ import absolute_import
19
21
from dulwich.objects import (
46
50
from xdg.BaseDirectory import xdg_cache_home
47
51
except ImportError:
48
from bzrlib.config import config_dir
52
from ...config import config_dir
49
53
ret = os.path.join(config_dir(), "git")
51
ret = os.path.join(xdg_cache_home, "bazaar", "git")
55
ret = os.path.join(xdg_cache_home, "breezy", "git")
52
56
if not os.path.isdir(ret):
57
def get_remote_cache_transport():
61
def get_remote_cache_transport(repository):
58
62
"""Retrieve the transport to use when accessing (unwritable) remote
61
return get_transport(get_cache_dir())
65
uuid = getattr(repository, "uuid", None)
67
path = get_cache_dir()
69
path = os.path.join(get_cache_dir(), uuid)
70
if not os.path.isdir(path):
72
return get_transport(path)
64
75
def check_pysqlite_version(sqlite3):
69
80
(sqlite3.sqlite_version_info[0] == 3 and
70
81
sqlite3.sqlite_version_info[1] < 3)):
71
82
trace.warning('Needs at least sqlite 3.3.x')
72
raise bzrlib.errors.BzrError("incompatible sqlite library")
83
raise bzr_errors.BzrError("incompatible sqlite library")
77
88
check_pysqlite_version(sqlite3)
78
except (ImportError, bzrlib.errors.BzrError), e:
89
except (ImportError, bzr_errors.BzrError) as e:
79
90
from pysqlite2 import dbapi2 as sqlite3
80
91
check_pysqlite_version(sqlite3)
82
93
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
84
raise bzrlib.errors.BzrError("missing sqlite library")
95
raise bzr_errors.BzrError("missing sqlite library")
87
98
_mapdbs = threading.local()
100
111
def lookup_git_sha(self, sha):
101
112
"""Lookup a Git sha in the database.
102
113
:param sha: Git object sha
103
:return: (type, type_data) with type_data:
114
:return: list with (type, type_data) tuples with type_data:
104
115
commit: revid, tree_sha, verifiers
105
116
blob: fileid, revid
106
117
tree: fileid, revid
188
204
format_name = transport.get_bytes('format')
189
205
format = formats.get(format_name)
190
except bzrlib.errors.NoSuchFile:
206
except bzr_errors.NoSuchFile:
191
207
format = formats.get('default')
192
208
format.initialize(transport)
193
209
return format.open(transport)
203
219
:param repository: Repository to open the cache for
204
220
:return: A `BzrGitCache`
222
from ...transport.local import LocalTransport
206
223
repo_transport = getattr(repository, "_transport", None)
207
if repo_transport is not None:
208
# Even if we don't write to this repo, we should be able
224
if (repo_transport is not None and
225
isinstance(repo_transport, LocalTransport)):
226
# Even if we don't write to this repo, we should be able
209
227
# to update its cache.
210
repo_transport = remove_readonly_transport_decorator(repo_transport)
212
repo_transport.mkdir('git')
213
except bzrlib.errors.FileExists:
215
transport = repo_transport.clone('git')
229
repo_transport = remove_readonly_transport_decorator(repo_transport)
230
except bzr_errors.ReadOnlyError:
234
repo_transport.mkdir('git')
235
except bzr_errors.FileExists:
237
transport = repo_transport.clone('git')
217
transport = get_remote_cache_transport()
240
if transport is None:
241
transport = get_remote_cache_transport(repository)
218
242
return cls.from_transport(transport)
221
245
class CacheUpdater(object):
222
246
"""Base class for objects that can update a bzr-git cache."""
224
def add_object(self, obj, ie, path):
248
def add_object(self, obj, bzr_key_data, path):
225
249
"""Add an object.
227
251
:param obj: Object type ("commit", "blob" or "tree")
228
:param ie: Inventory entry (for blob/tree) or testament_sha in case
252
:param bzr_key_data: bzr key store data or testament_sha in case
230
254
:param path: Path of the object (optional)
238
262
class BzrGitCache(object):
239
263
"""Caching backend."""
241
def __init__(self, idmap, content_cache, cache_updater_klass):
265
def __init__(self, idmap, cache_updater_klass):
242
266
self.idmap = idmap
243
self.content_cache = content_cache
244
267
self._cache_updater_klass = cache_updater_klass
246
269
def get_updater(self, rev):
263
286
self._commit = None
264
287
self._entries = []
266
def add_object(self, obj, ie, path):
267
if obj.type_name == "commit":
289
def add_object(self, obj, bzr_key_data, path):
290
if isinstance(obj, tuple):
291
(type_name, hexsha) = obj
293
type_name = obj.type_name
295
if type_name == "commit":
268
296
self._commit = obj
269
assert type(ie) is dict
270
type_data = (self.revid, self._commit.tree, ie)
271
self.cache.idmap._by_revid[self.revid] = obj.id
272
elif obj.type_name in ("blob", "tree"):
274
if obj.type_name == "blob":
275
revision = ie.revision
277
revision = self.revid
278
type_data = (ie.file_id, revision)
279
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
297
if type(bzr_key_data) is not dict:
298
raise TypeError(bzr_key_data)
300
type_data = (self.revid, self._commit.tree, bzr_key_data)
301
self.cache.idmap._by_revid[self.revid] = hexsha
302
elif type_name in ("blob", "tree"):
303
if bzr_key_data is not None:
304
key = type_data = bzr_key_data
305
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = hexsha
281
307
raise AssertionError
282
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
308
entry = (type_name, type_data)
309
self.cache.idmap._by_sha.setdefault(hexsha, {})[key] = entry
284
311
def finish(self):
285
312
if self._commit is None:
329
def add_object(self, obj, ie, path):
330
if obj.type_name == "commit":
358
def add_object(self, obj, bzr_key_data, path):
359
if isinstance(obj, tuple):
360
(type_name, hexsha) = obj
362
type_name = obj.type_name
364
if type_name == "commit":
331
365
self._commit = obj
332
self._testament3_sha1 = ie["testament3-sha1"]
333
assert type(ie) is dict
334
elif obj.type_name == "tree":
336
self._trees.append((obj.id, ie.file_id, self.revid))
337
elif obj.type_name == "blob":
339
self._blobs.append((obj.id, ie.file_id, ie.revision))
366
if type(bzr_key_data) is not dict:
367
raise TypeError(bzr_key_data)
368
self._testament3_sha1 = bzr_key_data.get("testament3-sha1")
369
elif type_name == "tree":
370
if bzr_key_data is not None:
371
self._trees.append((hexsha, bzr_key_data[0], bzr_key_data[1]))
372
elif type_name == "blob":
373
if bzr_key_data is not None:
374
self._blobs.append((hexsha, bzr_key_data[0], bzr_key_data[1]))
341
376
raise AssertionError
355
390
return self._commit
358
SqliteBzrGitCache = lambda p: BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
393
SqliteBzrGitCache = lambda p: BzrGitCache(SqliteGitShaMap(p), SqliteCacheUpdater)
361
396
class SqliteGitCacheFormat(BzrGitCacheFormat):
363
398
def get_format_string(self):
364
return 'bzr-git sha map version 1 using sqlite\n'
399
return b'bzr-git sha map version 1 using sqlite\n'
366
401
def open(self, transport):
368
403
basepath = transport.local_abspath(".")
369
except bzrlib.errors.NotLocalUrl:
404
except bzr_errors.NotLocalUrl:
370
405
basepath = get_cache_dir()
371
406
return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
447
482
tree: fileid, revid
448
483
blob: fileid, revid
450
row = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,)).fetchone()
452
return ("commit", (row[0], row[1], {"testament3-sha1": row[2]}))
453
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
456
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
486
cursor = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,))
487
for row in cursor.fetchall():
489
if row[2] is not None:
490
verifiers = {"testament3-sha1": row[2]}
493
yield ("commit", (row[0], row[1], verifiers))
494
cursor = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,))
495
for row in cursor.fetchall():
498
cursor = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,))
499
for row in cursor.fetchall():
461
505
def revids(self):
462
506
"""List the revision ids known."""
480
524
self._commit = None
481
525
self._entries = []
483
def add_object(self, obj, ie, path):
484
sha = obj.sha().digest()
485
if obj.type_name == "commit":
527
def add_object(self, obj, bzr_key_data, path):
528
if isinstance(obj, tuple):
529
(type_name, hexsha) = obj
530
sha = hex_to_sha(hexsha)
532
type_name = obj.type_name
533
sha = obj.sha().digest()
534
if type_name == "commit":
486
535
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
487
assert type(ie) is dict, "was %r" % ie
488
type_data = (self.revid, obj.tree, ie["testament3-sha1"])
536
if type(bzr_key_data) is not dict:
537
raise TypeError(bzr_key_data)
538
type_data = (self.revid, obj.tree)
540
type_data += (bzr_key_data["testament3-sha1"],)
489
543
self._commit = obj
490
elif obj.type_name == "blob":
493
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
494
type_data = (ie.file_id, ie.revision)
495
elif obj.type_name == "tree":
498
type_data = (ie.file_id, self.revid)
544
elif type_name == "blob":
545
if bzr_key_data is None:
547
self.db["\0".join(("blob", bzr_key_data[0], bzr_key_data[1]))] = sha
548
type_data = bzr_key_data
549
elif type_name == "tree":
550
if bzr_key_data is None:
552
type_data = bzr_key_data
500
554
raise AssertionError
501
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
555
entry = "\0".join((type_name, ) + type_data) + "\n"
558
oldval = self.db[key]
562
if oldval[-1] != "\n":
563
self.db[key] = "".join([oldval, "\n", entry])
565
self.db[key] = "".join([oldval, entry])
503
567
def finish(self):
504
568
if self._commit is None:
506
570
return self._commit
509
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
573
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), TdbCacheUpdater)
511
576
class TdbGitCacheFormat(BzrGitCacheFormat):
512
577
"""Cache format for tdb-based caches."""
514
579
def get_format_string(self):
515
return 'bzr-git sha map version 3 using tdb\n'
580
return b'bzr-git sha map version 3 using tdb\n'
517
582
def open(self, transport):
519
basepath = transport.local_abspath(".")
520
except bzrlib.errors.NotLocalUrl:
584
basepath = transport.local_abspath(".").encode(osutils._fs_enc)
585
except bzr_errors.NotLocalUrl:
521
586
basepath = get_cache_dir()
587
if type(basepath) is not str:
588
raise TypeError(basepath)
523
590
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
524
591
except ImportError:
576
645
return "%s(%r)" % (self.__class__.__name__, self.path)
578
647
def lookup_commit(self, revid):
579
return sha_to_hex(self.db["commit\0" + revid][:20])
649
return sha_to_hex(self.db["commit\0" + revid][:20])
651
raise KeyError("No cache entry for %r" % revid)
581
653
def lookup_blob_id(self, fileid, revision):
582
654
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
593
665
if len(sha) == 40:
594
666
sha = hex_to_sha(sha)
595
data = self.db["git\0" + sha].split("\0")
596
if data[0] == "commit":
598
return (data[0], (data[1], data[2], {}))
667
value = self.db["git\0" + sha]
668
for data in value.splitlines():
669
data = data.split("\0")
670
if data[0] == "commit":
672
yield (data[0], (data[1], data[2], {}))
674
yield (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
675
elif data[0] in ("tree", "blob"):
676
yield (data[0], tuple(data[1:]))
600
return (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
602
return (data[0], tuple(data[1:]))
678
raise AssertionError("unknown type %r" % data[0])
604
680
def missing_revisions(self, revids):
639
715
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
642
class GitObjectStoreContentCache(ContentCache):
644
def __init__(self, store):
647
def add_multi(self, objs):
648
self.store.add_objects(objs)
650
def add(self, obj, path):
651
self.store.add_object(obj)
653
def __getitem__(self, sha):
654
return self.store[sha]
657
718
class IndexCacheUpdater(CacheUpdater):
659
720
def __init__(self, cache, rev):
662
723
self.parent_revids = rev.parent_ids
663
724
self._commit = None
664
725
self._entries = []
665
self._cache_objs = set()
667
def add_object(self, obj, ie, path):
668
if obj.type_name == "commit":
727
def add_object(self, obj, bzr_key_data, path):
728
if isinstance(obj, tuple):
729
(type_name, hexsha) = obj
731
type_name = obj.type_name
733
if type_name == "commit":
669
734
self._commit = obj
670
assert type(ie) is dict
671
self.cache.idmap._add_git_sha(obj.id, "commit",
672
(self.revid, obj.tree, ie))
735
if type(bzr_key_data) is not dict:
736
raise TypeError(bzr_key_data)
737
self.cache.idmap._add_git_sha(hexsha, "commit",
738
(self.revid, obj.tree, bzr_key_data))
673
739
self.cache.idmap._add_node(("commit", self.revid, "X"),
674
" ".join((obj.id, obj.tree)))
675
self._cache_objs.add((obj, path))
676
elif obj.type_name == "blob":
677
self.cache.idmap._add_git_sha(obj.id, "blob",
678
(ie.file_id, ie.revision))
679
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
680
if ie.kind == "symlink":
681
self._cache_objs.add((obj, path))
682
elif obj.type_name == "tree":
683
self.cache.idmap._add_git_sha(obj.id, "tree",
684
(ie.file_id, self.revid))
685
self._cache_objs.add((obj, path))
740
" ".join((hexsha, obj.tree)))
741
elif type_name == "blob":
742
self.cache.idmap._add_git_sha(hexsha, "blob", bzr_key_data)
743
self.cache.idmap._add_node(("blob", bzr_key_data[0],
744
bzr_key_data[1]), hexsha)
745
elif type_name == "tree":
746
self.cache.idmap._add_git_sha(hexsha, "tree", bzr_key_data)
687
748
raise AssertionError
689
750
def finish(self):
690
self.cache.content_cache.add_multi(self._cache_objs)
691
751
return self._commit
696
756
def __init__(self, transport=None):
697
757
mapper = versionedfile.ConstantMapper("trees")
698
758
shamap = IndexGitShaMap(transport.clone('index'))
699
#trees_store = knit.make_file_factory(True, mapper)(transport)
700
#content_cache = VersionedFilesContentCache(trees_store)
701
from bzrlib.plugins.git.transportgit import TransportObjectStore
702
store = TransportObjectStore(transport.clone('objects'))
703
content_cache = GitObjectStoreContentCache(store)
704
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
759
from .transportgit import TransportObjectStore
760
super(IndexBzrGitCache, self).__init__(shamap, IndexCacheUpdater)
708
763
class IndexGitCacheFormat(BzrGitCacheFormat):
710
765
def get_format_string(self):
711
return 'bzr-git sha map with git object cache version 1\n'
766
return b'bzr-git sha map with git object cache version 1\n'
713
768
def initialize(self, transport):
714
769
super(IndexGitCacheFormat, self).initialize(transport)
715
770
transport.mkdir('index')
716
771
transport.mkdir('objects')
717
from bzrlib.plugins.git.transportgit import TransportObjectStore
772
from .transportgit import TransportObjectStore
718
773
TransportObjectStore.init(transport.clone('objects'))
720
775
def open(self, transport):
727
782
BTree Index file with the following contents:
729
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
730
("commit", <revid>) -> "<sha1> <tree-id>"
784
("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
785
("commit", <revid>, "X") -> "<sha1> <tree-id>"
731
786
("blob", <fileid>, <revid>) -> <sha1>
735
790
def __init__(self, transport=None):
736
792
if transport is None:
737
793
self._transport = None
738
794
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
754
810
if transport is not None:
756
812
transport.mkdir('git')
757
except bzrlib.errors.FileExists:
813
except bzr_errors.FileExists:
759
815
return cls(transport.clone('git'))
760
from bzrlib.transport import get_transport
816
from ...transport import get_transport
761
817
return cls(get_transport(get_cache_dir()))
763
819
def __repr__(self):
767
823
return "%s()" % (self.__class__.__name__)
769
825
def repack(self):
770
assert self._builder is None
826
if self._builder is not None:
827
raise errors.BzrError('builder already open')
771
828
self.start_write_group()
772
for _, key, value in self._index.iter_all_entries():
773
self._builder.add_node(key, value)
829
self._builder.add_nodes(
830
((key, value) for (_, key, value) in
831
self._index.iter_all_entries()))
775
833
for name in self._transport.list_dir('.'):
776
834
if name.endswith('.rix'):
781
839
self._transport.rename(name, name + '.old')
783
841
def start_write_group(self):
784
assert self._builder is None
842
if self._builder is not None:
843
raise errors.BzrError('builder already open')
785
844
self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
786
845
self._name = osutils.sha()
788
847
def commit_write_group(self):
789
assert self._builder is not None
848
if self._builder is None:
849
raise errors.BzrError('builder not open')
790
850
stream = self._builder.finish()
791
851
name = self._name.hexdigest() + ".rix"
792
852
size = self._transport.put_file(name, stream)
796
856
self._name = None
798
858
def abort_write_group(self):
799
assert self._builder is not None
859
if self._builder is None:
860
raise errors.BzrError('builder not open')
800
861
self._builder = None
801
862
self._name = None
803
864
def _add_node(self, key, value):
805
868
self._builder.add_node(key, value)
806
except bzrlib.errors.BadIndexDuplicateKey:
807
# Multiple bzr objects can have the same contents
812
873
def _get_entry(self, key):
813
874
entries = self._index.iter_entries([key])
822
883
except StopIteration:
825
def _iter_keys_prefix(self, prefix):
886
def _iter_entries_prefix(self, prefix):
826
887
for entry in self._index.iter_entries_prefix([prefix]):
888
yield (entry[1], entry[2])
828
889
if self._builder is not None:
829
890
for entry in self._builder.iter_entries_prefix([prefix]):
891
yield (entry[1], entry[2])
832
893
def lookup_commit(self, revid):
833
894
return self._get_entry(("commit", revid, "X"))[:40]
836
897
if hexsha is not None:
837
898
self._name.update(hexsha)
838
899
if type == "commit":
839
td = (type_data[0], type_data[1], type_data[2]["testament3-sha1"])
900
td = (type_data[0], type_data[1])
902
td += (type_data[2]["testament3-sha1"],)
842
907
self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
851
916
def lookup_git_sha(self, sha):
852
917
if len(sha) == 20:
853
918
sha = sha_to_hex(sha)
854
data = self._get_entry(("git", sha, "X")).split(" ", 3)
919
value = self._get_entry(("git", sha, "X"))
920
data = value.split(" ", 3)
855
921
if data[0] == "commit":
856
return ("commit", (data[1], data[2], {"testament3-sha1": data[3]}))
924
verifiers = {"testament3-sha1": data[3]}
929
yield ("commit", (data[1], data[2], verifiers))
858
return (data[0], tuple(data[1:]))
931
yield (data[0], tuple(data[1:]))
860
933
def revids(self):
861
934
"""List the revision ids known."""
862
for key in self._iter_keys_prefix(("commit", None, None)):
935
for key, value in self._iter_entries_prefix(("commit", None, None)):
865
938
def missing_revisions(self, revids):
884
957
formats.register(IndexGitCacheFormat().get_format_string(),
885
958
IndexGitCacheFormat())
886
959
# In the future, this will become the default:
887
# formats.register('default', IndexGitCacheFormat())
891
formats.register('default', SqliteGitCacheFormat())
893
formats.register('default', TdbGitCacheFormat())
960
formats.register('default', IndexGitCacheFormat())
897
964
def migrate_ancient_formats(repo_transport):
965
# Migrate older cache formats
966
repo_transport = remove_readonly_transport_decorator(repo_transport)
967
has_sqlite = repo_transport.has("git.db")
968
has_tdb = repo_transport.has("git.tdb")
969
if not has_sqlite or has_tdb:
972
repo_transport.mkdir("git")
973
except bzr_errors.FileExists:
898
975
# Prefer migrating git.db over git.tdb, since the latter may not
899
976
# be openable on some platforms.
900
if repo_transport.has("git.db"):
901
978
SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
902
979
repo_transport.rename("git.db", "git/idmap.db")
903
elif repo_transport.has("git.tdb"):
904
981
TdbGitCacheFormat().initialize(repo_transport.clone("git"))
905
982
repo_transport.rename("git.tdb", "git/idmap.tdb")
908
985
def remove_readonly_transport_decorator(transport):
909
986
if transport.is_readonly():
910
return transport._decorated
988
return transport._decorated
989
except AttributeError:
990
raise bzr_errors.ReadOnlyError(transport)
923
1003
repo_transport = getattr(repository, "_transport", None)
924
1004
if repo_transport is not None:
925
# Migrate older cache formats
926
repo_transport = remove_readonly_transport_decorator(repo_transport)
928
repo_transport.mkdir("git")
929
except bzrlib.errors.FileExists:
932
1006
migrate_ancient_formats(repo_transport)
1007
except bzr_errors.ReadOnlyError:
1008
pass # Not much we can do
933
1009
return BzrGitCacheFormat.from_repository(repository)