1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
26
from dulwich.objects import (
32
btree_index as _mod_btree_index,
39
from bzrlib.transport import (
46
from xdg.BaseDirectory import xdg_cache_home
48
from bzrlib.config import config_dir
49
ret = os.path.join(config_dir(), "git")
51
ret = os.path.join(xdg_cache_home, "bazaar", "git")
52
if not os.path.isdir(ret):
57
def get_remote_cache_transport():
    """Return a transport opened on the local bzr-git cache directory.

    The location is whatever ``get_cache_dir()`` returns.
    """
    cache_location = get_cache_dir()
    return get_transport(cache_location)
61
def check_pysqlite_version(sqlite3):
    """Verify that the given sqlite module wraps sqlite 3.3 or newer.

    :param sqlite3: Module-like object exposing ``sqlite_version_info``
    :raise bzrlib.errors.BzrError: if the library is older than 3.3; a
        warning is emitted through ``trace.warning`` first.
    """
    version_info = sqlite3.sqlite_version_info
    major = version_info[0]
    # The minor component is only consulted for the 3.x series.
    too_old = major < 3 or (major == 3 and version_info[1] < 3)
    if not too_old:
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
74
check_pysqlite_version(sqlite3)
75
except (ImportError, bzrlib.errors.BzrError), e:
76
from pysqlite2 import dbapi2 as sqlite3
77
check_pysqlite_version(sqlite3)
79
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
81
raise bzrlib.errors.BzrError("missing sqlite library")
84
_mapdbs = threading.local()
86
"""Get a cache for this thread's db connections."""
89
except AttributeError:
94
class GitShaMap(object):
    """Abstract Git<->Bzr revision id mapping database.

    Backends override the lookup and listing methods, which raise
    NotImplementedError here. The write-group hooks do nothing by default.
    """

    def lookup_git_sha(self, sha):
        """Find what a Git object sha refers to.

        :param sha: Git object sha
        :return: (type, type_data) tuple; for a revision, type_data is
            (revid, tree sha)
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Look up the Git blob SHA recorded for a file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Look up the Git tree SHA recorded for a file id."""
        raise NotImplementedError(self.lookup_tree_id)

    def revids(self):
        """Iterate over the revision ids stored in this map."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Compute which of the given revisions this map does not contain.

        :param revids: Iterable of revision ids; a set is used as-is, any
            other iterable is converted to a set first
        :return: set of the revision ids not reported by ``self.revids()``
        """
        known = set(self.revids())
        wanted = revids if isinstance(revids, set) else set(revids)
        return wanted - known

    def sha1s(self):
        """Iterate over the SHA1s stored in this map."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Begin a group of changes (no-op in the base class)."""

    def commit_write_group(self):
        """Persist pending changes (no-op in the base class)."""

    def abort_write_group(self):
        """Discard pending changes (no-op in the base class)."""
143
class ContentCache(object):
    """Interface for a store that caches Git objects."""

    def __getitem__(self, sha):
        """Return the cached object for ``sha``.

        The base class has no storage and always raises NotImplementedError.
        """
        raise NotImplementedError(self.__getitem__)
151
class BzrGitCacheFormat(object):
    """Base class for Bazaar-Git cache formats.

    A format is identified by the string written to the ``format`` file on
    the cache transport.
    """

    def get_format_string(self):
        """Return the single-line string that uniquely names this format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open an existing cache of this format on ``transport``."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Mark ``transport`` as holding a new cache of this format.

        Writes the format string to the ``format`` file.
        """
        marker = self.get_format_string()
        transport.put_bytes('format', marker)

    @classmethod
    def from_transport(self, transport):
        """Open the cache found on a transport, creating one if needed.

        If the ``format`` file is missing, the format registered as
        'default' is initialized on the transport and then opened.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open the cache belonging to a repository.

        The cache is kept in a ``git`` directory on the repository's
        transport; when the repository has no ``_transport`` attribute (or
        it is None) the user's global cache directory is used instead.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is None:
            return cls.from_transport(get_remote_cache_transport())
        # Even if we don't write to this repo, we should be able
        # to update its cache.
        repo_transport = remove_readonly_transport_decorator(repo_transport)
        try:
            repo_transport.mkdir('git')
        except bzrlib.errors.FileExists:
            pass
        return cls.from_transport(repo_transport.clone('git'))
207
class CacheUpdater(object):
    """Interface for objects that record Git objects in a bzr-git cache."""

    def add_object(self, obj, ie):
        """Record one Git object.

        :param obj: Git object to record
        :param ie: entry describing the Bazaar side of ``obj``
            (presumably an inventory entry -- confirm against callers)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; must be provided by subclasses."""
        raise NotImplementedError(self.finish)
217
class BzrGitCache(object):
    """Caching backend bundling a sha map, a content cache and an updater.

    :ivar idmap: the sha map
    :ivar content_cache: the content cache (may be None)
    """

    def __init__(self, idmap, content_cache, cache_updater_klass):
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Create an updater for this cache and the given revision.

        :param rev: revision handed to the updater class
        :return: the result of ``cache_updater_klass(self, rev)``
        """
        updater_factory = self._cache_updater_klass
        return updater_factory(self, rev)
232
def DictBzrGitCache():
    """Build a BzrGitCache backed by an in-memory DictGitShaMap.

    No content cache is attached (``None`` is passed for it), and updates
    go through DictCacheUpdater.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
235
class DictCacheUpdater(CacheUpdater):
236
"""Cache updater for dict-based caches."""
238
def __init__(self, cache, rev):
240
self.revid = rev.revision_id
241
self.parent_revids = rev.parent_ids
245
def add_object(self, obj, ie):
246
if obj.type_name == "commit":
249
type_data = (self.revid, self._commit.tree)
250
self.cache.idmap._by_revid[self.revid] = obj.id
251
elif obj.type_name in ("blob", "tree"):
253
if obj.type_name == "blob":
254
revision = ie.revision
256
revision = self.revid
257
type_data = (ie.file_id, revision)
258
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
262
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
265
if self._commit is None:
266
raise AssertionError("No commit object added")
270
class DictGitShaMap(GitShaMap):
271
"""Git SHA map that uses a dictionary."""
278
def lookup_blob_id(self, fileid, revision):
279
return self._by_fileid[revision][fileid]
281
def lookup_git_sha(self, sha):
282
return self._by_sha[sha]
284
def lookup_tree_id(self, fileid, revision):
285
return self._by_fileid[revision][fileid]
287
def lookup_commit(self, revid):
288
return self._by_revid[revid]
291
for key, (type, type_data) in self._by_sha.iteritems():
296
return self._by_sha.iterkeys()
299
class SqliteCacheUpdater(CacheUpdater):
301
def __init__(self, cache, rev):
303
self.db = self.cache.idmap.db
304
self.revid = rev.revision_id
309
def add_object(self, obj, ie):
310
if obj.type_name == "commit":
313
elif obj.type_name == "tree":
315
self._trees.append((obj.id, ie.file_id, self.revid))
316
elif obj.type_name == "blob":
318
self._blobs.append((obj.id, ie.file_id, ie.revision))
323
if self._commit is None:
324
raise AssertionError("No commit object added")
326
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
329
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
332
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
333
(self._commit.id, self.revid, self._commit.tree))
337
def SqliteBzrGitCache(p):
    """Build a BzrGitCache backed by a SqliteGitShaMap.

    :param p: path handed to SqliteGitShaMap
    :return: BzrGitCache without a content cache, updated through
        SqliteCacheUpdater
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
340
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in ``idmap.db``."""

    def get_format_string(self):
        """Return the format marker for the sqlite-based cache."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache for ``transport``.

        If the transport has no local path, the database is placed in the
        directory returned by ``get_cache_dir()``.
        """
        try:
            cache_dir = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            cache_dir = get_cache_dir()
        db_path = os.path.join(cache_dir, "idmap.db")
        return SqliteBzrGitCache(db_path)
353
class SqliteGitShaMap(GitShaMap):
354
"""Bazaar GIT Sha map that uses a sqlite database for storage."""
356
def __init__(self, path=None):
359
self.db = sqlite3.connect(":memory:")
361
if not mapdbs().has_key(path):
362
mapdbs()[path] = sqlite3.connect(path)
363
self.db = mapdbs()[path]
364
self.db.text_factory = str
365
self.db.executescript("""
366
create table if not exists commits(
367
sha1 text not null check(length(sha1) == 40),
369
tree_sha text not null check(length(tree_sha) == 40)
371
create index if not exists commit_sha1 on commits(sha1);
372
create unique index if not exists commit_revid on commits(revid);
373
create table if not exists blobs(
374
sha1 text not null check(length(sha1) == 40),
375
fileid text not null,
378
create index if not exists blobs_sha1 on blobs(sha1);
379
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
380
create table if not exists trees(
381
sha1 text unique not null check(length(sha1) == 40),
382
fileid text not null,
385
create unique index if not exists trees_sha1 on trees(sha1);
386
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
390
return "%s(%r)" % (self.__class__.__name__, self.path)
392
def lookup_commit(self, revid):
393
cursor = self.db.execute("select sha1 from commits where revid = ?",
395
row = cursor.fetchone()
400
def commit_write_group(self):
403
def lookup_blob_id(self, fileid, revision):
404
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
407
raise KeyError(fileid)
409
def lookup_tree_id(self, fileid, revision):
410
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
413
raise KeyError(fileid)
415
def lookup_git_sha(self, sha):
416
"""Lookup a Git sha in the database.
418
:param sha: Git object sha
419
:return: (type, type_data) with type_data:
420
revision: revid, tree sha
422
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
424
return ("commit", row)
425
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
428
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
434
"""List the revision ids known."""
435
return (row for (row,) in self.db.execute("select revid from commits"))
438
"""List the SHA1s."""
439
for table in ("blobs", "commits", "trees"):
440
for (sha,) in self.db.execute("select sha1 from %s" % table):
444
class TdbCacheUpdater(CacheUpdater):
445
"""Cache updater for tdb-based caches."""
447
def __init__(self, cache, rev):
449
self.db = cache.idmap.db
450
self.revid = rev.revision_id
451
self.parent_revids = rev.parent_ids
455
def add_object(self, obj, ie):
456
sha = obj.sha().digest()
457
if obj.type_name == "commit":
458
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
459
type_data = (self.revid, obj.tree)
462
elif obj.type_name == "blob":
465
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
466
type_data = (ie.file_id, ie.revision)
467
elif obj.type_name == "tree":
470
type_data = (ie.file_id, self.revid)
473
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
476
if self._commit is None:
477
raise AssertionError("No commit object added")
481
def TdbBzrGitCache(p):
    """Build a BzrGitCache backed by a TdbGitShaMap.

    :param p: path handed to TdbGitShaMap
    :return: BzrGitCache without a content cache, updated through
        TdbCacheUpdater
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
483
class TdbGitCacheFormat(BzrGitCacheFormat):
484
"""Cache format for tdb-based caches."""
486
def get_format_string(self):
487
return 'bzr-git sha map version 3 using tdb\n'
489
def open(self, transport):
491
basepath = transport.local_abspath(".")
492
except bzrlib.errors.NotLocalUrl:
493
basepath = get_cache_dir()
495
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
498
"Unable to open existing bzr-git cache because 'tdb' is not "
502
class TdbGitShaMap(GitShaMap):
503
"""SHA Map that uses a TDB database.
507
"git <sha1>" -> "<type> <type-data1> <type-data2>"
508
"commit revid" -> "<sha1> <tree-id>"
509
"tree fileid revid" -> "<sha1>"
510
"blob fileid revid" -> "<sha1>"
514
TDB_HASH_SIZE = 50000
516
def __init__(self, path=None):
522
if not mapdbs().has_key(path):
523
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
524
os.O_RDWR|os.O_CREAT)
525
self.db = mapdbs()[path]
527
if int(self.db["version"]) not in (2, 3):
528
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
529
self.db["version"], self.TDB_MAP_VERSION)
533
self.db["version"] = str(self.TDB_MAP_VERSION)
535
def start_write_group(self):
536
"""Start writing changes."""
537
self.db.transaction_start()
539
def commit_write_group(self):
540
"""Commit any pending changes."""
541
self.db.transaction_commit()
543
def abort_write_group(self):
544
"""Abort any pending changes."""
545
self.db.transaction_cancel()
548
return "%s(%r)" % (self.__class__.__name__, self.path)
550
def lookup_commit(self, revid):
551
return sha_to_hex(self.db["commit\0" + revid][:20])
553
def lookup_blob_id(self, fileid, revision):
554
return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])
556
def lookup_git_sha(self, sha):
557
"""Lookup a Git sha in the database.
559
:param sha: Git object sha
560
:return: (type, type_data) with type_data:
561
revision: revid, tree sha
564
sha = hex_to_sha(sha)
565
data = self.db["git\0" + sha].split("\0")
566
return (data[0], (data[1], data[2]))
568
def missing_revisions(self, revids):
571
if self.db.get("commit\0" + revid) is None:
576
"""List the revision ids known."""
577
for key in self.db.iterkeys():
578
if key.startswith("commit\0"):
582
"""List the SHA1s."""
583
for key in self.db.iterkeys():
584
if key.startswith("git\0"):
585
yield sha_to_hex(key[4:])
588
class VersionedFilesContentCache(ContentCache):
590
def __init__(self, vf):
594
self._vf.insert_record_stream(
595
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
596
obj.as_legacy_object_chunks())])
598
def __getitem__(self, sha):
599
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
600
entry = stream.next()
601
if entry.storage_kind == 'absent':
603
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
606
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates storage to a Git object store."""

    def __init__(self, store):
        # ``store`` must provide add_object() and item lookup by sha.
        self.store = store

    def add(self, obj):
        """Add ``obj`` to the underlying store."""
        self.store.add_object(obj)

    def __getitem__(self, sha):
        """Return the object stored under ``sha``."""
        backing_store = self.store
        return backing_store[sha]
618
class IndexCacheUpdater(CacheUpdater):
620
def __init__(self, cache, rev):
622
self.revid = rev.revision_id
623
self.parent_revids = rev.parent_ids
627
def add_object(self, obj, ie):
628
if obj.type_name == "commit":
631
self.cache.idmap._add_git_sha(obj.id, "commit",
632
(self.revid, obj.tree))
633
self.cache.idmap._add_node(("commit", self.revid, "X"),
634
" ".join((obj.id, obj.tree)))
635
self.cache.content_cache.add(obj)
636
elif obj.type_name == "blob":
637
self.cache.idmap._add_git_sha(obj.id, "blob",
638
(ie.file_id, ie.revision))
639
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
640
if ie.kind == "symlink":
641
self.cache.content_cache.add(obj)
642
elif obj.type_name == "tree":
643
self.cache.idmap._add_git_sha(obj.id, "tree",
644
(ie.file_id, self.revid))
645
self.cache.content_cache.add(obj)
653
class IndexBzrGitCache(BzrGitCache):
    """Cache pairing an IndexGitShaMap with a Git object store.

    The sha map is kept under ``index`` and the cached objects under
    ``objects``, both relative to the given transport.
    """

    def __init__(self, transport=None):
        # NOTE(review): despite the None default, a transport is required;
        # ``transport.clone`` is called unconditionally below.
        shamap = IndexGitShaMap(transport.clone('index'))
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
667
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format with an index-based sha map and a Git object cache."""

    def get_format_string(self):
        """Return the format marker for the index-based cache."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the cache layout on ``transport``.

        Writes the format marker, creates the ``index`` and ``objects``
        directories and initializes an object store in ``objects``.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for subdir in ('index', 'objects'):
            transport.mkdir(subdir)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Open the index-based cache stored on ``transport``."""
        return IndexBzrGitCache(transport)
683
class IndexGitShaMap(GitShaMap):
684
"""SHA Map that uses the Bazaar APIs to store a cache.
686
BTree Index file with the following contents:
688
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
689
("commit", <revid>) -> "<sha1> <tree-id>"
690
("blob", <fileid>, <revid>) -> <sha1>
694
def __init__(self, transport=None):
695
if transport is None:
696
self._transport = None
697
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
698
self._builder = self._index
701
self._transport = transport
702
self._index = _mod_index.CombinedGraphIndex([])
703
for name in self._transport.list_dir("."):
704
if not name.endswith(".rix"):
706
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
707
self._transport.stat(name).st_size)
708
self._index.insert_index(0, x)
711
def from_repository(cls, repository):
712
transport = getattr(repository, "_transport", None)
713
if transport is not None:
715
transport.mkdir('git')
716
except bzrlib.errors.FileExists:
718
return cls(transport.clone('git'))
719
from bzrlib.transport import get_transport
720
return cls(get_transport(get_cache_dir()))
723
if self._transport is not None:
724
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
726
return "%s()" % (self.__class__.__name__)
729
assert self._builder is None
730
self.start_write_group()
731
for _, key, value in self._index.iter_all_entries():
732
self._builder.add_node(key, value)
734
for name in self._transport.list_dir('.'):
735
if name.endswith('.rix'):
736
to_remove.append(name)
737
self.commit_write_group()
738
del self._index.indices[1:]
739
for name in to_remove:
740
self._transport.rename(name, name + '.old')
742
def start_write_group(self):
743
assert self._builder is None
744
self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
745
self._name = osutils.sha()
747
def commit_write_group(self):
748
assert self._builder is not None
749
stream = self._builder.finish()
750
name = self._name.hexdigest() + ".rix"
751
size = self._transport.put_file(name, stream)
752
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
753
self._index.insert_index(0, index)
757
def abort_write_group(self):
758
assert self._builder is not None
762
def _add_node(self, key, value):
764
self._builder.add_node(key, value)
765
except bzrlib.errors.BadIndexDuplicateKey:
766
# Multiple bzr objects can have the same contents
771
def _get_entry(self, key):
772
entries = self._index.iter_entries([key])
774
return entries.next()[2]
775
except StopIteration:
776
if self._builder is None:
778
entries = self._builder.iter_entries([key])
780
return entries.next()[2]
781
except StopIteration:
784
def _iter_keys_prefix(self, prefix):
785
for entry in self._index.iter_entries_prefix([prefix]):
787
if self._builder is not None:
788
for entry in self._builder.iter_entries_prefix([prefix]):
791
def lookup_commit(self, revid):
792
return self._get_entry(("commit", revid, "X"))[:40]
794
def _add_git_sha(self, hexsha, type, type_data):
795
if hexsha is not None:
796
self._name.update(hexsha)
797
self._add_node(("git", hexsha, "X"),
798
" ".join((type, type_data[0], type_data[1])))
800
# This object is not represented in Git - perhaps an empty
802
self._name.update(type + " ".join(type_data))
804
def lookup_blob_id(self, fileid, revision):
805
return self._get_entry(("blob", fileid, revision))
807
def lookup_git_sha(self, sha):
809
sha = sha_to_hex(sha)
810
data = self._get_entry(("git", sha, "X")).split(" ", 2)
811
return (data[0], (data[1], data[2]))
814
"""List the revision ids known."""
815
for key in self._iter_keys_prefix(("commit", None, None)):
818
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    Only ``self._index`` is consulted; entries still held in a pending
    builder are not taken into account.

    :param revids: Iterable of revision ids; it is consumed exactly once,
        so one-shot iterators are supported
    :return: set of the revision ids with no "commit" entry in the index
    """
    missing_revids = set(revids)
    # Build the lookup keys from the materialised set rather than from
    # ``revids`` again: a generator would already be exhausted here.
    keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, _value in self._index.iter_entries(keys):
        # discard() tolerates the same key being reported more than once.
        missing_revids.discard(key[1])
    return missing_revids
827
"""List the SHA1s."""
828
for key in self._iter_keys_prefix(("git", None, None)):
832
formats = registry.Registry()
833
formats.register(TdbGitCacheFormat().get_format_string(),
835
formats.register(SqliteGitCacheFormat().get_format_string(),
836
SqliteGitCacheFormat())
837
formats.register(IndexGitCacheFormat().get_format_string(),
838
IndexGitCacheFormat())
839
# In the future, this will become the default:
840
# formats.register('default', IndexGitCacheFormat())
844
formats.register('default', SqliteGitCacheFormat())
846
formats.register('default', TdbGitCacheFormat())
850
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the ``git`` directory.

    If ``git.db`` exists it becomes ``git/idmap.db`` with a sqlite format
    marker; otherwise, if ``git.tdb`` exists, it becomes ``git/idmap.tdb``
    with a tdb format marker. Nothing happens when neither file exists.

    :param repo_transport: transport of the repository holding the files
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        old_name, new_name = "git.db", "git/idmap.db"
    elif repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        old_name, new_name = "git.tdb", "git/idmap.tdb"
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(old_name, new_name)
861
def remove_readonly_transport_decorator(transport):
    """Return the transport underneath a read-only decorator.

    :param transport: transport to inspect
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    return transport._decorated if transport.is_readonly() else transport
867
def from_repository(repository):
868
"""Open a cache file for a repository.
870
If the repository is remote and there is no transport available from it
871
this will use a local file in the users cache directory
872
(typically ~/.cache/bazaar/git/)
874
:param repository: A repository object
876
repo_transport = getattr(repository, "_transport", None)
877
if repo_transport is not None:
878
# Migrate older cache formats
879
repo_transport = remove_readonly_transport_decorator(repo_transport)
881
repo_transport.mkdir("git")
882
except bzrlib.errors.FileExists:
885
migrate_ancient_formats(repo_transport)
886
return BzrGitCacheFormat.from_repository(repository)