1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Map from Git sha's to Bazaar objects."""
19
from dulwich.objects import (
26
from dulwich.objects import (
32
btree_index as _mod_btree_index,
39
from bzrlib.transport import (
46
from xdg.BaseDirectory import xdg_cache_home
48
from bzrlib.config import config_dir
49
ret = os.path.join(config_dir(), "git")
51
ret = os.path.join(xdg_cache_home, "bazaar", "git")
52
if not os.path.isdir(ret):
57
def get_remote_cache_transport():
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    :return: The result of get_transport() on the directory returned by
        get_cache_dir()
    """
    cache_dir = get_cache_dir()
    return get_transport(cache_dir)
64
def check_pysqlite_version(sqlite3):
    """Check that the sqlite library is compatible.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raises bzrlib.errors.BzrError: if the library is older than 3.3
    """
    version = sqlite3.sqlite_version_info
    too_old = version[0] < 3 or (version[0] == 3 and version[1] < 3)
    if not too_old:
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
77
check_pysqlite_version(sqlite3)
78
except (ImportError, bzrlib.errors.BzrError), e:
79
from pysqlite2 import dbapi2 as sqlite3
80
check_pysqlite_version(sqlite3)
82
trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
84
raise bzrlib.errors.BzrError("missing sqlite library")
87
_mapdbs = threading.local()
89
"""Get a cache for this thread's db connections."""
92
except AttributeError:
97
class GitShaMap(object):
98
"""Git<->Bzr revision id mapping database."""
100
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the database.

    Abstract here: this implementation always raises.

    :param sha: Git object sha
    :return: (type, type_data) with type_data:
        revision: revid, tree sha
    :raises NotImplementedError: always
    """
    method = self.lookup_git_sha
    raise NotImplementedError(method)
108
def lookup_blob_id(self, file_id, revision):
    """Retrieve a Git blob SHA by file id.

    Abstract here: this implementation always raises.

    :param file_id: File id of the file/symlink
    :param revision: revision in which the file was last changed.
    :raises NotImplementedError: always
    """
    method = self.lookup_blob_id
    raise NotImplementedError(method)
116
def lookup_tree_id(self, file_id, revision):
    """Retrieve a Git tree SHA by file id.

    Abstract here: this implementation always raises.

    :raises NotImplementedError: always
    """
    method = self.lookup_tree_id
    raise NotImplementedError(method)
122
"""List the revision ids known."""
123
raise NotImplementedError(self.revids)
125
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    :param revids: Iterable of revision ids to check
    :return: The given revision ids minus those yielded by self.revids()
    """
    known = set(self.revids())
    wanted = revids if isinstance(revids, set) else set(revids)
    return wanted - known
133
"""List the SHA1s."""
134
raise NotImplementedError(self.sha1s)
136
def start_write_group(self):
    """Start writing changes.

    This implementation does nothing.
    """
    return None
139
def commit_write_group(self):
    """Commit any pending changes.

    This implementation does nothing.
    """
    return None
142
def abort_write_group(self):
    """Abort any pending changes.

    This implementation does nothing.
    """
    return None
146
class ContentCache(object):
147
"""Object that can cache Git objects."""
149
def add(self, object):
151
raise NotImplementedError(self.add)
153
def add_multi(self, objects):
154
"""Add multiple objects."""
158
def __getitem__(self, sha):
159
"""Retrieve an item, by SHA."""
160
raise NotImplementedError(self.__getitem__)
163
class BzrGitCacheFormat(object):
164
"""Bazaar-Git Cache Format."""
166
def get_format_string(self):
    """Return a single-line unique format string for this cache format.

    Abstract here: this implementation always raises.

    :raises NotImplementedError: always
    """
    method = self.get_format_string
    raise NotImplementedError(method)
170
def open(self, transport):
    """Open this format on a transport.

    Abstract here: this implementation always raises.

    :param transport: Transport the cache lives on
    :raises NotImplementedError: always
    """
    method = self.open
    raise NotImplementedError(method)
174
def initialize(self, transport):
    """Create a new instance of this cache format at transport.

    Writes the value of get_format_string() to a file named 'format'.

    :param transport: Transport to create the cache on
    """
    format_string = self.get_format_string()
    transport.put_bytes('format', format_string)
179
def from_transport(self, transport):
180
"""Open a cache file present on a transport, or initialize one.
182
:param transport: Transport to use
183
:return: A BzrGitCache instance
186
format_name = transport.get_bytes('format')
187
format = formats.get(format_name)
188
except bzrlib.errors.NoSuchFile:
189
format = formats.get('default')
190
format.initialize(transport)
191
return format.open(transport)
194
def from_repository(cls, repository):
195
"""Open a cache file for a repository.
197
This will use the repository's transport to store the cache file, or
198
use the users global cache directory if the repository has no
199
transport associated with it.
201
:param repository: Repository to open the cache for
202
:return: A `BzrGitCache`
204
repo_transport = getattr(repository, "_transport", None)
205
if repo_transport is not None:
206
# Even if we don't write to this repo, we should be able
207
# to update its cache.
208
repo_transport = remove_readonly_transport_decorator(repo_transport)
210
repo_transport.mkdir('git')
211
except bzrlib.errors.FileExists:
213
transport = repo_transport.clone('git')
215
transport = get_remote_cache_transport()
216
return cls.from_transport(transport)
219
class CacheUpdater(object):
220
"""Base class for objects that can update a bzr-git cache."""
222
def add_object(self, obj, ie, path):
    """Record an object in the cache.

    Abstract here: this implementation always raises.

    :raises NotImplementedError: always
    """
    method = self.add_object
    raise NotImplementedError(method)
226
raise NotImplementedError(self.finish)
229
class BzrGitCache(object):
230
"""Caching backend."""
232
def __init__(self, idmap, content_cache, cache_updater_klass):
234
self.content_cache = content_cache
235
self._cache_updater_klass = cache_updater_klass
237
def get_updater(self, rev):
    """Create an updater object for this cache.

    :param rev: Revision passed through to the updater
    :return: Result of calling the updater class this cache was created
        with, given this cache and `rev`
    """
    updater_factory = self._cache_updater_klass
    return updater_factory(self, rev)
244
def DictBzrGitCache():
    """Create a BzrGitCache using a DictGitShaMap, no content cache and
    DictCacheUpdater as its updater class."""
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
247
class DictCacheUpdater(CacheUpdater):
248
"""Cache updater for dict-based caches."""
250
def __init__(self, cache, rev):
252
self.revid = rev.revision_id
253
self.parent_revids = rev.parent_ids
257
def add_object(self, obj, ie, path):
258
if obj.type_name == "commit":
261
type_data = (self.revid, self._commit.tree)
262
self.cache.idmap._by_revid[self.revid] = obj.id
263
elif obj.type_name in ("blob", "tree"):
265
if obj.type_name == "blob":
266
revision = ie.revision
268
revision = self.revid
269
type_data = (ie.file_id, revision)
270
self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] =\
274
self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)
277
if self._commit is None:
278
raise AssertionError("No commit object added")
282
class DictGitShaMap(GitShaMap):
283
"""Git SHA map that uses a dictionary."""
290
def lookup_blob_id(self, fileid, revision):
    """Retrieve a Git blob SHA by file id.

    :param fileid: File id of the entry
    :param revision: Revision the entry is stored under
    :raises KeyError: if the revision or file id is not in the map
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
293
def lookup_git_sha(self, sha):
    """Lookup a Git sha in the dictionary.

    :param sha: Git object sha
    :return: The value stored for `sha` in the by-sha dictionary
    :raises KeyError: if the sha is not in the map
    """
    by_sha = self._by_sha
    return by_sha[sha]
296
def lookup_tree_id(self, fileid, revision):
    """Retrieve a Git tree SHA by file id.

    Reads the same per-revision dictionary as lookup_blob_id.

    :raises KeyError: if the revision or file id is not in the map
    """
    entries_for_revision = self._by_fileid[revision]
    return entries_for_revision[fileid]
299
def lookup_commit(self, revid):
    """Return the value stored for a revision id in the by-revid dictionary.

    :param revid: Bazaar revision id
    :raises KeyError: if the revision id is not in the map
    """
    by_revid = self._by_revid
    return by_revid[revid]
303
for key, (type, type_data) in self._by_sha.iteritems():
308
return self._by_sha.iterkeys()
311
class SqliteCacheUpdater(CacheUpdater):
313
def __init__(self, cache, rev):
315
self.db = self.cache.idmap.db
316
self.revid = rev.revision_id
321
def add_object(self, obj, ie, path):
322
if obj.type_name == "commit":
325
elif obj.type_name == "tree":
327
self._trees.append((obj.id, ie.file_id, self.revid))
328
elif obj.type_name == "blob":
330
self._blobs.append((obj.id, ie.file_id, ie.revision))
335
if self._commit is None:
336
raise AssertionError("No commit object added")
338
"replace into trees (sha1, fileid, revid) values (?, ?, ?)",
341
"replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
344
"replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
345
(self._commit.id, self.revid, self._commit.tree))
349
def SqliteBzrGitCache(p):
    """Create a BzrGitCache using SqliteGitShaMap(p), no content cache and
    SqliteCacheUpdater as its updater class.

    :param p: Argument passed to SqliteGitShaMap
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
352
class SqliteGitCacheFormat(BzrGitCacheFormat):
354
def get_format_string(self):
    """Return the format string identifying the sqlite-based cache."""
    format_string = 'bzr-git sha map version 1 using sqlite\n'
    return format_string
357
def open(self, transport):
    """Open the sqlite cache for a transport.

    The database file is "idmap.db" in the transport's local directory.
    When the transport raises NotLocalUrl, the directory returned by
    get_cache_dir() is used instead.

    :param transport: Transport the cache lives on
    :return: Result of SqliteBzrGitCache() for the database path
    """
    try:
        basepath = transport.local_abspath(".")
    except bzrlib.errors.NotLocalUrl:
        basepath = get_cache_dir()
    db_path = os.path.join(basepath, "idmap.db")
    return SqliteBzrGitCache(db_path)
365
class SqliteGitShaMap(GitShaMap):
366
"""Bazaar GIT Sha map that uses a sqlite database for storage."""
368
def __init__(self, path=None):
371
self.db = sqlite3.connect(":memory:")
373
if not mapdbs().has_key(path):
374
mapdbs()[path] = sqlite3.connect(path)
375
self.db = mapdbs()[path]
376
self.db.text_factory = str
377
self.db.executescript("""
378
create table if not exists commits(
379
sha1 text not null check(length(sha1) == 40),
381
tree_sha text not null check(length(tree_sha) == 40)
383
create index if not exists commit_sha1 on commits(sha1);
384
create unique index if not exists commit_revid on commits(revid);
385
create table if not exists blobs(
386
sha1 text not null check(length(sha1) == 40),
387
fileid text not null,
390
create index if not exists blobs_sha1 on blobs(sha1);
391
create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
392
create table if not exists trees(
393
sha1 text unique not null check(length(sha1) == 40),
394
fileid text not null,
397
create unique index if not exists trees_sha1 on trees(sha1);
398
create unique index if not exists trees_fileid_revid on trees(fileid, revid);
402
return "%s(%r)" % (self.__class__.__name__, self.path)
404
def lookup_commit(self, revid):
405
cursor = self.db.execute("select sha1 from commits where revid = ?",
407
row = cursor.fetchone()
412
def commit_write_group(self):
415
def lookup_blob_id(self, fileid, revision):
416
row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
419
raise KeyError(fileid)
421
def lookup_tree_id(self, fileid, revision):
422
row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
425
raise KeyError(fileid)
427
def lookup_git_sha(self, sha):
428
"""Lookup a Git sha in the database.
430
:param sha: Git object sha
431
:return: (type, type_data) with type_data:
432
revision: revid, tree sha
434
row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
436
return ("commit", row)
437
row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
440
row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
446
"""List the revision ids known."""
447
return (row for (row,) in self.db.execute("select revid from commits"))
450
"""List the SHA1s."""
451
for table in ("blobs", "commits", "trees"):
452
for (sha,) in self.db.execute("select sha1 from %s" % table):
456
class TdbCacheUpdater(CacheUpdater):
457
"""Cache updater for tdb-based caches."""
459
def __init__(self, cache, rev):
461
self.db = cache.idmap.db
462
self.revid = rev.revision_id
463
self.parent_revids = rev.parent_ids
467
def add_object(self, obj, ie, path):
468
sha = obj.sha().digest()
469
if obj.type_name == "commit":
470
self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
471
type_data = (self.revid, obj.tree)
474
elif obj.type_name == "blob":
477
self.db["\0".join(("blob", ie.file_id, ie.revision))] = sha
478
type_data = (ie.file_id, ie.revision)
479
elif obj.type_name == "tree":
482
type_data = (ie.file_id, self.revid)
485
self.db["git\0" + sha] = "\0".join((obj.type_name, ) + type_data)
488
if self._commit is None:
489
raise AssertionError("No commit object added")
493
def TdbBzrGitCache(p):
    """Create a BzrGitCache using TdbGitShaMap(p), no content cache and
    TdbCacheUpdater as its updater class.

    :param p: Argument passed to TdbGitShaMap
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
495
class TdbGitCacheFormat(BzrGitCacheFormat):
496
"""Cache format for tdb-based caches."""
498
def get_format_string(self):
    """Return the format string identifying the tdb-based cache."""
    format_string = 'bzr-git sha map version 3 using tdb\n'
    return format_string
501
def open(self, transport):
503
basepath = transport.local_abspath(".")
504
except bzrlib.errors.NotLocalUrl:
505
basepath = get_cache_dir()
507
return TdbBzrGitCache(os.path.join(basepath, "idmap.tdb"))
510
"Unable to open existing bzr-git cache because 'tdb' is not "
514
class TdbGitShaMap(GitShaMap):
515
"""SHA Map that uses a TDB database.
519
"git <sha1>" -> "<type> <type-data1> <type-data2>"
520
"commit revid" -> "<sha1> <tree-id>"
521
"tree fileid revid" -> "<sha1>"
522
"blob fileid revid" -> "<sha1>"
526
TDB_HASH_SIZE = 50000
528
def __init__(self, path=None):
534
if not mapdbs().has_key(path):
535
mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
536
os.O_RDWR|os.O_CREAT)
537
self.db = mapdbs()[path]
539
if int(self.db["version"]) not in (2, 3):
540
trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
541
self.db["version"], self.TDB_MAP_VERSION)
545
self.db["version"] = str(self.TDB_MAP_VERSION)
547
def start_write_group(self):
    """Start writing changes by starting a transaction on the database."""
    database = self.db
    database.transaction_start()
551
def commit_write_group(self):
    """Commit any pending changes by committing the database transaction."""
    database = self.db
    database.transaction_commit()
555
def abort_write_group(self):
    """Abort any pending changes by cancelling the database transaction."""
    database = self.db
    database.transaction_cancel()
560
return "%s(%r)" % (self.__class__.__name__, self.path)
562
def lookup_commit(self, revid):
    """Return the hex sha stored for a revision id.

    Reads the record keyed "commit\\0<revid>" and converts its first 20
    bytes with sha_to_hex().

    :param revid: Bazaar revision id
    """
    record = self.db["commit\0" + revid]
    binary_sha = record[:20]
    return sha_to_hex(binary_sha)
565
def lookup_blob_id(self, fileid, revision):
    """Return the hex sha stored for a blob.

    Reads the record keyed "blob\\0<fileid>\\0<revision>" and converts it
    with sha_to_hex().

    :param fileid: File id of the blob
    :param revision: Revision the blob is stored under
    """
    key = "\0".join(("blob", fileid, revision))
    return sha_to_hex(self.db[key])
568
def lookup_git_sha(self, sha):
569
"""Lookup a Git sha in the database.
571
:param sha: Git object sha
572
:return: (type, type_data) with type_data:
573
revision: revid, tree sha
576
sha = hex_to_sha(sha)
577
data = self.db["git\0" + sha].split("\0")
578
return (data[0], (data[1], data[2]))
580
def missing_revisions(self, revids):
583
if self.db.get("commit\0" + revid) is None:
588
"""List the revision ids known."""
589
for key in self.db.iterkeys():
590
if key.startswith("commit\0"):
594
"""List the SHA1s."""
595
for key in self.db.iterkeys():
596
if key.startswith("git\0"):
597
yield sha_to_hex(key[4:])
600
class VersionedFilesContentCache(ContentCache):
602
def __init__(self, vf):
606
self._vf.insert_record_stream(
607
[versionedfile.ChunkedContentFactory((obj.id,), [], None,
608
obj.as_legacy_object_chunks())])
610
def __getitem__(self, sha):
611
stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
612
entry = stream.next()
613
if entry.storage_kind == 'absent':
615
return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
618
class GitObjectStoreContentCache(ContentCache):
620
def __init__(self, store):
623
def add_multi(self, objs):
    """Add multiple objects by passing them to the store's add_objects().

    :param objs: Objects to add, handed to the store unchanged
    """
    store = self.store
    store.add_objects(objs)
626
def add(self, obj, path):
    """Add a single object by passing it to the store's add_object().

    :param obj: Object to add
    :param path: Accepted but not used by this implementation
    """
    store = self.store
    store.add_object(obj)
629
def __getitem__(self, sha):
630
return self.store[sha]
633
class IndexCacheUpdater(CacheUpdater):
635
def __init__(self, cache, rev):
637
self.revid = rev.revision_id
638
self.parent_revids = rev.parent_ids
641
self._cache_objs = set()
643
def add_object(self, obj, ie, path):
644
if obj.type_name == "commit":
647
self.cache.idmap._add_git_sha(obj.id, "commit",
648
(self.revid, obj.tree))
649
self.cache.idmap._add_node(("commit", self.revid, "X"),
650
" ".join((obj.id, obj.tree)))
651
self._cache_objs.add((obj, path))
652
elif obj.type_name == "blob":
653
self.cache.idmap._add_git_sha(obj.id, "blob",
654
(ie.file_id, ie.revision))
655
self.cache.idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
656
if ie.kind == "symlink":
657
self._cache_objs.add((obj, path))
658
elif obj.type_name == "tree":
659
self.cache.idmap._add_git_sha(obj.id, "tree",
660
(ie.file_id, self.revid))
661
self._cache_objs.add((obj, path))
666
self.cache.content_cache.add_multi(self._cache_objs)
670
class IndexBzrGitCache(BzrGitCache):
672
def __init__(self, transport=None):
673
mapper = versionedfile.ConstantMapper("trees")
674
shamap = IndexGitShaMap(transport.clone('index'))
675
#trees_store = knit.make_file_factory(True, mapper)(transport)
676
#content_cache = VersionedFilesContentCache(trees_store)
677
from bzrlib.plugins.git.transportgit import TransportObjectStore
678
store = TransportObjectStore(transport.clone('objects'))
679
content_cache = GitObjectStoreContentCache(store)
680
super(IndexBzrGitCache, self).__init__(shamap, content_cache,
684
class IndexGitCacheFormat(BzrGitCacheFormat):
686
def get_format_string(self):
    """Return the format string identifying the index-based cache."""
    format_string = 'bzr-git sha map with git object cache version 1\n'
    return format_string
689
def initialize(self, transport):
    """Create a new cache of this format at transport.

    Runs the base-class initialize(), creates the 'index' and 'objects'
    subdirectories and initialises a TransportObjectStore in 'objects'.

    :param transport: Transport to create the cache on
    """
    super(IndexGitCacheFormat, self).initialize(transport)
    for subdir in ('index', 'objects'):
        transport.mkdir(subdir)
    # Imported inside the function, after the directories are created, as in
    # the original code.
    from bzrlib.plugins.git.transportgit import TransportObjectStore
    objects_transport = transport.clone('objects')
    TransportObjectStore.init(objects_transport)
696
def open(self, transport):
    """Open this format on a transport.

    :param transport: Transport the cache lives on
    :return: An IndexBzrGitCache built on `transport`
    """
    cache = IndexBzrGitCache(transport)
    return cache
700
class IndexGitShaMap(GitShaMap):
701
"""SHA Map that uses the Bazaar APIs to store a cache.
703
BTree Index file with the following contents:
705
("git", <sha1>) -> "<type> <type-data1> <type-data2>"
706
("commit", <revid>) -> "<sha1> <tree-id>"
707
("blob", <fileid>, <revid>) -> <sha1>
711
def __init__(self, transport=None):
712
if transport is None:
713
self._transport = None
714
self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
715
self._builder = self._index
718
self._transport = transport
719
self._index = _mod_index.CombinedGraphIndex([])
720
for name in self._transport.list_dir("."):
721
if not name.endswith(".rix"):
723
x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
724
self._transport.stat(name).st_size)
725
self._index.insert_index(0, x)
728
def from_repository(cls, repository):
729
transport = getattr(repository, "_transport", None)
730
if transport is not None:
732
transport.mkdir('git')
733
except bzrlib.errors.FileExists:
735
return cls(transport.clone('git'))
736
from bzrlib.transport import get_transport
737
return cls(get_transport(get_cache_dir()))
740
if self._transport is not None:
741
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
743
return "%s()" % (self.__class__.__name__)
746
assert self._builder is None
747
self.start_write_group()
748
for _, key, value in self._index.iter_all_entries():
749
self._builder.add_node(key, value)
751
for name in self._transport.list_dir('.'):
752
if name.endswith('.rix'):
753
to_remove.append(name)
754
self.commit_write_group()
755
del self._index.indices[1:]
756
for name in to_remove:
757
self._transport.rename(name, name + '.old')
759
def start_write_group(self):
    """Start writing changes.

    Creates a new BTreeBuilder with three key elements and a new
    osutils.sha() object for the name. Asserts that no builder is already
    set.
    """
    assert self._builder is None
    builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
    self._builder = builder
    self._name = osutils.sha()
764
def commit_write_group(self):
765
assert self._builder is not None
766
stream = self._builder.finish()
767
name = self._name.hexdigest() + ".rix"
768
size = self._transport.put_file(name, stream)
769
index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
770
self._index.insert_index(0, index)
774
def abort_write_group(self):
775
assert self._builder is not None
779
def _add_node(self, key, value):
781
self._builder.add_node(key, value)
782
except bzrlib.errors.BadIndexDuplicateKey:
783
# Multiple bzr objects can have the same contents
788
def _get_entry(self, key):
789
entries = self._index.iter_entries([key])
791
return entries.next()[2]
792
except StopIteration:
793
if self._builder is None:
795
entries = self._builder.iter_entries([key])
797
return entries.next()[2]
798
except StopIteration:
801
def _iter_keys_prefix(self, prefix):
802
for entry in self._index.iter_entries_prefix([prefix]):
804
if self._builder is not None:
805
for entry in self._builder.iter_entries_prefix([prefix]):
808
def lookup_commit(self, revid):
    """Return the first 40 characters of the entry for a revision id.

    The entry is looked up under the key ("commit", revid, "X").

    :param revid: Bazaar revision id
    """
    entry = self._get_entry(("commit", revid, "X"))
    return entry[:40]
811
def _add_git_sha(self, hexsha, type, type_data):
    """Feed an object into the name hash and, if it has a sha, index it.

    :param hexsha: Hex sha of the Git object, or None
    :param type: Object type name, stored as the first field of the value
    :param type_data: Sequence of strings; its first two items are stored
        after the type
    """
    if hexsha is None:
        # This object is not represented in Git - perhaps an empty
        # directory?
        self._name.update(type + " ".join(type_data))
        return
    self._name.update(hexsha)
    value = " ".join((type, type_data[0], type_data[1]))
    self._add_node(("git", hexsha, "X"), value)
821
def lookup_blob_id(self, fileid, revision):
    """Return the entry stored under the key ("blob", fileid, revision).

    :param fileid: File id of the blob
    :param revision: Revision the blob is stored under
    """
    key = ("blob", fileid, revision)
    return self._get_entry(key)
824
def lookup_git_sha(self, sha):
826
sha = sha_to_hex(sha)
827
data = self._get_entry(("git", sha, "X")).split(" ", 2)
828
return (data[0], (data[1], data[2]))
831
"""List the revision ids known."""
832
for key in self._iter_keys_prefix(("commit", None, None)):
835
def missing_revisions(self, revids):
    """Return set of all the revisions that are not present.

    A revision counts as present when the index yields an entry for the
    key ("commit", revid, "X").

    :param revids: Iterable of revision ids; may be a one-shot iterator
    :return: Set of the given revision ids with no commit entry
    """
    missing_revids = set(revids)
    # Build the keys from the set, not from `revids`: the argument may be an
    # iterator already exhausted by set() above. A list is used so the set
    # can be modified while the index lookup is still running.
    keys = [("commit", revid, "X") for revid in missing_revids]
    for _, key, value in self._index.iter_entries(keys):
        # discard() rather than remove(): tolerate an index that yields the
        # same key more than once.
        missing_revids.discard(key[1])
    return missing_revids
844
"""List the SHA1s."""
845
for key in self._iter_keys_prefix(("git", None, None)):
849
formats = registry.Registry()
850
formats.register(TdbGitCacheFormat().get_format_string(),
852
formats.register(SqliteGitCacheFormat().get_format_string(),
853
SqliteGitCacheFormat())
854
formats.register(IndexGitCacheFormat().get_format_string(),
855
IndexGitCacheFormat())
856
# In the future, this will become the default:
857
# formats.register('default', IndexGitCacheFormat())
861
formats.register('default', SqliteGitCacheFormat())
863
formats.register('default', TdbGitCacheFormat())
867
def migrate_ancient_formats(repo_transport):
    """Move an old top-level cache file into the "git" subdirectory.

    "git.db" becomes "git/idmap.db" and "git.tdb" becomes "git/idmap.tdb",
    after the matching format has been initialised in "git". Nothing is
    done when neither file exists.

    :param repo_transport: Transport of the repository control directory
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        cache_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
        return
    if repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        cache_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
878
def remove_readonly_transport_decorator(transport):
    """Return the transport wrapped by a read-only decorator, if any.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` when the transport reports itself
        read-only and has that attribute; otherwise `transport` itself
    """
    if transport.is_readonly():
        # Not every read-only transport is a decorator. Without a wrapped
        # transport, hand back the original instead of failing with
        # AttributeError here.
        return getattr(transport, "_decorated", transport)
    return transport
884
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the users cache directory
    (typically ~/.cache/bazaar/git/)

    :param repository: A repository object
    :return: Result of BzrGitCacheFormat.from_repository(repository)
    """
    repo_transport = getattr(repository, "_transport", None)
    if repo_transport is not None:
        # Migrate older cache formats
        writable = remove_readonly_transport_decorator(repo_transport)
        try:
            writable.mkdir("git")
        except bzrlib.errors.FileExists:
            # The "git" directory already exists, so no migration is run.
            pass
        else:
            migrate_ancient_formats(writable)
    return BzrGitCacheFormat.from_repository(repository)