/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
19
from dulwich.objects import (
20
    sha_to_hex,
21
    hex_to_sha,
22
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
23
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
24
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
25
0.200.228 by Jelmer Vernooij
Split out map.
26
import bzrlib
0.200.528 by Jelmer Vernooij
Fix import.
27
from bzrlib import (
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
28
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
29
    trace,
30
    )
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
31
from bzrlib.transport import (
32
    get_transport,
33
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
34
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
35
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
36
def get_cache_dir():
    """Return the directory that holds bzr-git caches, creating it if needed.

    When the python ``xdg`` module can be imported the directory is
    ``<xdg_cache_home>/bazaar/git``; otherwise it is the ``git``
    subdirectory of the Bazaar configuration directory.

    :return: Path of the cache directory
    :raises OSError: if the directory does not exist and cannot be created
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from bzrlib.config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    if not os.path.isdir(ret):
        try:
            os.makedirs(ret)
        except OSError:
            # Another thread or process may have created the directory
            # between the isdir() check and makedirs(); only a directory
            # that is still missing is a real error.
            if not os.path.isdir(ret):
                raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
47
48
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
49
def get_remote_cache_transport():
    """Return a transport opened on the user's global cache directory."""
    cache_dir = get_cache_dir()
    return get_transport(cache_dir)
51
52
0.200.228 by Jelmer Vernooij
Split out map.
53
def check_pysqlite_version(sqlite3):
    """Check that the sqlite library behind a DB-API module is compatible.

    :param sqlite3: Module exposing a ``sqlite_version_info`` tuple
    :raises BzrError: if the sqlite library is older than 3.3
    """
    version = sqlite3.sqlite_version_info
    major = version[0]
    if major > 3:
        return
    if major == 3 and version[1] >= 3:
        return
    # Anything left is older than 3.3.
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
62
63
# Find a usable sqlite DB-API module: prefer the standard library's sqlite3
# and fall back to the external pysqlite2 package when it is missing or
# linked against a sqlite library that is too old.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzrlib.errors.BzrError):
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
except Exception:
    # 'except Exception' rather than a bare 'except' so that
    # KeyboardInterrupt and SystemExit are not turned into a BzrError.
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzrlib.errors.BzrError("missing sqlite library")
74
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
75
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
76
# Thread-local holder; each thread gets its own 'cache' attribute.
_mapdbs = threading.local()
def mapdbs():
    """Return the calling thread's dictionary of db connections.

    The dictionary is created the first time a thread asks for it.
    """
    cache = getattr(_mapdbs, "cache", None)
    if cache is None:
        cache = _mapdbs.cache = {}
    return cache
84
85
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
86
class GitShaMap(object):
    """Abstract mapping between Git object shas and Bazaar ids.

    Subclasses provide the lookups; the write-group hooks are no-ops
    unless a subclass overrides them.
    """

    def lookup_git_sha(self, sha):
        """Look up what a Git sha refers to.

        :param sha: Git object sha
        :return: (type, type_data) with type_data:
            revision: revid, tree sha
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Return the Git blob SHA for a file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Return the Git tree SHA for a file id.

        :param file_id: File id of the tree
        :param revision: revision the tree is looked up in
        """
        raise NotImplementedError(self.lookup_tree_id)

    def revids(self):
        """Iterate over the revision ids present in the map."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return the subset of ``revids`` that the map does not contain.

        :param revids: Iterable of revision ids to check
        :return: Set of revision ids that are not present
        """
        known = set(self.revids())
        if isinstance(revids, set):
            wanted = revids
        else:
            wanted = set(revids)
        return wanted - known

    def sha1s(self):
        """Iterate over the Git SHA1s present in the map."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Begin a group of writes. Does nothing by default."""

    def commit_write_group(self):
        """Make pending writes permanent. Does nothing by default."""

    def abort_write_group(self):
        """Discard pending writes. Does nothing by default."""
133
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
134
0.200.845 by Jelmer Vernooij
Couple of minor fixes.
135
class ContentCache(object):
    """Abstract store from which Git objects can be fetched by SHA."""

    def __getitem__(self, sha):
        """Return the cached object for ``sha``; subclasses must implement."""
        raise NotImplementedError(self.__getitem__)
141
142
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
143
class BzrGitCacheFormat(object):
    """Base class for bzr-git cache formats.

    A cache location is identified by a ``format`` file holding the
    format string of the format that created it.
    """

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Mark a transport as holding a cache of this format.

        Writes this format's format string to the file ``format``.

        :param transport: Transport to write to
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        If no ``format`` file exists yet the default format is initialized
        on the transport first.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the users global cache directory if the repository has no
        transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        return cls.from_transport(transport)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
195
196
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
197
class CacheUpdater(object):
    """Abstract collector for the Git objects produced for one revision."""

    def add_object(self, obj, ie):
        """Record one Git object; subclasses must implement.

        :param obj: Git object to record
        :param ie: Entry carrying the file id / revision for the object;
            the implementations in this module expect None for commits
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; subclasses must implement."""
        raise NotImplementedError(self.finish)
204
205
206
class BzrGitCache(object):
    """Bundle of an id map, a content cache and an updater factory."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Create the cache.

        :param idmap: Id map stored as ``self.idmap``
        :param content_cache: Content cache stored as ``self.content_cache``
        :param cache_updater_klass: Callable taking (cache, rev) that
            builds the updater returned by `get_updater`
        """
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Return a new updater for ``rev`` bound to this cache."""
        make_updater = self._cache_updater_klass
        return make_updater(self, rev)
216
217
218
def DictBzrGitCache():
    """Create a BzrGitCache backed by an in-memory DictGitShaMap.

    No content cache is configured.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
219
220
221
class DictCacheUpdater(CacheUpdater):
    """Updater that writes straight into a DictGitShaMap's dictionaries."""

    def __init__(self, cache, rev):
        """Prepare an update of ``cache`` for the revision ``rev``."""
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie):
        """Register a commit, blob or tree object in the id map.

        :param obj: Git object; its ``type_name`` selects the handling
        :param ie: Entry supplying ``file_id`` (and ``revision`` for blobs);
            must be None for commits
        :raises AssertionError: for any other object type
        """
        kind = obj.type_name
        if kind == "commit":
            self._commit = obj
            assert ie is None
            type_data = (self.revid, obj.tree)
            self.cache.idmap._by_revid[self.revid] = obj.id
        elif kind in ("blob", "tree"):
            # Blobs are keyed by the entry's own revision, trees by the
            # revision being processed.
            revision = ie.revision if kind == "blob" else self.revid
            file_id = ie.file_id
            type_data = (file_id, revision)
            per_revision = self.cache.idmap._by_fileid.setdefault(revision, {})
            per_revision[file_id] = obj.id
        else:
            raise AssertionError
        self.cache.idmap._by_sha[obj.id] = (obj.type_name, type_data)

    def finish(self):
        """Return the commit object added earlier.

        :raises AssertionError: if no commit object was added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        return commit
251
252
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
253
class DictGitShaMap(GitShaMap):
    """Git<->Bzr id map kept in plain dictionaries in memory."""

    def __init__(self):
        # git sha -> (type, type_data)
        self._by_sha = {}
        # revision -> {file id -> git sha}
        self._by_fileid = {}
        # revision id -> git commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha for (fileid, revision); KeyError if unknown."""
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Return (type, type_data) for a git sha; KeyError if unknown."""
        return self._by_sha[sha]

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha for (fileid, revision); KeyError if unknown."""
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Return the commit sha for a revision id; KeyError if unknown."""
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision id of every commit in the map."""
        for kind, type_data in self._by_sha.values():
            if kind == "commit":
                yield type_data[0]

    def sha1s(self):
        """Return an iterator over all git shas in the map."""
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
279
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
280
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
281
class SqliteCacheUpdater(CacheUpdater):
    """Updater that buffers rows and writes them to sqlite in `finish`."""

    def __init__(self, cache, rev):
        """Prepare an update of ``cache`` for the revision ``rev``."""
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        self._trees = []
        self._blobs = []

    def add_object(self, obj, ie):
        """Buffer a commit, tree or blob until `finish` is called.

        :param obj: Git object; its ``type_name`` selects the handling
        :param ie: Entry supplying ``file_id`` (and ``revision`` for blobs);
            must be None for commits
        :raises AssertionError: for any other object type
        """
        kind = obj.type_name
        if kind == "commit":
            self._commit = obj
            assert ie is None
        elif kind == "tree":
            row = (obj.id, ie.file_id, self.revid)
            self._trees.append(row)
        elif kind == "blob":
            row = (obj.id, ie.file_id, ie.revision)
            self._blobs.append(row)
        else:
            raise AssertionError

    def finish(self):
        """Write the buffered rows to the database and return the commit.

        :raises AssertionError: if no commit object was added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        db = self.db
        db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        commit_row = (commit.id, self.revid, commit.tree)
        db.execute(
            "replace into commits (sha1, revid, tree_sha) values (?, ?, ?)",
            commit_row)
        return commit
315
316
317
def SqliteBzrGitCache(p):
    """Create a BzrGitCache backed by a SqliteGitShaMap.

    No content cache is configured.

    :param p: Path handed to SqliteGitShaMap
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
318
319
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
320
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a sqlite file, ``idmap.db``."""

    def get_format_string(self):
        """Return the format string identifying the sqlite format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache for a transport.

        The database lives in the transport's local directory, or in the
        user's cache directory when the transport is not local.
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        db_path = os.path.join(basepath, "idmap.db")
        return SqliteBzrGitCache(db_path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
331
332
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
333
class SqliteGitShaMap(GitShaMap):
    """Git<->Bzr id map stored in a sqlite database.

    Commits, blobs and trees are kept in three separate tables.
    """

    def __init__(self, path=None):
        """Open the map, creating tables and indexes that are missing.

        :param path: Path of the sqlite file. If None a new in-memory
            database is used; otherwise the connection is taken from (or
            added to) the calling thread's connection cache.
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            connections = mapdbs()
            if path not in connections:
                connections[path] = sqlite3.connect(path)
            self.db = connections[path]
        # Have sqlite hand back text values as str.
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the commit sha stored for a revision id.

        :raises KeyError: if the revision id is not in the map
        """
        row = self.db.execute("select sha1 from commits where revid = ?", (revid,)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit the current sqlite transaction."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raises KeyError: if there is no such entry
        """
        row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raises KeyError: if there is no such entry
        """
        row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        The commits, blobs and trees tables are searched in that order.

        :param sha: Git object sha
        :return: (type, type_data) with type_data:
            commit: revid, tree sha
            blob, tree: fileid, revid
        :raises KeyError: if the sha is in none of the tables
        """
        row = self.db.execute("select revid, tree_sha from commits where sha1 = ?", (sha,)).fetchone()
        if row is not None:
            return ("commit", row)
        row = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,)).fetchone()
        if row is not None:
            return ("blob", row)
        row = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,)).fetchone()
        if row is not None:
            return ("tree", row)
        raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (row for (row,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # Table names come from this fixed tuple, never from callers.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
419
420
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
421
class TdbCacheUpdater(CacheUpdater):
    """Updater that writes entries to the tdb database as they are added."""

    def __init__(self, cache, rev):
        """Prepare an update of ``cache`` for the revision ``rev``."""
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie):
        """Store the database entries for a commit, blob or tree.

        Every object gets a "git" entry keyed by its binary sha; commits
        and blobs additionally get an entry keyed by their Bazaar ids.

        :param obj: Git object; its ``type_name`` selects the handling
        :param ie: Entry supplying ``file_id`` (and ``revision`` for blobs);
            must be None for commits
        :raises AssertionError: for any other object type
        """
        sha = obj.sha().digest()
        kind = obj.type_name
        if kind == "commit":
            self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
            type_data = (self.revid, obj.tree)
            self._commit = obj
            assert ie is None
        elif kind == "blob":
            blob_key = "\0".join(("blob", ie.file_id, ie.revision))
            self.db[blob_key] = sha
            type_data = (ie.file_id, ie.revision)
        elif kind == "tree":
            type_data = (ie.file_id, self.revid)
        else:
            raise AssertionError
        git_value = "\0".join((obj.type_name, ) + type_data)
        self.db["git\0" + sha] = git_value

    def finish(self):
        """Return the commit object added earlier.

        :raises AssertionError: if no commit object was added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        return commit
451
452
453
def TdbBzrGitCache(p):
    """Create a BzrGitCache backed by a TdbGitShaMap.

    No content cache is configured.

    :param p: Path handed to TdbGitShaMap
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
454
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
455
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a tdb file, ``idmap.tdb``."""

    def get_format_string(self):
        """Return the format string identifying the tdb format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the tdb cache for a transport.

        The database lives in the transport's local directory, or in the
        user's cache directory when the transport is not local.

        :raises ImportError: if opening the map fails with ImportError
            (the message reports that 'tdb' is not installed)
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        tdb_path = os.path.join(basepath, "idmap.tdb")
        try:
            return TdbBzrGitCache(tdb_path)
        except ImportError:
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
471
472
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
473
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (fields are separated by NUL bytes; shas in keys and in the
    commit/blob values are stored in binary form):

    "git <sha1>" -> "<type> <type-data1> <type-data2>"
    "commit revid" -> "<sha1> <tree-id>"
    "blob fileid revid" -> "<sha1>"
    "version" -> map version number

    NOTE(review): nothing in this class reads "tree fileid revid" entries
    and lookup_tree_id is not overridden here - confirm before relying on
    tree lookups with this backend.
    """

    TDB_MAP_VERSION = 3
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open the map and stamp it with the current version.

        :param path: Path of the tdb file. If None a plain dictionary is
            used; otherwise the database handle is taken from (or added
            to) the calling thread's connection cache.
        :raises ImportError: if the ``tdb`` module is not available
        """
        import tdb
        self.path = path
        if path is None:
            self.db = {}
        else:
            connections = mapdbs()
            if path not in connections:
                connections[path] = tdb.Tdb(path, self.TDB_HASH_SIZE,
                                            tdb.DEFAULT,
                                            os.O_RDWR|os.O_CREAT)
            self.db = connections[path]
        try:
            if int(self.db["version"]) not in (2, 3):
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              self.db["version"], self.TDB_MAP_VERSION)
                self.db.clear()
        except KeyError:
            # No version entry yet; nothing to check.
            pass
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Start writing changes."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit any pending changes."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Abort any pending changes."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex commit sha for a revision id; KeyError if unknown."""
        # The value starts with the 20-byte binary sha.
        return sha_to_hex(self.db["commit\0" + revid][:20])

    def lookup_blob_id(self, fileid, revision):
        """Return the hex blob sha for (fileid, revision); KeyError if unknown."""
        return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha; a 40-character value is treated as hex
            and converted to binary form first
        :return: (type, type_data) with type_data:
            revision: revid, tree sha
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        data = self.db["git\0" + sha].split("\0")
        return (data[0], (data[1], data[2]))

    def missing_revisions(self, revids):
        """Return the set of ``revids`` that have no commit entry.

        Checks each revision id directly instead of listing every key.
        """
        ret = set()
        for revid in revids:
            if self.db.get("commit\0" + revid) is None:
                ret.add(revid)
        return ret

    def revids(self):
        """List the revision ids known."""
        prefix = "commit\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield key[len(prefix):]

    def sha1s(self):
        """List the SHA1s."""
        prefix = "git\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield sha_to_hex(key[len(prefix):])
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
557
558
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
559
# Registry of cache formats, keyed by format string. The extra 'default'
# key names the format used for new caches: tdb when the module can be
# imported, sqlite otherwise.
formats = registry.Registry()
for _format_class in (TdbGitCacheFormat, SqliteGitCacheFormat):
    formats.register(_format_class().get_format_string(), _format_class())
try:
    import tdb
except ImportError:
    _default_format_class = SqliteGitCacheFormat
else:
    _default_format_class = TdbGitCacheFormat
formats.register('default', _default_format_class())
570
571
572
def migrate_ancient_formats(repo_transport):
    """Move a legacy top-level cache file into the "git" subdirectory.

    If "git.db" exists it becomes "git/idmap.db" after the "git" directory
    has been initialized as a Sqlite cache; otherwise, if "git.tdb" exists,
    it becomes "git/idmap.tdb" after initializing a Tdb cache. When neither
    file is present nothing happens.

    :param repo_transport: Transport on which the legacy files are looked up
    """
    # git.db takes precedence over git.tdb, since the latter may not be
    # openable on some platforms.
    if repo_transport.has("git.db"):
        cache_format = SqliteGitCacheFormat()
        legacy_name, migrated_name = "git.db", "git/idmap.db"
    elif repo_transport.has("git.tdb"):
        cache_format = TdbGitCacheFormat()
        legacy_name, migrated_name = "git.tdb", "git/idmap.tdb"
    else:
        return
    cache_format.initialize(repo_transport.clone("git"))
    repo_transport.rename(legacy_name, migrated_name)
581
582
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
583
def remove_readonly_transport_decorator(transport):
    """Return the transport with a read-only decorator stripped off, if any.

    :param transport: Transport to unwrap
    :return: The object held in ``transport._decorated`` when the transport
        reports itself as read-only and has such an attribute; otherwise
        the transport itself, unchanged
    """
    if transport.is_readonly():
        # A read-only transport is not necessarily a decorator around a
        # writable one; without a ``_decorated`` attribute there is nothing
        # to strip, so hand back the transport itself rather than failing
        # with AttributeError.
        return getattr(transport, "_decorated", transport)
    return transport
587
588
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
589
def from_repository(repository):
    """Open the cache belonging to a repository.

    When the repository exposes a ``_transport``, a "git" directory is
    created on it first; if that directory did not exist before, any cache
    file in an older layout is migrated into it. Opening the cache itself is
    delegated to BzrGitCacheFormat.from_repository, which (according to the
    original documentation of this function) uses a local file in the user's
    cache directory, typically ~/.cache/bazaar/git/, when the repository is
    remote and offers no transport.

    :param repository: A repository object
    :return: Whatever BzrGitCacheFormat.from_repository returns
    """
    base_transport = getattr(repository, "_transport", None)
    if base_transport is None:
        return BzrGitCacheFormat.from_repository(repository)

    writable_transport = remove_readonly_transport_decorator(base_transport)
    try:
        writable_transport.mkdir("git")
    except bzrlib.errors.FileExists:
        # The directory is already there, so nothing needs migrating.
        pass
    else:
        # Freshly created directory: migrate older cache formats into it.
        migrate_ancient_formats(writable_transport)
    return BzrGitCacheFormat.from_repository(repository)