/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
19
from dulwich.objects import (
20
    sha_to_hex,
21
    hex_to_sha,
22
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
23
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
24
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
25
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
26
from dulwich.objects import (
27
    ShaFile,
28
    )
29
0.200.228 by Jelmer Vernooij
Split out map.
30
import bzrlib
0.200.528 by Jelmer Vernooij
Fix import.
31
from bzrlib import (
0.254.2 by jelmer
use btree indexes
32
    btree_index as _mod_btree_index,
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
33
    index as _mod_index,
34
    osutils,
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
35
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
36
    trace,
0.254.31 by Jelmer Vernooij
Initial work on CHKMap support.
37
    versionedfile,
0.200.528 by Jelmer Vernooij
Fix import.
38
    )
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
39
from bzrlib.transport import (
40
    get_transport,
41
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
42
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
43
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
44
def get_cache_dir():
    """Return the directory used for bzr-git caches, creating it if needed.

    When the python ``xdg`` module can be imported the directory is
    ``<xdg_cache_home>/bazaar/git``; otherwise it is the ``git``
    subdirectory of the Bazaar configuration directory.

    :return: Path of the cache directory
    :raises OSError: if the directory does not exist and cannot be created
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from bzrlib.config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    try:
        os.makedirs(ret)
    except OSError:
        # The directory may already exist, possibly because another process
        # created it between a check and the mkdir. Only a directory that is
        # still missing is a real error.
        if not os.path.isdir(ret):
            raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
55
56
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
57
def get_remote_cache_transport():
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    :return: Transport opened on the directory returned by get_cache_dir()
    """
    return get_transport(get_cache_dir())
62
63
0.200.228 by Jelmer Vernooij
Split out map.
64
def check_pysqlite_version(sqlite3):
    """Check that the sqlite library is compatible.

    :param sqlite3: DB-API module exposing ``sqlite_version_info``
    :raises BzrError: if the sqlite library is older than 3.3; a warning is
        emitted first.
    """
    # Compare (major, minor) as a pair instead of spelling out both cases.
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
73
74
# Locate a usable sqlite DB-API module: prefer the standard library's
# sqlite3 and fall back to the external pysqlite2 package.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzrlib.errors.BzrError):
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
# Only a missing or too-old library is translated; anything else (including
# KeyboardInterrupt) propagates unchanged.
except (ImportError, bzrlib.errors.BzrError):
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzrlib.errors.BzrError("missing sqlite library")
85
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
86
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
87
# Thread-local holder: each thread gets its own ``cache`` attribute.
_mapdbs = threading.local()


def mapdbs():
    """Get a cache for this thread's db connections.

    :return: Dictionary private to the calling thread; it is created empty
        the first time the thread asks for it.
    """
    try:
        return _mapdbs.cache
    except AttributeError:
        # First call from this thread: no cache has been attached yet.
        _mapdbs.cache = {}
        return _mapdbs.cache
95
96
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
97
class GitShaMap(object):
    """Git<->Bzr revision id mapping database.

    Abstract base class: the lookup and listing methods must be provided by
    subclasses, while the write-group hooks default to doing nothing.
    """

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha
        :return: iterable of (type, type_data) tuples with type_data:
            commit: revid, tree_sha, verifiers
            blob: fileid, revid
            tree: fileid, revid
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Retrieve a Git blob SHA by file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Retrieve a Git tree SHA by file id.

        :param file_id: File id of the directory
        :param revision: revision the tree was recorded for
        """
        raise NotImplementedError(self.lookup_tree_id)

    def lookup_commit(self, revid):
        """Retrieve a Git commit SHA by Bazaar revision id.

        :param revid: Bazaar revision id
        """
        raise NotImplementedError(self.lookup_commit)

    def revids(self):
        """List the revision ids known."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        :param revids: Iterable of revision ids to check
        :return: New set with those entries of revids that self.revids()
            does not report
        """
        return set(revids).difference(self.revids())

    def sha1s(self):
        """List the SHA1s."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Start writing changes."""

    def commit_write_group(self):
        """Commit any pending changes."""

    def abort_write_group(self):
        """Abort any pending changes."""
151
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
152
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
153
class ContentCache(object):
    """Object that can cache Git objects."""

    def add(self, object):
        """Add an object.

        :param object: Object to store; must be implemented by subclasses
        """
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Add multiple objects.

        The default implementation simply calls add() once per object, in
        iteration order.

        :param objects: Iterable of objects to store
        """
        for obj in objects:
            self.add(obj)

    def __getitem__(self, sha):
        """Retrieve an item, by SHA."""
        raise NotImplementedError(self.__getitem__)
168
169
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
170
class BzrGitCacheFormat(object):
    """Bazaar-Git Cache Format."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Create a new instance of this cache format at transport.

        This writes the format string to the file 'format' on the transport.
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        The 'format' file on the transport selects the cache format.  When
        that file does not exist, the 'default' format is initialized on
        the transport first.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        # NOTE(review): 'formats' is not defined in the part of the file
        # visible here; it is looked up by format string and by 'default'.
        try:
            format_name = transport.get_bytes('format')
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        else:
            cache_format = formats.get(format_name)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the users global cache directory if the repository has no
        transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is not None:
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            # NOTE(review): remove_readonly_transport_decorator is neither
            # defined nor imported in the visible part of this file; confirm
            # it is in scope.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport()
        return cls.from_transport(transport)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
224
225
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
226
class CacheUpdater(object):
    """Base class for objects that can update a bzr-git cache."""

    def add_object(self, obj, ie, path):
        """Add an object.

        :param obj: Git object to record; its ``type_name`` is expected to
            be "commit", "blob" or "tree"
        :param ie: Inventory entry (for blob/tree, may be None) or the
            dictionary of verifiers in case of a commit
        :param path: Path of the object (optional)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Finish the update once all objects have been added."""
        raise NotImplementedError(self.finish)
241
242
243
class BzrGitCache(object):
    """Caching backend.

    Bundles an id map, an optional content cache and the class used to
    create updaters for them.
    """

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Create a cache.

        :param idmap: Object stored as ``self.idmap``
        :param content_cache: Object stored as ``self.content_cache``
        :param cache_updater_klass: Callable invoked as
            ``cache_updater_klass(cache, rev)`` by get_updater()
        """
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Return an object that implements the CacheUpdater interface for
        updating this cache.

        :param rev: Revision the updater is created for
        """
        return self._cache_updater_klass(self, rev)
256
257
258
def DictBzrGitCache():
    """Create a BzrGitCache backed by a DictGitShaMap.

    No content cache is attached; updaters are DictCacheUpdater instances.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
259
260
261
class DictCacheUpdater(CacheUpdater):
    """Cache updater for dict-based caches."""

    def __init__(self, cache, rev):
        """Create an updater.

        :param cache: Cache whose ``idmap`` dictionaries are updated
        :param rev: Revision object; its ``revision_id`` and ``parent_ids``
            are recorded
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie, path):
        """Record a Git object in the dictionaries of the id map.

        :param obj: Git object with ``type_name`` "commit", "blob" or "tree"
        :param ie: Dictionary of verifiers for a commit; inventory entry (or
            None) for a blob or tree
        :param path: Unused by this implementation
        :raises AssertionError: for any other object type
        """
        idmap = self.cache.idmap
        if obj.type_name == "commit":
            assert type(ie) is dict
            self._commit = obj
            key = self.revid
            type_data = (self.revid, obj.tree, ie)
            idmap._by_revid[self.revid] = obj.id
        elif obj.type_name in ("blob", "tree"):
            if ie is None:
                # Without an inventory entry there is no (file id, revision)
                # to record. Returning here avoids using the unset
                # key/type_data below.
                return
            if obj.type_name == "blob":
                revision = ie.revision
            else:
                revision = self.revid
            key = type_data = (ie.file_id, revision)
            idmap._by_fileid.setdefault(revision, {})[ie.file_id] = obj.id
        else:
            raise AssertionError
        entry = (obj.type_name, type_data)
        idmap._by_sha.setdefault(obj.id, {})[key] = entry

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
295
296
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
297
class DictGitShaMap(GitShaMap):
    """Git SHA map that uses a dictionary."""

    def __init__(self):
        # sha -> {key: (type, type_data)}
        self._by_sha = {}
        # revision id -> {file id: sha}
        self._by_fileid = {}
        # revision id -> commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raises KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Yield every (type, type_data) entry recorded for sha.

        This is a generator, so the KeyError for an unknown sha is raised
        when iteration starts rather than when the method is called.
        """
        for entry in self._by_sha[sha].values():
            yield entry

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raises KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Return the commit sha stored for revid.

        :raises KeyError: if there is no such entry
        """
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision id of every commit entry."""
        for entries in self._by_sha.values():
            for (kind, type_data) in entries.values():
                if kind == "commit":
                    yield type_data[0]

    def sha1s(self):
        """Return an iterator over all known shas."""
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
326
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
327
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
328
class SqliteCacheUpdater(CacheUpdater):
    """Cache updater for sqlite-based caches.

    Objects are collected in memory by add_object() and written to the
    database in one go by finish().
    """

    def __init__(self, cache, rev):
        """Create an updater.

        :param cache: Cache whose ``idmap.db`` connection is written to
        :param rev: Revision object; its ``revision_id`` is recorded
        """
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        self._testament3_sha1 = None
        self._trees = []
        self._blobs = []

    def add_object(self, obj, ie, path):
        """Queue a Git object for insertion.

        :param obj: Git object with ``type_name`` "commit", "blob" or "tree"
        :param ie: Dictionary of verifiers for a commit; inventory entry (or
            None, in which case the object is skipped) for a blob or tree
        :param path: Unused by this implementation
        :raises AssertionError: for any other object type
        """
        if obj.type_name == "commit":
            # Check the type before calling .get() on it.
            assert type(ie) is dict
            self._commit = obj
            self._testament3_sha1 = ie.get("testament3-sha1")
        elif obj.type_name == "tree":
            if ie is not None:
                self._trees.append((obj.id, ie.file_id, self.revid))
        elif obj.type_name == "blob":
            if ie is not None:
                self._blobs.append((obj.id, ie.file_id, ie.revision))
        else:
            raise AssertionError

    def finish(self):
        """Write the queued trees, blobs and the commit to the database.

        :return: The commit object that was added
        :raises AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        self.db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        self.db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        self.db.execute(
            "replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
            (self._commit.id, self.revid, self._commit.tree, self._testament3_sha1))
        return self._commit
365
366
367
def SqliteBzrGitCache(p):
    """Create a BzrGitCache backed by a SqliteGitShaMap.

    No content cache is attached; updaters are SqliteCacheUpdater instances.

    :param p: Path passed on to SqliteGitShaMap
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
368
369
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
370
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a sqlite database."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache belonging to a transport.

        The database file is 'idmap.db' in the transport's local directory,
        or in the directory returned by get_cache_dir() when the transport
        is not local.

        :param transport: Transport the cache lives on
        :return: A BzrGitCache using sqlite
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
381
382
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
383
class SqliteGitShaMap(GitShaMap):
    """Bazaar GIT Sha map that uses a sqlite database for storage."""

    def __init__(self, path=None):
        """Open (and if necessary create) the sha map database.

        :param path: Path of the sqlite file.  With None an in-memory
            database is used.  Otherwise the connection is taken from, or
            added to, the calling thread's mapdbs() cache.
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            if path not in mapdbs():
                mapdbs()[path] = sqlite3.connect(path)
            self.db = mapdbs()[path]
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")
        # Add the testament3_sha1 column to the commits table if it is
        # not there yet.
        try:
            self.db.executescript(
                "ALTER TABLE commits ADD testament3_sha1 TEXT;")
        except sqlite3.OperationalError:
            pass # Column already exists.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the commit sha stored for a Bazaar revision id.

        :raises KeyError: if the revision id is not in the commits table
        """
        cursor = self.db.execute("select sha1 from commits where revid = ?",
            (revid,))
        row = cursor.fetchone()
        if row is not None:
            return row[0]
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit the pending changes on the database connection."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raises KeyError: if there is no such row
        """
        row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raises KeyError: if there is no such row
        """
        row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator: the queries run, and KeyError is raised, only
        while it is being iterated.

        :param sha: Git object sha
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers
            tree: fileid, revid
            blob: fileid, revid
        :raises KeyError: once the iterator is exhausted, if no table had a
            row for sha
        """
        found = False
        cursor = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            # A NULL testament3_sha1 yields an empty verifiers dictionary.
            if row[2] is not None:
                verifiers = {"testament3-sha1": row[2]}
            else:
                verifiers = {}
            yield ("commit", (row[0], row[1], verifiers))
        cursor = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("blob", row)
        cursor = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("tree", row)
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (row for (row,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # Table names come from this fixed tuple, never from callers.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
488
489
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
490
class TdbCacheUpdater(CacheUpdater):
    """Cache updater for tdb-based caches.

    Entries are written directly into the database of the cache's idmap as
    each object is added.
    """

    def __init__(self, cache, rev):
        """Create an updater for a single revision.

        :param cache: Cache being updated; ``cache.idmap.db`` is written to
        :param rev: Revision providing ``revision_id`` and ``parent_ids``
        """
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie, path):
        """Record a Git object in the database.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree", anything else raises AssertionError
        :param ie: For commits a dict of verifiers; for blobs and trees an
            object with ``file_id`` (and ``revision`` for blobs), or None,
            in which case nothing is recorded
        :param path: Not used by this updater
        """
        binsha = obj.sha().digest()
        kind = obj.type_name
        if kind == "commit":
            # Forward mapping revid -> (sha, tree) is written before the
            # verifier dict is inspected.
            self.db["commit\0" + self.revid] = "\0".join((binsha, obj.tree))
            assert type(ie) is dict, "was %r" % ie
            try:
                testament = ie["testament3-sha1"]
            except KeyError:
                # No testament available; store the short form
                type_data = (self.revid, obj.tree)
            else:
                type_data = (self.revid, obj.tree, testament)
            self._commit = obj
        elif kind == "blob":
            if ie is None:
                return
            type_data = (ie.file_id, ie.revision)
            self.db["\0".join(("blob",) + type_data)] = binsha
        elif kind == "tree":
            if ie is None:
                return
            type_data = (ie.file_id, self.revid)
        else:
            raise AssertionError
        entry = "\0".join((kind,) + type_data) + "\n"
        key = "git\0" + binsha
        try:
            existing = self.db[key]
        except KeyError:
            # First entry for this sha
            self.db[key] = entry
            return
        # Several bzr objects may map to one git sha: append a new line,
        # inserting the separator if the stored value lacks a trailing one.
        if existing[-1] == "\n":
            self.db[key] = existing + entry
        else:
            self.db[key] = existing + "\n" + entry

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object has been added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        return commit
539
540
541
def TdbBzrGitCache(p):
    """Build a BzrGitCache backed by a tdb sha map.

    :param p: Path handed to TdbGitShaMap
    :return: BzrGitCache using a TdbGitShaMap, no content cache (None) and
        TdbCacheUpdater as its updater class
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
542
0.200.1140 by Jelmer Vernooij
Update now that the control dir formats are no longer in __init__.
543
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
544
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format for tdb-based caches."""

    def get_format_string(self):
        """Return the marker string identifying this cache format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the tdb cache for ``transport``.

        The database file is ``idmap.tdb`` inside the transport's local
        directory, or inside the directory returned by get_cache_dir() when
        the transport is not local.

        :raises ImportError: if opening the cache fails with ImportError
            (reported as 'tdb' not being installed)
        """
        try:
            local_dir = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        else:
            basepath = local_dir.encode(osutils._fs_enc)
        assert isinstance(basepath, str)
        db_path = os.path.join(basepath, "idmap.tdb")
        try:
            return TdbBzrGitCache(db_path)
        except ImportError:
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
562
563
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
564
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (fields are separated by NUL bytes in the stored keys and
    values; "git" values hold one newline-terminated record per mapping):

    "git <sha1>" -> "<type> <type-data1> <type-data2>"
    "commit revid" -> "<sha1> <tree-id>"
    "tree fileid revid" -> "<sha1>"
    "blob fileid revid" -> "<sha1>"
    """

    TDB_MAP_VERSION = 3
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open (creating if needed) the database at ``path``.

        :param path: File name of the tdb database, or None to use a plain
            in-memory dict instead
        """
        import tdb
        self.path = path
        if path is not None:
            assert isinstance(path, str)
            # Database handles are shared through mapdbs(), keyed by path
            if not mapdbs().has_key(path):
                mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE,
                                         tdb.DEFAULT, os.O_RDWR | os.O_CREAT)
            self.db = mapdbs()[path]
        else:
            self.db = {}
        try:
            if int(self.db["version"]) not in (2, 3):
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              self.db["version"], self.TDB_MAP_VERSION)
                self.db.clear()
        except KeyError:
            # No version recorded yet; it is stamped below
            pass
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Start writing changes."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit any pending changes."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Abort any pending changes."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex Git sha stored for a revision id.

        :raises KeyError: if the revision id is not in the database
        """
        record = self.db["commit\0" + revid]
        # The first 20 bytes of the record are the binary sha
        return sha_to_hex(record[:20])

    def lookup_blob_id(self, fileid, revision):
        """Return the hex Git sha stored for a (file id, revision) blob."""
        key = "\0".join(("blob", fileid, revision))
        return sha_to_hex(self.db[key])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha; a 40-character value is converted
            with hex_to_sha before the lookup
        :return: Generator of (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        for line in self.db["git\0" + sha].splitlines():
            fields = line.split("\0")
            kind = fields[0]
            if kind == "commit":
                verifiers = {}
                if len(fields) != 3:
                    # A fourth field carries the testament sha1
                    verifiers["testament3-sha1"] = fields[3]
                yield (kind, (fields[1], fields[2], verifiers))
            elif kind in ("tree", "blob"):
                yield (kind, tuple(fields[1:]))
            else:
                raise AssertionError("unknown type %r" % kind)

    def missing_revisions(self, revids):
        """Return the set of given revision ids with no commit entry."""
        return set(revid for revid in revids
                   if self.db.get("commit\0" + revid) is None)

    def revids(self):
        """List the revision ids known."""
        prefix = "commit\0"
        skip = len(prefix)
        for key in self.db.iterkeys():
            if not key.startswith(prefix):
                continue
            yield key[skip:]

    def sha1s(self):
        """List the SHA1s."""
        prefix = "git\0"
        skip = len(prefix)
        for key in self.db.iterkeys():
            if not key.startswith(prefix):
                continue
            yield sha_to_hex(key[skip:])
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
661
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
662
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
663
class VersionedFilesContentCache(ContentCache):
    """Content cache that stores Git objects in a versioned files object."""

    def __init__(self, vf):
        """:param vf: Versioned files object used as backing store"""
        self._vf = vf

    def add(self, obj):
        """Insert ``obj`` keyed by ``(obj.id,)`` in legacy chunk form."""
        record = versionedfile.ChunkedContentFactory(
            (obj.id,), [], None, obj.as_legacy_object_chunks())
        self._vf.insert_record_stream([record])

    def __getitem__(self, sha):
        """Return the object stored under ``sha``.

        :raises KeyError: if the record's storage kind is 'absent'
        """
        records = self._vf.get_record_stream([(sha,)], 'unordered', True)
        record = records.next()
        if record.storage_kind == 'absent':
            raise KeyError(sha)
        fulltext = record.get_bytes_as('fulltext')
        return ShaFile._parse_legacy_object(fulltext)
679
680
0.254.52 by Jelmer Vernooij
Merge trunk, use git objects to cache tree objects.
681
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates to a Git object store."""

    def __init__(self, store):
        """:param store: Object store that holds the cached objects"""
        self.store = store

    def add_multi(self, objs):
        """Add several objects at once via the store's ``add_objects``."""
        backend = self.store
        backend.add_objects(objs)

    def add(self, obj, path):
        """Add a single object; ``path`` is accepted but not used."""
        backend = self.store
        backend.add_object(obj)

    def __getitem__(self, sha):
        """Return the object the store holds for ``sha``."""
        backend = self.store
        return backend[sha]
694
695
0.254.46 by Jelmer Vernooij
Merge trunk.
696
class IndexCacheUpdater(CacheUpdater):
    """Cache updater for index-based caches.

    Index nodes are added to the cache's idmap immediately; objects whose
    content should be kept are collected and handed to the content cache in
    one batch by finish().
    """

    def __init__(self, cache, rev):
        """Create an updater for a single revision.

        :param cache: Cache being updated (``idmap`` and ``content_cache``
            are used)
        :param rev: Revision providing ``revision_id`` and ``parent_ids``
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []
        self._cache_objs = set()

    def add_object(self, obj, ie, path):
        """Record a Git object in the index.

        :param obj: Git object; ``type_name`` must be "commit", "blob" or
            "tree", anything else raises AssertionError
        :param ie: For commits a dict of verifiers; for blobs and trees an
            object with ``file_id`` (plus ``revision`` and ``kind`` for
            blobs)
        :param path: Path stored alongside the object for the content cache
        """
        kind = obj.type_name
        if kind not in ("commit", "blob", "tree"):
            raise AssertionError
        idmap = self.cache.idmap
        if kind == "commit":
            self._commit = obj
            assert type(ie) is dict
            idmap._add_git_sha(obj.id, "commit", (self.revid, obj.tree, ie))
            idmap._add_node(("commit", self.revid, "X"),
                            " ".join((obj.id, obj.tree)))
            keep_content = True
        elif kind == "blob":
            idmap._add_git_sha(obj.id, "blob", (ie.file_id, ie.revision))
            idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
            # Only symlink blobs have their content cached
            keep_content = (ie.kind == "symlink")
        else:
            idmap._add_git_sha(obj.id, "tree", (ie.file_id, self.revid))
            keep_content = True
        if keep_content:
            self._cache_objs.add((obj, path))

    def finish(self):
        """Flush collected objects to the content cache.

        :return: The commit object that was added, or None if there was none
        """
        pending = self._cache_objs
        self.cache.content_cache.add_multi(pending)
        return self._commit
731
732
733
class IndexBzrGitCache(BzrGitCache):
    """BzrGitCache using an index-based sha map and a Git object store."""

    def __init__(self, transport=None):
        """Open the cache below ``transport``.

        The sha map lives in the 'index' subdirectory and the object store
        in the 'objects' subdirectory.

        :param transport: Transport of the cache directory. Despite the
            default, a transport is required: ``clone`` is called on it
            unconditionally.
        """
        shamap = IndexGitShaMap(transport.clone('index'))
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
0.254.46 by Jelmer Vernooij
Merge trunk.
745
746
0.254.43 by Jelmer Vernooij
Merge trunk.
747
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format combining an index sha map with a Git object cache."""

    def get_format_string(self):
        """Return the marker string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the on-disk layout for a new cache.

        After the base class initialisation, the 'index' and 'objects'
        directories are created and an object store is initialised in
        'objects'.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for dirname in ('index', 'objects'):
            transport.mkdir(dirname)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Return an IndexBzrGitCache for ``transport``."""
        cache = IndexBzrGitCache(transport)
        return cache
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
761
762
763
class IndexGitShaMap(GitShaMap):
    """SHA Map that uses the Bazaar APIs to store a cache.

    BTree Index file with the following contents (all keys have three
    elements; "X" is a placeholder for the unused third element):

    ("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
    ("commit", <revid>, "X") -> "<sha1> <tree-id>"
    ("blob", <fileid>, <revid>) -> <sha1>

    For commits the "git" value may carry a fourth, optional field holding
    the testament3-sha1 verifier.
    """

    def __init__(self, transport=None):
        """Open the sha map.

        :param transport: Transport of the directory holding the ``.rix``
            index files, or None for a purely in-memory map
        """
        if transport is None:
            self._transport = None
            self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
            # In memory the index doubles as the (always open) builder
            self._builder = self._index
        else:
            self._builder = None
            self._transport = transport
            self._index = _mod_index.CombinedGraphIndex([])
            for name in self._transport.list_dir("."):
                if not name.endswith(".rix"):
                    continue
                x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
                    self._transport.stat(name).st_size)
                self._index.insert_index(0, x)

    @classmethod
    def from_repository(cls, repository):
        """Open the sha map belonging to ``repository``.

        Uses the 'git' directory below the repository's ``_transport``
        (creating it if needed); without such a transport the directory
        returned by get_cache_dir() is used.
        """
        transport = getattr(repository, "_transport", None)
        if transport is not None:
            try:
                transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            return cls(transport.clone('git'))
        from bzrlib.transport import get_transport
        return cls(get_transport(get_cache_dir()))

    def __repr__(self):
        if self._transport is not None:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)
        else:
            return "%s()" % (self.__class__.__name__)

    def repack(self):
        """Rewrite all entries into a single new index file.

        The previously existing ``.rix`` files are renamed to ``.rix.old``
        afterwards. Must not be called while a write group is open.
        """
        assert self._builder is None
        self.start_write_group()
        for _, key, value in self._index.iter_all_entries():
            self._builder.add_node(key, value)
        # Collect the old files before the new one is written
        to_remove = []
        for name in self._transport.list_dir('.'):
            if name.endswith('.rix'):
                to_remove.append(name)
        self.commit_write_group()
        # NOTE(review): assumes the combined index exposes its sub-indices
        # as a list attribute named ``indices`` - confirm against bzrlib.
        del self._index.indices[1:]
        for name in to_remove:
            self._transport.rename(name, name + '.old')

    def start_write_group(self):
        """Start collecting new entries in a fresh builder."""
        assert self._builder is None
        self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
        # Hash of the shas added in this group; names the resulting file
        self._name = osutils.sha()

    def commit_write_group(self):
        """Write the collected entries to a new ``.rix`` file.

        The new index is inserted in front of the existing ones.
        """
        assert self._builder is not None
        stream = self._builder.finish()
        name = self._name.hexdigest() + ".rix"
        size = self._transport.put_file(name, stream)
        index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
        self._index.insert_index(0, index)
        self._builder = None
        self._name = None

    def abort_write_group(self):
        """Discard the entries collected in the current write group."""
        assert self._builder is not None
        self._builder = None
        self._name = None

    def _add_node(self, key, value):
        """Add a node to the current builder.

        :return: True if the key was already present (the node is not
            added again), False if it was added
        """
        try:
            self._builder.add_node(key, value)
        except bzrlib.errors.BadIndexDuplicateKey:
            # Multiple bzr objects can have the same contents
            return True
        else:
            return False

    def _get_entry(self, key):
        """Return the value stored for ``key``.

        The committed index is consulted first, then the open builder.

        :raises KeyError: if the key is in neither
        """
        entries = self._index.iter_entries([key])
        try:
            return entries.next()[2]
        except StopIteration:
            if self._builder is None:
                raise KeyError(key)
            entries = self._builder.iter_entries([key])
            try:
                return entries.next()[2]
            except StopIteration:
                raise KeyError(key)

    def _iter_entries_prefix(self, prefix):
        """Yield (key, value) for all entries matching the key prefix.

        Covers the committed index and, if a builder is open, its pending
        entries as well.
        """
        for entry in self._index.iter_entries_prefix([prefix]):
            yield (entry[1], entry[2])
        if self._builder is not None:
            for entry in self._builder.iter_entries_prefix([prefix]):
                yield (entry[1], entry[2])

    def lookup_commit(self, revid):
        """Return the hex Git sha stored for a revision id.

        :raises KeyError: if the revision id is unknown
        """
        # The value is "<sha1> <tree-id>"; the sha is the first 40 chars
        return self._get_entry(("commit", revid, "X"))[:40]

    def _add_git_sha(self, hexsha, type, type_data):
        """Add the reverse mapping for a Git object.

        :param hexsha: Hex sha of the object, or None if the object has no
            Git representation
        :param type: Object type ("commit", "blob" or "tree")
        :param type_data: Tuple of type-specific data; for commits the
            third element is the verifiers dict
        """
        if hexsha is not None:
            self._name.update(hexsha)
            if type == "commit":
                td = (type_data[0], type_data[1])
                try:
                    td += (type_data[2]["testament3-sha1"],)
                except KeyError:
                    # No testament: the stored value has three fields only
                    pass
            else:
                td = type_data
            self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
        else:
            # This object is not represented in Git - perhaps an empty
            # directory?
            self._name.update(type + " ".join(type_data))

    def lookup_blob_id(self, fileid, revision):
        """Return the Git sha stored for a (file id, revision) blob."""
        return self._get_entry(("blob", fileid, revision))

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the index.

        :param sha: Git object sha; a 20-byte binary sha is converted to hex
        :return: Generator of (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        :raises KeyError: (on iteration) if no entry exists for the sha
        """
        if len(sha) == 20:
            sha = sha_to_hex(sha)
        found = False
        for key, value in self._iter_entries_prefix(("git", sha, None)):
            found = True
            data = value.split(" ", 3)
            if data[0] == "commit":
                # The testament field is only written when the verifier
                # was available, so it may be absent altogether.
                if len(data) > 3 and data[3]:
                    verifiers = {"testament3-sha1": data[3]}
                else:
                    verifiers = {}
                yield ("commit", (data[1], data[2], verifiers))
            else:
                yield (data[0], tuple(data[1:]))
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        for key, value in self._iter_entries_prefix(("commit", None, None)):
            yield key[1]

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        Only the committed index is consulted, not an open builder.
        """
        missing_revids = set(revids)
        # Build the keys from the materialised set so that a one-shot
        # iterable passed as ``revids`` is not consumed twice.
        keys = [("commit", revid, "X") for revid in missing_revids]
        for _, key, value in self._index.iter_entries(keys):
            missing_revids.discard(key[1])
        return missing_revids

    def sha1s(self):
        """List the SHA1s."""
        for key, value in self._iter_entries_prefix(("git", None, None)):
            yield key[1]
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
929
930
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
931
# Registry of the known cache formats, keyed by their format string; the
# 'default' key selects the format used for new caches.
formats = registry.Registry()
for _format_class in (TdbGitCacheFormat, SqliteGitCacheFormat,
                      IndexGitCacheFormat):
    formats.register(_format_class().get_format_string(), _format_class())
del _format_class
# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())
try:
    import tdb
except ImportError:
    # tdb is unavailable: fall back to sqlite
    _default_format_class = SqliteGitCacheFormat
else:
    _default_format_class = TdbGitCacheFormat
formats.register('default', _default_format_class())
del _default_format_class
946
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
947
948
949
def migrate_ancient_formats(repo_transport):
    """Move an old top-level cache file into the 'git' cache directory.

    At most one file is migrated: ``git.db`` if present, otherwise
    ``git.tdb``. The matching cache format is initialised in 'git' before
    the file is renamed into place.

    :param repo_transport: Transport of the repository control directory
    """
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if repo_transport.has("git.db"):
        sqlite_format = SqliteGitCacheFormat()
        sqlite_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
        return
    if repo_transport.has("git.tdb"):
        tdb_format = TdbGitCacheFormat()
        tdb_format.initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
958
959
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
960
def remove_readonly_transport_decorator(transport):
    """Return the transport wrapped by a read-only transport.

    :param transport: Any transport
    :return: ``transport._decorated`` if ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    return transport._decorated if transport.is_readonly() else transport
964
965
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
966
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the users cache directory
    (typically ~/.cache/bazaar/git/)

    When the 'git' cache directory did not exist yet it is created and any
    older cache file is migrated into it.

    :param repository: A repository object
    """
    repo_transport = getattr(repository, "_transport", None)
    if repo_transport is not None:
        # Migrate older cache formats
        writable = remove_readonly_transport_decorator(repo_transport)
        try:
            writable.mkdir("git")
        except bzrlib.errors.FileExists:
            freshly_created = False
        else:
            freshly_created = True
        if freshly_created:
            migrate_ancient_formats(writable)
    return BzrGitCacheFormat.from_repository(repository)