/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
19
from dulwich.objects import (
20
    sha_to_hex,
21
    hex_to_sha,
22
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
23
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
24
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
25
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
26
from dulwich.objects import (
27
    ShaFile,
28
    )
29
0.200.228 by Jelmer Vernooij
Split out map.
30
import bzrlib
0.200.528 by Jelmer Vernooij
Fix import.
31
from bzrlib import (
0.254.2 by jelmer
use btree indexes
32
    btree_index as _mod_btree_index,
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
33
    index as _mod_index,
34
    osutils,
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
35
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
36
    trace,
0.254.31 by Jelmer Vernooij
Initial work on CHKMap support.
37
    versionedfile,
0.200.528 by Jelmer Vernooij
Fix import.
38
    )
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
39
from bzrlib.transport import (
40
    get_transport,
41
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
42
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
43
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
44
def get_cache_dir():
    """Return the directory used for bzr-git caches, creating it if needed.

    When the python ``xdg`` module can be imported the directory is
    ``<xdg_cache_home>/bazaar/git``; otherwise it is the ``git``
    subdirectory of the Bazaar configuration directory.

    :return: Path of the cache directory (which exists on return)
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from bzrlib.config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    if not os.path.isdir(ret):
        try:
            os.makedirs(ret)
        except OSError:
            # Something else may have created the directory between the
            # check above and the makedirs() call; only re-raise if the
            # directory is still missing.
            if not os.path.isdir(ret):
                raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
55
56
0.200.1221 by Jelmer Vernooij
Support cache for non-local transport properly.
57
def get_remote_cache_transport(repository):
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    :param repository: Repository to find a cache location for. If it has
        a ``uuid`` attribute that is not None, a subdirectory of the cache
        directory named after that uuid is used (and created if missing);
        otherwise the cache directory itself is used.
    :return: Transport for the selected cache directory
    """
    uuid = getattr(repository, "uuid", None)
    path = get_cache_dir()
    if uuid is not None:
        path = os.path.join(path, uuid)
        if not os.path.isdir(path):
            try:
                os.mkdir(path)
            except OSError:
                # Tolerate the directory being created concurrently
                # between the isdir() check and mkdir().
                if not os.path.isdir(path):
                    raise
    return get_transport(path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
69
70
0.200.228 by Jelmer Vernooij
Split out map.
71
def check_pysqlite_version(sqlite3):
    """Verify that the sqlite library behind a DB-API module is usable.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raise bzrlib.errors.BzrError: if the sqlite version is older than 3.3
    """
    version = sqlite3.sqlite_version_info
    major = version[0]
    # Anything from 3.3 onwards is acceptable.
    if major > 3 or (major == 3 and version[1] >= 3):
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
80
81
# Locate a usable sqlite DB-API module: prefer the standard library's
# sqlite3 and fall back to the standalone pysqlite2 package.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzrlib.errors.BzrError):
        # Standard-library module is missing or its sqlite is too old.
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
except (ImportError, bzrlib.errors.BzrError):
    # Only import/version failures mean "no sqlite"; anything else (for
    # example KeyboardInterrupt) is allowed to propagate unchanged.
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzrlib.errors.BzrError("missing sqlite library")
92
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
93
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
94
# Thread-local holder, so that each thread gets its own connection cache.
_mapdbs = threading.local()
def mapdbs():
    """Return the calling thread's dictionary of cached db connections.

    The dictionary is created empty the first time a thread asks for it.
    """
    if not hasattr(_mapdbs, "cache"):
        _mapdbs.cache = {}
    return _mapdbs.cache
102
103
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
104
class GitShaMap(object):
    """Git<->Bzr revision id mapping database."""

    def lookup_git_sha(self, sha):
        """Find what a Git sha refers to.

        :param sha: Git object sha
        :return: list with (type, type_data) tuples with type_data:
            commit: revid, tree_sha, verifiers
            blob: fileid, revid
            tree: fileid, revid
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Find the Git blob SHA for a file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Find the Git tree SHA for a file id.

        :param file_id: File id of the tree
        :param revision: Revision to look the tree up in
        """
        raise NotImplementedError(self.lookup_tree_id)

    def lookup_commit(self, revid):
        """Find the Git commit SHA for a Bazaar revision id.

        :param revid: Bazaar revision id
        """
        raise NotImplementedError(self.lookup_commit)

    def revids(self):
        """Iterate over the revision ids this map knows about."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Work out which of the given revisions are not in the map.

        :param revids: Iterable of revision ids to check
        :return: set of the revision ids not returned by revids()
        """
        known = set(self.revids())
        if isinstance(revids, set):
            return revids - known
        return set(revids) - known

    def sha1s(self):
        """Iterate over the SHA1s this map knows about."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Begin a group of changes; does nothing in this base class."""

    def commit_write_group(self):
        """Make pending changes permanent; does nothing in this base class."""

    def abort_write_group(self):
        """Discard pending changes; does nothing in this base class."""
158
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
159
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
160
class ContentCache(object):
    """Object that can cache Git objects."""

    def add(self, object):
        """Store a single object; must be provided by subclasses."""
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Store every object of an iterable by calling add() on each one."""
        for item in objects:
            self.add(item)

    def __getitem__(self, sha):
        """Look up a cached object by its SHA; must be provided by subclasses."""
        raise NotImplementedError(self.__getitem__)
175
176
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
177
class BzrGitCacheFormat(object):
    """Bazaar-Git Cache Format."""

    def get_format_string(self):
        """Return the unique, single-line string identifying this format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open a cache of this format located at transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Set up a new cache of this format at transport.

        Writes the format string to the file named 'format'.
        """
        format_string = self.get_format_string()
        transport.put_bytes('format', format_string)

    @classmethod
    def from_transport(cls, transport):
        """Open the cache found on a transport, initializing one if absent.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            cache_format = formats.get(transport.get_bytes('format'))
        except bzrlib.errors.NoSuchFile:
            # No format marker: set up a cache in the default format.
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open the cache belonging to a repository.

        The cache lives in a 'git' directory on the repository's transport
        when that transport is a LocalTransport; otherwise the location
        given by get_remote_cache_transport() is used.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        from bzrlib.transport.local import LocalTransport
        repo_transport = getattr(repository, "_transport", None)
        if repo_transport is None or not isinstance(repo_transport, LocalTransport):
            return cls.from_transport(get_remote_cache_transport(repository))
        # Even if we don't write to this repo, we should be able
        # to update its cache.
        repo_transport = remove_readonly_transport_decorator(repo_transport)
        try:
            repo_transport.mkdir('git')
        except bzrlib.errors.FileExists:
            pass
        return cls.from_transport(repo_transport.clone('git'))
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
232
233
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
234
class CacheUpdater(object):
    """Base class for objects that can update a bzr-git cache."""

    def add_object(self, obj, ie, path):
        """Record an object in the cache.

        :param obj: Git object to add (a "commit", "blob" or "tree")
        :param ie: Inventory entry (for blob/tree) or verifiers dictionary
            in case of a commit
        :param path: Path of the object (optional)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; must be provided by subclasses."""
        raise NotImplementedError(self.finish)
249
250
251
class BzrGitCache(object):
    """Caching backend."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Bundle the pieces that make up a cache.

        :param idmap: Sha map object, stored as ``idmap``
        :param content_cache: Content cache object, stored as
            ``content_cache``
        :param cache_updater_klass: Callable invoked as
            ``cache_updater_klass(cache, rev)`` by get_updater()
        """
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Build the updater used to record objects for a revision.

        :param rev: Revision passed through to the updater class
        :return: Result of calling the updater class with this cache and rev
        """
        updater_factory = self._cache_updater_klass
        return updater_factory(self, rev)
264
265
266
def DictBzrGitCache():
    """Create a BzrGitCache using a DictGitShaMap and no content cache."""
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
267
268
269
class DictCacheUpdater(CacheUpdater):
    """Cache updater for dict-based caches."""

    def __init__(self, cache, rev):
        """Prepare to record the objects of one revision.

        :param cache: Cache whose ``idmap`` dictionaries are updated
        :param rev: Revision being processed; its ``revision_id`` and
            ``parent_ids`` are remembered
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie, path):
        """Record a Git object in the dictionaries of the sha map.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree"
        :param ie: Verifiers dictionary for a commit, inventory entry for a
            blob or tree. A blob or tree with ``ie`` set to None is ignored.
        :param path: Path of the object (unused here)
        :raise AssertionError: for any other object type
        """
        idmap = self.cache.idmap
        if obj.type_name == "commit":
            self._commit = obj
            assert type(ie) is dict
            key = self.revid
            type_data = (self.revid, self._commit.tree, ie)
            idmap._by_revid[self.revid] = obj.id
        elif obj.type_name in ("blob", "tree"):
            if ie is None:
                # No inventory entry means there is nothing to map this
                # object to. Falling through would use the unbound
                # key/type_data below and fail with UnboundLocalError.
                return
            if obj.type_name == "blob":
                revision = ie.revision
            else:
                revision = self.revid
            key = type_data = (ie.file_id, revision)
            idmap._by_fileid.setdefault(revision, {})[ie.file_id] = obj.id
        else:
            raise AssertionError
        entry = (obj.type_name, type_data)
        idmap._by_sha.setdefault(obj.id, {})[key] = entry

    def finish(self):
        """Return the commit object that was added.

        :raise AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
303
304
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
305
class DictGitShaMap(GitShaMap):
    """Git SHA map that uses a dictionary."""

    def __init__(self):
        # sha -> {key: (type, type_data)}
        self._by_sha = {}
        # revision -> {fileid: sha}
        self._by_fileid = {}
        # revid -> commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raise KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Yield the (type, type_data) entries stored for a Git sha.

        :raise KeyError: on first iteration, if the sha is unknown
        """
        # values() rather than itervalues(): works on Python 2 and 3.
        for entry in self._by_sha[sha].values():
            yield entry

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raise KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Return the commit sha stored for a revision id.

        :raise KeyError: if the revision id is unknown
        """
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision id of every commit entry in the map."""
        for entries in self._by_sha.values():
            for (kind, type_data) in entries.values():
                if kind == "commit":
                    yield type_data[0]

    def sha1s(self):
        """Return an iterator over the known SHA1s."""
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
334
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
335
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
336
class SqliteCacheUpdater(CacheUpdater):
    """Cache updater for sqlite-based caches."""

    def __init__(self, cache, rev):
        """Prepare to record the objects of one revision.

        :param cache: Cache whose ``idmap.db`` connection is written to
        :param rev: Revision being processed; its ``revision_id`` is used
        """
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        self._testament3_sha1 = None
        self._trees = []
        self._blobs = []

    def add_object(self, obj, ie, path):
        """Queue a Git object; nothing is written until finish().

        :param obj: Git object; its ``type_name`` must be "commit", "tree"
            or "blob"
        :param ie: Verifiers dictionary for a commit, inventory entry for a
            tree or blob. A tree or blob with ``ie`` set to None is ignored.
        :param path: Path of the object (unused here)
        :raise AssertionError: for any other object type
        """
        if obj.type_name == "commit":
            # Check the type before using ie, so a wrong argument is
            # reported by the assertion rather than by the .get() call.
            assert type(ie) is dict
            self._commit = obj
            self._testament3_sha1 = ie.get("testament3-sha1")
        elif obj.type_name == "tree":
            if ie is not None:
                self._trees.append((obj.id, ie.file_id, self.revid))
        elif obj.type_name == "blob":
            if ie is not None:
                self._blobs.append((obj.id, ie.file_id, ie.revision))
        else:
            raise AssertionError

    def finish(self):
        """Write the queued trees, blobs and the commit to the database.

        :return: The commit object that was added
        :raise AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        self.db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        self.db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        self.db.execute(
            "replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
            (self._commit.id, self.revid, self._commit.tree, self._testament3_sha1))
        return self._commit
373
374
375
def SqliteBzrGitCache(p):
    """Create a BzrGitCache using a SqliteGitShaMap at path p and no content cache."""
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
376
377
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
378
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a sqlite database file."""

    def get_format_string(self):
        """Return the format string identifying the sqlite sha map."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the sqlite cache for a transport.

        The database file is 'idmap.db' in the transport's local directory,
        or in the directory from get_cache_dir() when the transport has no
        local path.
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        db_path = os.path.join(basepath, "idmap.db")
        return SqliteBzrGitCache(db_path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
389
390
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
391
class SqliteGitShaMap(GitShaMap):
    """Bazaar GIT Sha map that uses a sqlite database for storage."""

    def __init__(self, path=None):
        """Open (and if necessary create) the sha map database.

        :param path: Path of the database file, or None for an in-memory
            database. Connections to files are taken from, and stored in,
            the per-thread cache returned by mapdbs().
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            connections = mapdbs()
            # "in" instead of has_key(), which Python 3 no longer has.
            if path not in connections:
                connections[path] = sqlite3.connect(path)
            self.db = connections[path]
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")
        try:
            self.db.executescript(
                "ALTER TABLE commits ADD testament3_sha1 TEXT;")
        except sqlite3.OperationalError:
            pass # Column already exists.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the commit sha stored for a Bazaar revision id.

        :raise KeyError: if the revision id is not in the commits table
        """
        cursor = self.db.execute("select sha1 from commits where revid = ?",
            (revid,))
        row = cursor.fetchone()
        if row is not None:
            return row[0]
        # Name the missing key, as lookup_blob_id/lookup_tree_id do.
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit the pending database transaction."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raise KeyError: if there is no such row in the blobs table
        """
        row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raise KeyError: if there is no such row in the trees table
        """
        row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator: the queries run, and KeyError is raised, only
        once it is iterated.

        :param sha: Git object sha
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers
            tree: fileid, revid
            blob: fileid, revid
        :raise KeyError: after all tables were searched, if no row matched
        """
        found = False
        cursor = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            if row[2] is not None:
                verifiers = {"testament3-sha1": row[2]}
            else:
                verifiers = {}
            yield ("commit", (row[0], row[1], verifiers))
        cursor = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("blob", row)
        cursor = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("tree", row)
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (row for (row,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # Table names come from this fixed tuple, never from input.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
496
497
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
498
class TdbCacheUpdater(CacheUpdater):
    """Cache updater for tdb-based caches.

    Entries are written to the underlying database as soon as an object is
    added; ``finish`` only hands back the commit object.
    """

    def __init__(self, cache, rev):
        """Prepare an updater for one revision.

        :param cache: Cache whose ``idmap.db`` database is written to
        :param rev: Revision being converted; its revision id and parent
            ids are remembered
        """
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie, path):
        """Record a Git object in the database.

        :param obj: Git object whose ``type_name`` is "commit", "blob" or
            "tree"; anything else raises AssertionError
        :param ie: For commits, a dictionary of verifiers.  For blobs and
            trees, the matching inventory entry, or None to skip the object.
        :param path: Not used by this backend
        """
        binsha = obj.sha().digest()
        kind = obj.type_name
        if kind == "commit":
            # The first 20 bytes of the value are the binary sha; see
            # TdbGitShaMap.lookup_commit.
            self.db["commit\0" + self.revid] = "\0".join((binsha, obj.tree))
            assert type(ie) is dict, "was %r" % ie
            try:
                testament = ie["testament3-sha1"]
            except KeyError:
                type_data = (self.revid, obj.tree)
            else:
                type_data = (self.revid, obj.tree, testament)
            self._commit = obj
        elif kind == "blob":
            if ie is None:
                return
            self.db["\0".join(("blob", ie.file_id, ie.revision))] = binsha
            type_data = (ie.file_id, ie.revision)
        elif kind == "tree":
            if ie is None:
                return
            type_data = (ie.file_id, self.revid)
        else:
            raise AssertionError
        # One newline-terminated line per Bazaar object that maps to this
        # Git sha; several objects may share the same sha.
        entry = "\0".join((kind, ) + type_data) + "\n"
        key = "git\0" + binsha
        try:
            previous = self.db[key]
        except KeyError:
            self.db[key] = entry
            return
        if previous[-1] != "\n":
            # Existing value lacks the trailing newline; add the separator.
            previous = previous + "\n"
        self.db[key] = previous + entry

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object has been added
        """
        if self._commit is not None:
            return self._commit
        raise AssertionError("No commit object added")
547
548
549
def TdbBzrGitCache(p):
    """Create a BzrGitCache backed by a TdbGitShaMap opened at path ``p``.

    No content cache is passed; updates go through TdbCacheUpdater.
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
550
0.200.1140 by Jelmer Vernooij
Update now that the control dir formats are no longer in __init__.
551
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
552
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format for tdb-based caches."""

    def get_format_string(self):
        """Return the marker string identifying this cache format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the ``idmap.tdb`` cache belonging to a transport.

        For transports without a local path the directory returned by
        get_cache_dir() is used instead.

        :param transport: Transport of the cache directory
        :raises ImportError: if opening the cache fails with ImportError
            (the 'tdb' module is not installed)
        """
        try:
            local_dir = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        else:
            basepath = local_dir.encode(osutils._fs_enc)
        assert isinstance(basepath, str)
        db_path = os.path.join(basepath, "idmap.tdb")
        try:
            return TdbBzrGitCache(db_path)
        except ImportError:
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
570
571
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
572
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (key and value fields are separated by NUL bytes; Git shas in
    keys and values are stored in binary form):

    "git <sha1>" -> "<type> <type-data1> <type-data2>", one line per object
    "commit revid" -> "<sha1> <tree-id>"
    "tree fileid revid" -> "<sha1>"
    "blob fileid revid" -> "<sha1>"
    """

    TDB_MAP_VERSION = 3
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open the map.

        :param path: Path of the tdb file, created if missing.  With None a
            plain dictionary is used instead of a database.
        """
        import tdb
        self.path = path
        if path is not None:
            assert isinstance(path, str)
            # Databases are shared through mapdbs(), keyed by path.
            if not mapdbs().has_key(path):
                mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE,
                                          tdb.DEFAULT, os.O_RDWR|os.O_CREAT)
            self.db = mapdbs()[path]
        else:
            self.db = {}
        try:
            stored_version = self.db["version"]
            if int(stored_version) not in (2, 3):
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              stored_version, self.TDB_MAP_VERSION)
                self.db.clear()
        except KeyError:
            # No version recorded yet.
            pass
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Begin a database transaction."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit the running database transaction."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Cancel the running database transaction."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex Git sha of the commit stored for a revision id.

        :raises KeyError: if the revision has no cache entry
        """
        try:
            value = self.db["commit\0" + revid]
        except KeyError:
            raise KeyError("No cache entry for %r" % revid)
        # The value starts with the 20 byte binary sha.
        return sha_to_hex(value[:20])

    def lookup_blob_id(self, fileid, revision):
        """Return the hex Git sha of the blob for a file id and revision."""
        key = "\0".join(("blob", fileid, revision))
        return sha_to_hex(self.db[key])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha, as 40 character hex or in binary form
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dictionary
            blob: fileid, revid
            tree: fileid, revid
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        for line in self.db["git\0" + sha].splitlines():
            fields = line.split("\0")
            kind = fields[0]
            if kind == "commit":
                verifiers = {}
                if len(fields) != 3:
                    verifiers["testament3-sha1"] = fields[3]
                yield (kind, (fields[1], fields[2], verifiers))
            elif kind in ("tree", "blob"):
                yield (kind, tuple(fields[1:]))
            else:
                raise AssertionError("unknown type %r" % kind)

    def missing_revisions(self, revids):
        """Return the set of given revision ids without a commit entry."""
        return set(revid for revid in revids
                   if self.db.get("commit\0" + revid) is None)

    def revids(self):
        """List the revision ids known."""
        prefix = "commit\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield key[len(prefix):]

    def sha1s(self):
        """List the SHA1s."""
        prefix = "git\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield sha_to_hex(key[len(prefix):])
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
672
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
673
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
674
class VersionedFilesContentCache(ContentCache):
    """Content cache that keeps Git objects in a versioned files store."""

    def __init__(self, vf):
        """Wrap the versioned files object ``vf``."""
        self._vf = vf

    def add(self, obj):
        """Insert a Git object as a chunked record keyed by its id."""
        record = versionedfile.ChunkedContentFactory(
            (obj.id,), [], None, obj.as_legacy_object_chunks())
        self._vf.insert_record_stream([record])

    def __getitem__(self, sha):
        """Return the Git object stored under ``sha``.

        :raises KeyError: if the store reports the record as absent
        """
        records = self._vf.get_record_stream([(sha,)], 'unordered', True)
        record = records.next()
        if record.storage_kind == 'absent':
            raise KeyError(sha)
        fulltext = record.get_bytes_as('fulltext')
        return ShaFile._parse_legacy_object(fulltext)
690
691
0.254.52 by Jelmer Vernooij
Merge trunk, use git objects to cache tree objects.
692
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates to a Git object store."""

    def __init__(self, store):
        """Wrap the object store ``store``."""
        self.store = store

    def add_multi(self, objs):
        """Hand a collection of objects to the store's add_objects()."""
        self.store.add_objects(objs)

    def add(self, obj, path):
        """Add a single object to the store; ``path`` is not used."""
        self.store.add_object(obj)

    def __getitem__(self, sha):
        """Return whatever the store holds under ``sha``."""
        return self.store[sha]
705
706
0.254.46 by Jelmer Vernooij
Merge trunk.
707
class IndexCacheUpdater(CacheUpdater):
    """Cache updater for index-based caches.

    SHA map entries are added as objects arrive.  Objects destined for the
    content cache are collected and handed over in one call by ``finish``.
    """

    def __init__(self, cache, rev):
        """Prepare an updater for one revision.

        :param cache: Cache providing ``idmap`` and ``content_cache``
        :param rev: Revision being converted; its revision id and parent
            ids are remembered
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []
        self._cache_objs = set()

    def add_object(self, obj, ie, path):
        """Record a Git object in the SHA map.

        Commits and trees are always queued for the content cache; blobs
        only when the inventory entry is a symlink.

        :param obj: Git object whose ``type_name`` is "commit", "blob" or
            "tree"; anything else raises AssertionError
        :param ie: Dictionary of verifiers for commits, inventory entry
            for blobs and trees
        :param path: Path queued together with the object
        """
        kind = obj.type_name
        if kind not in ("commit", "blob", "tree"):
            raise AssertionError
        if kind == "commit":
            self._commit = obj
            assert type(ie) is dict
            self.cache.idmap._add_git_sha(
                obj.id, "commit", (self.revid, obj.tree, ie))
            self.cache.idmap._add_node(
                ("commit", self.revid, "X"), " ".join((obj.id, obj.tree)))
            keep_contents = True
        elif kind == "blob":
            blob_key = (ie.file_id, ie.revision)
            self.cache.idmap._add_git_sha(obj.id, "blob", blob_key)
            self.cache.idmap._add_node(("blob",) + blob_key, obj.id)
            keep_contents = (ie.kind == "symlink")
        else:
            self.cache.idmap._add_git_sha(
                obj.id, "tree", (ie.file_id, self.revid))
            keep_contents = True
        if keep_contents:
            self._cache_objs.add((obj, path))

    def finish(self):
        """Write the queued objects to the content cache.

        :return: the commit object added, or None if there was none
        """
        self.cache.content_cache.add_multi(self._cache_objs)
        return self._commit
742
743
744
class IndexBzrGitCache(BzrGitCache):
    """Git cache combining an index-based SHA map with a Git object store."""

    def __init__(self, transport=None):
        """Open the cache found at a transport.

        The SHA map lives in the ``index`` location below the transport and
        the cached Git objects in ``objects``.

        :param transport: Transport of the cache directory.  The default of
            None is kept for compatibility only; ``clone`` is called on the
            value unconditionally, so a real transport is required.
        """
        shamap = IndexGitShaMap(transport.clone('index'))
        # Imported here rather than at module level, as in the original.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
0.254.46 by Jelmer Vernooij
Merge trunk.
756
757
0.254.43 by Jelmer Vernooij
Merge trunk.
758
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format pairing a SHA map index with a Git object cache."""

    def get_format_string(self):
        """Return the marker string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the on-disk layout of a new cache.

        After the base class initialisation, the ``index`` and ``objects``
        directories are created and an object store is initialised in
        ``objects``.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for subdir in ('index', 'objects'):
            transport.mkdir(subdir)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Return an IndexBzrGitCache for the cache at ``transport``."""
        return IndexBzrGitCache(transport)
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
772
773
774
class IndexGitShaMap(GitShaMap):
    """SHA Map that uses the Bazaar APIs to store a cache.

    BTree Index file with the following contents:

    ("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
    ("commit", <revid>, "X") -> "<sha1> <tree-id>"
    ("blob", <fileid>, <revid>) -> <sha1>

    Values are space separated.  The "git" entry of a commit may carry the
    testament3-sha1 verifier as an additional trailing field.
    """

    def __init__(self, transport=None):
        """Open the map.

        :param transport: Transport of the directory holding the ``.rix``
            index files.  With None, an in-memory index is used which also
            serves as its own builder.
        """
        if transport is None:
            self._transport = None
            self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
            self._builder = self._index
        else:
            self._builder = None
            self._transport = transport
            self._index = _mod_index.CombinedGraphIndex([])
            for name in self._transport.list_dir("."):
                if not name.endswith(".rix"):
                    continue
                x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
                    self._transport.stat(name).st_size)
                self._index.insert_index(0, x)

    @classmethod
    def from_repository(cls, repository):
        """Open the map for a repository.

        Uses the ``git`` directory below the repository's ``_transport``
        (creating it if needed), or the directory returned by
        get_cache_dir() when the repository has no such transport.
        """
        transport = getattr(repository, "_transport", None)
        if transport is not None:
            try:
                transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            return cls(transport.clone('git'))
        from bzrlib.transport import get_transport
        return cls(get_transport(get_cache_dir()))

    def __repr__(self):
        if self._transport is not None:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)
        else:
            return "%s()" % (self.__class__.__name__)

    def repack(self):
        """Copy all entries into one new index file.

        The ``.rix`` files that existed before the new file was written are
        renamed to ``<name>.old`` afterwards.
        """
        assert self._builder is None
        self.start_write_group()
        for _, key, value in self._index.iter_all_entries():
            self._builder.add_node(key, value)
        to_remove = []
        for name in self._transport.list_dir('.'):
            if name.endswith('.rix'):
                to_remove.append(name)
        self.commit_write_group()
        # Keep only the index inserted by commit_write_group().
        # NOTE(review): relies on the combined index exposing an ``indices``
        # list - confirm against the bzrlib version in use.
        del self._index.indices[1:]
        for name in to_remove:
            self._transport.rename(name, name + '.old')

    def start_write_group(self):
        """Start collecting new entries in a fresh BTree builder."""
        assert self._builder is None
        self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
        # Hash over the added shas; its hex digest names the index file.
        self._name = osutils.sha()

    def commit_write_group(self):
        """Write the collected entries to a new ``.rix`` file and load it."""
        assert self._builder is not None
        stream = self._builder.finish()
        name = self._name.hexdigest() + ".rix"
        size = self._transport.put_file(name, stream)
        index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
        self._index.insert_index(0, index)
        self._builder = None
        self._name = None

    def abort_write_group(self):
        """Discard the entries collected since start_write_group()."""
        assert self._builder is not None
        self._builder = None
        self._name = None

    def _add_node(self, key, value):
        """Add an entry to the builder.

        :return: True if the key was already present (the new value is
            dropped), False if the entry was added
        """
        try:
            self._builder.add_node(key, value)
        except bzrlib.errors.BadIndexDuplicateKey:
            # Multiple bzr objects can have the same contents
            return True
        else:
            return False

    def _get_entry(self, key):
        """Return the value stored for ``key``.

        The committed index is consulted first, then the builder of a
        running write group.

        :raises KeyError: if the key is in neither
        """
        entries = self._index.iter_entries([key])
        try:
            return entries.next()[2]
        except StopIteration:
            if self._builder is None:
                raise KeyError(key)
            entries = self._builder.iter_entries([key])
            try:
                return entries.next()[2]
            except StopIteration:
                raise KeyError(key)

    def _iter_entries_prefix(self, prefix):
        """Yield (key, value) for all entries matching a key prefix.

        Covers the committed index and, during a write group, the builder.
        """
        for entry in self._index.iter_entries_prefix([prefix]):
            yield (entry[1], entry[2])
        # For the in-memory map the builder *is* the index; scanning it a
        # second time would report every entry twice.
        if self._builder is not None and self._builder is not self._index:
            for entry in self._builder.iter_entries_prefix([prefix]):
                yield (entry[1], entry[2])

    def lookup_commit(self, revid):
        """Return the hex Git sha of the commit stored for a revision id.

        :raises KeyError: if the revision has no entry
        """
        return self._get_entry(("commit", revid, "X"))[:40]

    def _add_git_sha(self, hexsha, type, type_data):
        """Add the ("git", hexsha, "X") entry for an object.

        :param hexsha: Hex sha of the Git object, or None if the object is
            not represented in Git
        :param type: "commit", "blob" or "tree"
        :param type_data: (revid, tree sha, verifiers dict) for commits,
            otherwise a tuple of strings
        """
        if hexsha is not None:
            self._name.update(hexsha)
            if type == "commit":
                td = (type_data[0], type_data[1])
                try:
                    td += (type_data[2]["testament3-sha1"],)
                except KeyError:
                    # No testament verifier: the field is simply left out.
                    pass
            else:
                td = type_data
            self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
        else:
            # This object is not represented in Git - perhaps an empty
            # directory?
            self._name.update(type + " ".join(type_data))

    def lookup_blob_id(self, fileid, revision):
        """Return the Git sha stored for a file id and revision.

        :raises KeyError: if there is no such entry
        """
        return self._get_entry(("blob", fileid, revision))

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the index.

        :param sha: Git object sha, as 40 character hex or 20 byte binary
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dictionary
            blob: fileid, revid
            tree: fileid, revid
        :raises KeyError: once the iterator is exhausted without having
            found any entry
        """
        if len(sha) == 20:
            sha = sha_to_hex(sha)
        found = False
        for key, value in self._iter_entries_prefix(("git", sha, None)):
            found = True
            data = value.split(" ", 3)
            if data[0] == "commit":
                # _add_git_sha() omits the testament field when there is no
                # verifier, so it must not be assumed to be present.
                if len(data) > 3 and data[3]:
                    verifiers = {"testament3-sha1": data[3]}
                else:
                    verifiers = {}
                yield ("commit", (data[1], data[2], verifiers))
            else:
                yield (data[0], tuple(data[1:]))
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        for key, value in self._iter_entries_prefix(("commit", None, None)):
            yield key[1]

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present."""
        missing_revids = set(revids)
        # Build the keys from a snapshot: ``revids`` may be a one-shot
        # iterator, and the set shrinks while the results are consumed.
        keys = [("commit", revid, "X") for revid in missing_revids]
        for _, key, value in self._index.iter_entries(keys):
            missing_revids.discard(key[1])
        return missing_revids

    def sha1s(self):
        """List the SHA1s."""
        for key, value in self._iter_entries_prefix(("git", None, None)):
            yield key[1]
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
940
941
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
942
# Registry of the known cache formats, keyed by format string.  The entry
# named 'default' is the format picked for this installation.
formats = registry.Registry()
for _format_class in (TdbGitCacheFormat, SqliteGitCacheFormat,
                      IndexGitCacheFormat):
    formats.register(_format_class().get_format_string(), _format_class())
del _format_class
# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())
try:
    import tdb
except ImportError:
    # No tdb module available: fall back to sqlite.
    formats.register('default', SqliteGitCacheFormat())
else:
    formats.register('default', TdbGitCacheFormat())
957
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
958
959
960
def migrate_ancient_formats(repo_transport):
    """Move a cache file from its old location into the ``git`` directory.

    Older versions kept ``git.db`` (sqlite) or ``git.tdb`` (tdb) directly
    in the repository directory.  If either exists and there is no ``git``
    directory yet, the directory is created, initialised with the matching
    cache format, and the file is renamed to ``git/idmap.db`` or
    ``git/idmap.tdb``.

    :param repo_transport: Transport of the repository directory; a
        read-only decorator is stripped first
    """
    # Migrate older cache formats
    repo_transport = remove_readonly_transport_decorator(repo_transport)
    has_sqlite = repo_transport.has("git.db")
    has_tdb = repo_transport.has("git.tdb")
    # Nothing to do unless at least one old-style file exists.  (This used
    # to read "not has_sqlite or has_tdb", which bailed out whenever a tdb
    # file was present and made the tdb branch below unreachable.)
    if not (has_sqlite or has_tdb):
        return
    try:
        repo_transport.mkdir("git")
    except bzrlib.errors.FileExists:
        # A new-style cache directory already exists; leave it alone.
        return
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if has_sqlite:
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif has_tdb:
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
979
980
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
981
def remove_readonly_transport_decorator(transport):
    """Return the transport wrapped by a read-only transport.

    Transports that do not report themselves as read-only are returned
    unchanged.
    """
    if not transport.is_readonly():
        return transport
    return transport._decorated
985
986
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
987
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository exposes a ``_transport``, caches still stored in an
    ancient layout are migrated first.  If the repository is remote and
    there is no transport available from it, a local file in the user's
    cache directory (typically ~/.cache/bazaar/git/) is used.

    :param repository: A repository object
    :return: whatever BzrGitCacheFormat.from_repository() returns
    """
    try:
        repo_transport = repository._transport
    except AttributeError:
        repo_transport = None
    if repo_transport is not None:
        migrate_ancient_formats(repo_transport)
    return BzrGitCacheFormat.from_repository(repository)