/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
19
from dulwich.objects import (
20
    sha_to_hex,
21
    hex_to_sha,
22
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
23
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
24
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
25
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
26
from dulwich.objects import (
27
    ShaFile,
28
    )
29
0.200.228 by Jelmer Vernooij
Split out map.
30
import bzrlib
0.200.528 by Jelmer Vernooij
Fix import.
31
from bzrlib import (
0.254.2 by jelmer
use btree indexes
32
    btree_index as _mod_btree_index,
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
33
    index as _mod_index,
34
    osutils,
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
35
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
36
    trace,
0.254.31 by Jelmer Vernooij
Initial work on CHKMap support.
37
    versionedfile,
0.200.528 by Jelmer Vernooij
Fix import.
38
    )
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
39
from bzrlib.transport import (
40
    get_transport,
41
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
42
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
43
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
44
def get_cache_dir():
    """Return the directory used for bzr-git caches, creating it if needed.

    When the ``xdg`` module can be imported the directory is
    ``<xdg_cache_home>/bazaar/git``; otherwise it is ``git`` inside the
    Bazaar configuration directory.

    :return: Path of the cache directory
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from bzrlib.config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    if not os.path.isdir(ret):
        try:
            os.makedirs(ret)
        except OSError:
            # Another process may have created the directory between the
            # isdir() check and makedirs(); only fail if it is still missing.
            if not os.path.isdir(ret):
                raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
55
56
0.200.1221 by Jelmer Vernooij
Support cache for non-local transport properly.
57
def get_remote_cache_transport(repository):
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    The cache lives in the directory returned by get_cache_dir(); when the
    repository has a ``uuid`` attribute, a subdirectory named after it is
    used (and created if missing).

    :param repository: Repository to find a cache location for
    :return: Transport for the chosen cache directory
    """
    uuid = getattr(repository, "uuid", None)
    if uuid is None:
        path = get_cache_dir()
    else:
        path = os.path.join(get_cache_dir(), uuid)
        if not os.path.isdir(path):
            try:
                os.mkdir(path)
            except OSError:
                # Tolerate a concurrent creation of the same directory.
                if not os.path.isdir(path):
                    raise
    return get_transport(path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
69
70
0.200.228 by Jelmer Vernooij
Split out map.
71
def check_pysqlite_version(sqlite3):
    """Check that sqlite library is compatible.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raises bzrlib.errors.BzrError: if the sqlite library is older than 3.3
    """
    # Compare (major, minor) as a tuple rather than field by field.
    if tuple(sqlite3.sqlite_version_info[:2]) < (3, 3):
        trace.warning('Needs at least sqlite 3.3.x')
        raise bzrlib.errors.BzrError("incompatible sqlite library")
80
81
# Locate a usable sqlite binding: prefer the standard library module and
# fall back to pysqlite2 when it is missing or too old.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzrlib.errors.BzrError):
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
except (ImportError, bzrlib.errors.BzrError):
    # Only the "no usable binding" failures are translated; anything else
    # (KeyboardInterrupt, programming errors) propagates unchanged.
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzrlib.errors.BzrError("missing sqlite library")
92
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
93
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
94
# Thread-local holder; each thread gets its own ``cache`` attribute.
_mapdbs = threading.local()


def mapdbs():
    """Get a cache for this thread's db connections.

    :return: Dictionary private to the calling thread; it is created empty
        the first time a thread asks for it.
    """
    try:
        return _mapdbs.cache
    except AttributeError:
        cache = _mapdbs.cache = {}
        return cache
102
103
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
104
class GitShaMap(object):
    """Git<->Bzr revision id mapping database.

    Base class: lookups raise NotImplementedError and the write-group
    hooks do nothing unless a subclass overrides them.
    """

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha
        :return: list with (type, type_data) tuples with type_data:
            commit: revid, tree_sha, verifiers
            blob: fileid, revid
            tree: fileid, revid
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Retrieve a Git blob SHA by file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Retrieve a Git tree SHA by file id.

        :param file_id: File id of the tree
        :param revision: Revision the tree belongs to
        """
        raise NotImplementedError(self.lookup_tree_id)

    def lookup_commit(self, revid):
        """Retrieve a Git commit SHA by Bazaar revision id.

        :param revid: Bazaar revision id
        """
        raise NotImplementedError(self.lookup_commit)

    def revids(self):
        """List the revision ids known."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        :param revids: Iterable of revision ids to check
        :return: Set of the given revision ids not reported by revids()
        """
        present_revids = set(self.revids())
        if not isinstance(revids, set):
            revids = set(revids)
        return revids - present_revids

    def sha1s(self):
        """List the SHA1s."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Start writing changes."""

    def commit_write_group(self):
        """Commit any pending changes."""

    def abort_write_group(self):
        """Abort any pending changes."""
158
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
159
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
160
class ContentCache(object):
    """Object that can cache Git objects."""

    def add(self, object):
        """Add an object.

        :param object: Object to store in the cache
        """
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Add multiple objects.

        The default implementation calls add() once per object, in order.

        :param objects: Iterable of objects to store
        """
        for obj in objects:
            self.add(obj)

    def __getitem__(self, sha):
        """Retrieve an item, by SHA."""
        raise NotImplementedError(self.__getitem__)
175
176
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
177
class BzrGitCacheFormat(object):
    """Bazaar-Git Cache Format."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Create a new instance of this cache format at transport.

        Writes the format string to a file named ``format``.
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        The format is chosen from the contents of the ``format`` file; when
        that file does not exist the ``default`` format is initialized.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the users global cache directory if the repository has no
        local, writable transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        from bzrlib.transport.local import LocalTransport
        transport = None
        repo_transport = getattr(repository, "_transport", None)
        # isinstance() is already False for None, so no separate None check.
        if isinstance(repo_transport, LocalTransport):
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            try:
                repo_transport = remove_readonly_transport_decorator(repo_transport)
            except bzrlib.errors.ReadOnlyError:
                pass
            else:
                try:
                    repo_transport.mkdir('git')
                except bzrlib.errors.FileExists:
                    pass
                transport = repo_transport.clone('git')
        if transport is None:
            transport = get_remote_cache_transport(repository)
        return cls.from_transport(transport)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
239
240
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
241
class CacheUpdater(object):
    """Base class for objects that can update a bzr-git cache."""

    def add_object(self, obj, bzr_key_data, path):
        """Add an object.

        :param obj: Git object to add; its ``type_name`` is "commit",
            "blob" or "tree"
        :param bzr_key_data: bzr key store data, or a dictionary with
            verifiers (e.g. "testament3-sha1") in case of a commit
        :param path: Path of the object (optional)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Finish the update once all objects have been added."""
        raise NotImplementedError(self.finish)
256
257
258
class BzrGitCache(object):
    """Caching backend.

    Bundles an id map, an optional content cache and the class used to
    create updaters for them.
    """

    def __init__(self, idmap, content_cache, cache_updater_klass):
        """Create a cache.

        :param idmap: Sha map object
        :param content_cache: Content cache object, or None
        :param cache_updater_klass: Callable taking (cache, rev) that
            returns an updater for this cache
        """
        self.idmap = idmap
        self.content_cache = content_cache
        self._cache_updater_klass = cache_updater_klass

    def get_updater(self, rev):
        """Obtain an object that implements the CacheUpdater interface for
        updating this cache.

        :param rev: Revision the updater will add objects for
        """
        return self._cache_updater_klass(self, rev)
271
272
273
def DictBzrGitCache():
    """Create a BzrGitCache backed by dictionaries, without content cache."""
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
274
275
276
class DictCacheUpdater(CacheUpdater):
    """Cache updater for dict-based caches."""

    def __init__(self, cache, rev):
        """Create an updater.

        :param cache: Cache whose ``idmap`` dictionaries are updated
        :param rev: Revision object; its ``revision_id`` and ``parent_ids``
            are recorded
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, bzr_key_data, path):
        """Record a Git object in the dictionaries of the id map.

        :param obj: Git object; ``type_name`` must be "commit", "blob" or
            "tree"
        :param bzr_key_data: Dictionary with verifiers for a commit;
            (fileid, revision) data for a blob or tree, or None to skip
        :param path: Path of the object (unused here)
        :raises AssertionError: for any other object type
        """
        idmap = self.cache.idmap
        if obj.type_name == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            key = self.revid
            type_data = (self.revid, self._commit.tree, bzr_key_data)
            idmap._by_revid[self.revid] = obj.id
        elif obj.type_name in ("blob", "tree"):
            if bzr_key_data is None:
                # Nothing to map this object to; without this return the
                # code below would hit unbound 'key' / 'type_data'.
                return
            if obj.type_name == "blob":
                revision = bzr_key_data[1]
            else:
                # Trees are always keyed on the revision being added.
                revision = self.revid
            key = type_data = (bzr_key_data[0], revision)
            idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
        else:
            raise AssertionError
        entry = (obj.type_name, type_data)
        idmap._by_sha.setdefault(obj.id, {})[key] = entry

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
310
311
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
312
class DictGitShaMap(GitShaMap):
    """Git SHA map that uses a dictionary."""

    def __init__(self):
        # sha -> {key: (type, type_data)}
        self._by_sha = {}
        # revision -> {fileid: sha}
        self._by_fileid = {}
        # revid -> commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Retrieve a Git blob SHA by file id and revision.

        :raises KeyError: if there is no entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Yield the (type, type_data) entries recorded for a Git sha.

        :raises KeyError: on first iteration, if the sha is unknown
        """
        # .values() rather than .itervalues(): works on Python 2 and 3.
        for entry in self._by_sha[sha].values():
            yield entry

    def lookup_tree_id(self, fileid, revision):
        """Retrieve a Git tree SHA by file id and revision.

        :raises KeyError: if there is no entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Retrieve a Git commit SHA by Bazaar revision id.

        :raises KeyError: if the revision id is unknown
        """
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision ids of all commit entries."""
        for entries in self._by_sha.values():
            for (kind, type_data) in entries.values():
                if kind == "commit":
                    yield type_data[0]

    def sha1s(self):
        """Return an iterator over the known SHA1s."""
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
341
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
342
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
343
class SqliteCacheUpdater(CacheUpdater):
    """Cache updater for sqlite-based caches.

    Objects are collected in memory by add_object() and written to the
    database in finish().
    """

    def __init__(self, cache, rev):
        """Create an updater.

        :param cache: Cache whose ``idmap.db`` connection is written to
        :param rev: Revision object; its ``revision_id`` is recorded
        """
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        # Set together with _commit in add_object(); initialised here so
        # the attribute always exists.
        self._testament3_sha1 = None
        self._trees = []
        self._blobs = []

    def add_object(self, obj, bzr_key_data, path):
        """Queue a Git object for insertion.

        :param obj: Git object; ``type_name`` must be "commit", "blob" or
            "tree"
        :param bzr_key_data: Dictionary with verifiers for a commit;
            (fileid, revision) data for a blob or tree, or None to skip
        :param path: Path of the object (unused here)
        :raises AssertionError: for any other object type
        """
        if obj.type_name == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            self._testament3_sha1 = bzr_key_data.get("testament3-sha1")
        elif obj.type_name == "tree":
            if bzr_key_data is not None:
                # Trees are stored against the revision being added.
                self._trees.append((obj.id, bzr_key_data[0], self.revid))
        elif obj.type_name == "blob":
            if bzr_key_data is not None:
                self._blobs.append((obj.id, bzr_key_data[0], bzr_key_data[1]))
        else:
            raise AssertionError

    def finish(self):
        """Write the queued trees, blobs and the commit to the database.

        :return: The commit object that was added
        :raises AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        self.db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        self.db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        self.db.execute(
            "replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
            (self._commit.id, self.revid, self._commit.tree, self._testament3_sha1))
        return self._commit
380
381
382
def SqliteBzrGitCache(p):
    """Create a BzrGitCache backed by a sqlite sha map, without content cache.

    :param p: Path passed on to SqliteGitShaMap
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
383
384
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
385
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that stores the sha map in a sqlite database."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the ``idmap.db`` database belonging to a transport.

        For a transport without a local path the database in the
        directory returned by get_cache_dir() is used instead.

        :param transport: Transport to open the cache on
        :return: A BzrGitCache
        """
        try:
            basepath = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        return SqliteBzrGitCache(os.path.join(basepath, "idmap.db"))
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
396
397
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
398
class SqliteGitShaMap(GitShaMap):
    """Bazaar GIT Sha map that uses a sqlite database for storage."""

    def __init__(self, path=None):
        """Open (and if necessary create) the sha map database.

        :param path: Path of the sqlite database file, or None for a
            private in-memory database
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            # Reuse this thread's connection to the same file, if any.
            dbs = mapdbs()
            if path not in dbs:
                dbs[path] = sqlite3.connect(path)
            self.db = dbs[path]
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")
        try:
            self.db.executescript(
                "ALTER TABLE commits ADD testament3_sha1 TEXT;")
        except sqlite3.OperationalError:
            pass # Column already exists.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Retrieve a Git commit SHA by Bazaar revision id.

        :raises KeyError: if the revision id is unknown
        """
        cursor = self.db.execute("select sha1 from commits where revid = ?",
            (revid,))
        row = cursor.fetchone()
        if row is not None:
            return row[0]
        # Name the missing key, like the other lookup methods do.
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit any pending changes to the database."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Retrieve a Git blob SHA by file id and revision.

        :raises KeyError: if there is no matching row
        """
        row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Retrieve a Git tree SHA by file id and revision.

        :raises KeyError: if there is no matching row
        """
        row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator, so the KeyError for an unknown sha is only
        raised once the result is iterated.

        :param sha: Git object sha
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers
            tree: fileid, revid
            blob: fileid, revid
        :raises KeyError: if no table has a row for the sha
        """
        found = False
        cursor = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            if row[2] is not None:
                verifiers = {"testament3-sha1": row[2]}
            else:
                verifiers = {}
            yield ("commit", (row[0], row[1], verifiers))
        cursor = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("blob", row)
        cursor = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("tree", row)
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (row for (row,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # Table names come from this fixed tuple, never from callers.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
503
504
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
505
class TdbCacheUpdater(CacheUpdater):
    """Cache updater for tdb-based caches.

    Writes the entries for the git objects of one revision straight into
    the database of the cache's id map.
    """

    def __init__(self, cache, rev):
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, bzr_key_data, path):
        """Record a git object in the database.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree".
        :param bzr_key_data: For commits a dict of verifiers, for blobs a
            (fileid, revid) pair, for trees a 1-tuple (fileid,). Blobs and
            trees with None here are silently skipped.
        :param path: Not used by this updater.
        :raises AssertionError: if the object type is not recognised.
        """
        sha = obj.sha().digest()
        kind = obj.type_name
        if kind == "commit":
            self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
            assert type(bzr_key_data) is dict, "was %r" % bzr_key_data
            # The testament is only stored when the verifiers provide one.
            if "testament3-sha1" in bzr_key_data:
                type_data = (self.revid, obj.tree,
                             bzr_key_data["testament3-sha1"])
            else:
                type_data = (self.revid, obj.tree)
            self._commit = obj
        elif kind == "blob":
            if bzr_key_data is None:
                return
            blob_key = "\0".join(("blob", bzr_key_data[0], bzr_key_data[1]))
            self.db[blob_key] = sha
            type_data = bzr_key_data
        elif kind == "tree":
            if bzr_key_data is None:
                return
            (file_id, ) = bzr_key_data
            type_data = (file_id, self.revid)
        else:
            raise AssertionError

        # One git sha may map to several bzr objects: the value under the
        # "git" key holds one newline-terminated entry per object.
        entry = "\0".join((obj.type_name, ) + type_data) + "\n"
        key = "git\0" + sha
        try:
            existing = self.db[key]
        except KeyError:
            self.db[key] = entry
            return
        if existing[-1] == "\n":
            pieces = [existing, entry]
        else:
            # Older values lack the trailing newline; add the separator.
            pieces = [existing, "\n", entry]
        self.db[key] = "".join(pieces)

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object was added.
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
555
556
557
def TdbBzrGitCache(p):
    """Create a BzrGitCache on top of a TdbGitShaMap opened at ``p``.

    None is passed as the second BzrGitCache argument and TdbCacheUpdater
    as the updater class.
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
558
0.200.1140 by Jelmer Vernooij
Update now that the control dir formats are no longer in __init__.
559
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
560
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format for tdb-based caches."""

    def get_format_string(self):
        """Return the string identifying this cache format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the tdb cache stored in ``idmap.tdb``.

        The file lives in the local directory behind ``transport``, or in
        the directory returned by get_cache_dir() when the transport is not
        local.

        :raises ImportError: if opening the cache raises ImportError (the
            'tdb' module is imported when the sha map is constructed).
        """
        try:
            basepath = transport.local_abspath(".").encode(osutils._fs_enc)
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        assert isinstance(basepath, str)
        db_path = os.path.join(basepath, "idmap.tdb")
        try:
            cache = TdbBzrGitCache(db_path)
        except ImportError:
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
        return cache
578
579
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
580
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (the fields of keys and values are separated by NUL bytes, and
    a "git" value holds one newline-separated line per matching object):

    "git <sha1>" -> "<type> <type-data1> <type-data2>"
    "commit revid" -> "<sha1> <tree-id>"
    "tree fileid revid" -> "<sha1>"
    "blob fileid revid" -> "<sha1>"
    """

    TDB_MAP_VERSION = 3
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open (or create) the map.

        :param path: Path of the tdb file as a str, or None to use a plain
            in-memory dict instead of a tdb database.
        """
        import tdb
        self.path = path
        if path is None:
            self.db = {}
        else:
            assert isinstance(path, str)
            # Reuse an already opened database for this path if mapdbs()
            # has one, rather than opening the same file again.
            open_dbs = mapdbs()
            if path not in open_dbs:
                open_dbs[path] = tdb.Tdb(path, self.TDB_HASH_SIZE,
                                         tdb.DEFAULT, os.O_RDWR|os.O_CREAT)
            self.db = open_dbs[path]
        try:
            if int(self.db["version"]) not in (2, 3):
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              self.db["version"], self.TDB_MAP_VERSION)
                self.db.clear()
        except KeyError:
            # No version recorded yet (e.g. a freshly created database).
            pass
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Start writing changes."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit any pending changes."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Abort any pending changes."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex git sha of the commit stored for ``revid``.

        :raises KeyError: if there is no entry for the revision.
        """
        try:
            # The value starts with the 20-byte binary sha.
            return sha_to_hex(self.db["commit\0" + revid][:20])
        except KeyError:
            raise KeyError("No cache entry for %r" % revid)

    def lookup_blob_id(self, fileid, revision):
        """Return the hex git sha of the blob stored for (fileid, revision).

        :raises KeyError: if there is no entry for that blob.
        """
        return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator, so the KeyError for an unknown sha is only
        raised once iteration starts.

        :param sha: Git object sha, either 40 hex characters or the
            20-byte binary form
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        :raises KeyError: if the sha is not in the database
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        value = self.db["git\0" + sha]
        for data in value.splitlines():
            data = data.split("\0")
            if data[0] == "commit":
                if len(data) == 3:
                    # Stored without a testament sha: no verifiers.
                    yield (data[0], (data[1], data[2], {}))
                else:
                    yield (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
            elif data[0] in ("tree", "blob"):
                yield (data[0], tuple(data[1:]))
            else:
                raise AssertionError("unknown type %r" % data[0])

    def missing_revisions(self, revids):
        """Return the set of given revision ids without a commit entry."""
        ret = set()
        for revid in revids:
            if self.db.get("commit\0" + revid) is None:
                ret.add(revid)
        return ret

    def revids(self):
        """List the revision ids known."""
        for key in self.db.iterkeys():
            if key.startswith("commit\0"):
                # Strip the 7-character "commit\0" prefix.
                yield key[7:]

    def sha1s(self):
        """List the SHA1s."""
        for key in self.db.iterkeys():
            if key.startswith("git\0"):
                # Strip the 4-character "git\0" prefix.
                yield sha_to_hex(key[4:])
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
680
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
681
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
682
class VersionedFilesContentCache(ContentCache):
    """Content cache that keeps git objects in a versioned files store."""

    def __init__(self, vf):
        self._vf = vf

    def add(self, obj):
        """Insert ``obj`` as a single record keyed by its id."""
        record = versionedfile.ChunkedContentFactory(
            (obj.id,), [], None, obj.as_legacy_object_chunks())
        self._vf.insert_record_stream([record])

    def __getitem__(self, sha):
        """Return the git object stored under ``sha``.

        :raises KeyError: if the store reports the record as absent.
        """
        records = self._vf.get_record_stream([(sha,)], 'unordered', True)
        record = records.next()
        if record.storage_kind == 'absent':
            raise KeyError(sha)
        fulltext = record.get_bytes_as('fulltext')
        return ShaFile._parse_legacy_object(fulltext)
698
699
0.254.52 by Jelmer Vernooij
Merge trunk, use git objects to cache tree objects.
700
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates to a git object store."""

    def __init__(self, store):
        self.store = store

    def add_multi(self, objs):
        """Hand ``objs`` to the store's add_objects()."""
        object_store = self.store
        object_store.add_objects(objs)

    def add(self, obj, path):
        """Add one object to the store; ``path`` is not used."""
        object_store = self.store
        object_store.add_object(obj)

    def __getitem__(self, sha):
        """Return whatever the store holds under ``sha``."""
        object_store = self.store
        return object_store[sha]
713
714
0.254.46 by Jelmer Vernooij
Merge trunk.
715
class IndexCacheUpdater(CacheUpdater):
    """Cache updater that writes to the cache's index-based id map.

    Commit and tree objects are also collected and passed to the content
    cache when finish() is called.
    """

    def __init__(self, cache, rev):
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []
        self._cache_objs = set()

    def add_object(self, obj, bzr_key_data, path):
        """Record a git object in the id map.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree".
        :param bzr_key_data: For commits a dict of verifiers; for blobs a
            (fileid, revid) pair; for trees a sequence whose first item is
            the file id.
        :param path: Stored alongside commit and tree objects for the
            content cache.
        :raises AssertionError: if the object type is not recognised.
        """
        idmap = self.cache.idmap
        kind = obj.type_name
        if kind == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            commit_data = (self.revid, obj.tree, bzr_key_data)
            idmap._add_git_sha(obj.id, "commit", commit_data)
            idmap._add_node(("commit", self.revid, "X"),
                            " ".join((obj.id, obj.tree)))
            self._cache_objs.add((obj, path))
        elif kind == "blob":
            idmap._add_git_sha(obj.id, "blob", bzr_key_data)
            blob_key = ("blob", bzr_key_data[0], bzr_key_data[1])
            idmap._add_node(blob_key, obj.id)
        elif kind == "tree":
            tree_data = (bzr_key_data[0], self.revid)
            idmap._add_git_sha(obj.id, "tree", tree_data)
            self._cache_objs.add((obj, path))
        else:
            raise AssertionError

    def finish(self):
        """Flush collected objects to the content cache.

        :return: The commit object that was added, or None if there was
            none.
        """
        content_cache = self.cache.content_cache
        content_cache.add_multi(self._cache_objs)
        return self._commit
748
749
750
class IndexBzrGitCache(BzrGitCache):
    """Cache combining an index-based sha map with a git object store."""

    def __init__(self, transport=None):
        """Open the cache found at ``transport``.

        The sha map is opened on the 'index' subdirectory and the git
        object store used as content cache on the 'objects' subdirectory.

        :param transport: Transport of the cache directory. Although the
            parameter defaults to None, it is dereferenced unconditionally,
            so a transport must be supplied.
        """
        shamap = IndexGitShaMap(transport.clone('index'))
        # Imported here rather than at module level, as in the original code.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
0.254.46 by Jelmer Vernooij
Merge trunk.
762
763
0.254.43 by Jelmer Vernooij
Merge trunk.
764
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format using an index-based sha map and a git object store."""

    def get_format_string(self):
        """Return the string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the on-disk layout for a new cache at ``transport``.

        After the base class initialisation, the 'index' and 'objects'
        directories are created and a git object store is initialised in
        'objects'.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for dirname in ('index', 'objects'):
            transport.mkdir(dirname)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Open the existing cache at ``transport``."""
        return IndexBzrGitCache(transport)
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
778
779
780
class IndexGitShaMap(GitShaMap):
    """SHA Map that uses the Bazaar APIs to store a cache.

    BTree Index file with three-element keys and the following contents
    (the fields of a value are separated by single spaces):

    ("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
    ("commit", <revid>, "X") -> "<sha1> <tree-id>"
    ("blob", <fileid>, <revid>) -> <sha1>

    """

    def __init__(self, transport=None):
        """Open the map.

        :param transport: Transport of the directory holding the ``.rix``
            index files, or None for an in-memory index.
        """
        if transport is None:
            self._transport = None
            self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
            self._builder = self._index
        else:
            self._builder = None
            self._transport = transport
            self._index = _mod_index.CombinedGraphIndex([])
            for name in self._transport.list_dir("."):
                if not name.endswith(".rix"):
                    continue
                x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
                    self._transport.stat(name).st_size)
                self._index.insert_index(0, x)

    @classmethod
    def from_repository(cls, repository):
        """Open the map for a repository.

        Uses the 'git' subdirectory of the repository's ``_transport`` if
        it has one (creating the directory when missing), and the
        directory returned by get_cache_dir() otherwise.
        """
        transport = getattr(repository, "_transport", None)
        if transport is not None:
            try:
                transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            return cls(transport.clone('git'))
        from bzrlib.transport import get_transport
        return cls(get_transport(get_cache_dir()))

    def __repr__(self):
        if self._transport is not None:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)
        else:
            return "%s()" % (self.__class__.__name__)

    def repack(self):
        """Rewrite all entries into one new index file.

        The ``.rix`` files that existed before the new index was written
        are renamed to ``<name>.old``.
        """
        assert self._builder is None
        self.start_write_group()
        for _, key, value in self._index.iter_all_entries():
            self._builder.add_node(key, value)
        to_remove = []
        for name in self._transport.list_dir('.'):
            if name.endswith('.rix'):
                to_remove.append(name)
        # NOTE(review): nothing feeds self._name during a repack, so the
        # new file name is the digest of no input; if an earlier repack
        # left a file with that name it is also listed in to_remove --
        # confirm whether that case needs handling.
        self.commit_write_group()
        # commit_write_group() inserted the new index at position 0; drop
        # the older ones. CombinedGraphIndex keeps its list in ``_indices``.
        del self._index._indices[1:]
        for name in to_remove:
            self._transport.rename(name, name + '.old')

    def start_write_group(self):
        """Start collecting new entries in a fresh builder."""
        assert self._builder is None
        self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
        # Hash object fed by _add_git_sha(); its digest names the new file.
        self._name = osutils.sha()

    def commit_write_group(self):
        """Write the collected entries to a new ``.rix`` index file."""
        assert self._builder is not None
        stream = self._builder.finish()
        name = self._name.hexdigest() + ".rix"
        size = self._transport.put_file(name, stream)
        index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
        self._index.insert_index(0, index)
        self._builder = None
        self._name = None

    def abort_write_group(self):
        """Discard the entries collected since start_write_group()."""
        assert self._builder is not None
        self._builder = None
        self._name = None

    def _add_node(self, key, value):
        """Add a node to the builder.

        :return: True if the key was already present (the node is then not
            added), False if the node was added.
        """
        try:
            self._builder.add_node(key, value)
        except bzrlib.errors.BadIndexDuplicateKey:
            # Multiple bzr objects can have the same contents
            return True
        else:
            return False

    def _get_entry(self, key):
        """Return the value stored for ``key``.

        The committed indexes are consulted first, then the builder of the
        current write group, if any.

        :raises KeyError: if the key is in neither.
        """
        entries = self._index.iter_entries([key])
        try:
            return entries.next()[2]
        except StopIteration:
            if self._builder is None:
                raise KeyError(key)
            entries = self._builder.iter_entries([key])
            try:
                return entries.next()[2]
            except StopIteration:
                raise KeyError(key)

    def _iter_entries_prefix(self, prefix):
        """Yield (key, value) for all entries matching ``prefix``.

        Entries come from the committed indexes and then from the builder
        of the current write group, if any.
        """
        for entry in self._index.iter_entries_prefix([prefix]):
            yield (entry[1], entry[2])
        if self._builder is not None:
            for entry in self._builder.iter_entries_prefix([prefix]):
                yield (entry[1], entry[2])

    def lookup_commit(self, revid):
        """Return the hex git sha of the commit stored for ``revid``.

        :raises KeyError: if there is no entry for the revision.
        """
        # The value is "<sha1> <tree-id>"; the sha is the first 40 chars.
        return self._get_entry(("commit", revid, "X"))[:40]

    def _add_git_sha(self, hexsha, type, type_data):
        """Add a ("git", hexsha, "X") entry to the current write group.

        :param hexsha: Hex sha of the git object, or None if the object
            has no git representation (then only the file name hash is
            updated).
        :param type: "commit", "blob" or "tree".
        :param type_data: For commits (revid, tree sha, verifiers dict),
            otherwise a tuple of strings.
        """
        if hexsha is not None:
            self._name.update(hexsha)
            if type == "commit":
                td = (type_data[0], type_data[1])
                # Only store the testament when the verifiers have one.
                try:
                    td += (type_data[2]["testament3-sha1"],)
                except KeyError:
                    pass
            else:
                td = type_data
            self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
        else:
            # This object is not represented in Git - perhaps an empty
            # directory?
            self._name.update(type + " ".join(type_data))

    def lookup_blob_id(self, fileid, revision):
        """Return the git sha stored for the blob (fileid, revision).

        :raises KeyError: if there is no entry for that blob.
        """
        return self._get_entry(("blob", fileid, revision))

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the index.

        This is a generator, so the KeyError for an unknown sha is only
        raised once iteration has gone through all (zero) matches.

        :param sha: Git object sha, either 40 hex characters or the
            20-byte binary form
        :return: iterator over (type, type_data); for commits type_data is
            (revid, tree sha, verifiers dict), otherwise the tuple of the
            remaining stored fields
        :raises KeyError: if no entry matches the sha
        """
        if len(sha) == 20:
            sha = sha_to_hex(sha)
        found = False
        for key, value in self._iter_entries_prefix(("git", sha, None)):
            found = True
            data = value.split(" ", 3)
            if data[0] == "commit":
                # Commits added without a testament have only three
                # fields, so the fourth one must not be assumed present.
                if len(data) > 3 and data[3]:
                    verifiers = {"testament3-sha1": data[3]}
                else:
                    verifiers = {}
                yield ("commit", (data[1], data[2], verifiers))
            else:
                yield (data[0], tuple(data[1:]))
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        for key, value in self._iter_entries_prefix(("commit", None, None)):
            yield key[1]

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        Only the committed indexes are consulted, not the builder of a
        write group in progress.
        """
        missing_revids = set(revids)
        # Build the keys from the set so that ``revids`` is consumed only
        # once and one-shot iterables (e.g. generators) work too.
        keys = [("commit", revid, "X") for revid in missing_revids]
        for _, key, value in self._index.iter_entries(keys):
            missing_revids.discard(key[1])
        return missing_revids

    def sha1s(self):
        """List the SHA1s."""
        for key, value in self._iter_entries_prefix(("git", None, None)):
            yield key[1]
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
946
947
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
948
# Registry of the known cache formats, keyed by their format string.
formats = registry.Registry()
formats.register(
    TdbGitCacheFormat().get_format_string(), TdbGitCacheFormat())
formats.register(
    SqliteGitCacheFormat().get_format_string(), SqliteGitCacheFormat())
formats.register(
    IndexGitCacheFormat().get_format_string(), IndexGitCacheFormat())
# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())
# The default is tdb when the 'tdb' module can be imported, sqlite otherwise.
try:
    import tdb
except ImportError:
    _default_format = SqliteGitCacheFormat()
else:
    _default_format = TdbGitCacheFormat()
formats.register('default', _default_format)
963
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
964
965
966
def migrate_ancient_formats(repo_transport):
    """Move an old-style cache file into the 'git' cache directory.

    Older caches were kept as ``git.db`` (sqlite) or ``git.tdb`` (tdb)
    directly in the repository directory. If one of them exists and there
    is no 'git' directory yet, the directory is created, initialised for
    the matching format, and the file is renamed to ``git/idmap.db`` or
    ``git/idmap.tdb``.

    :param repo_transport: Transport of the repository directory.
    :raises bzrlib.errors.ReadOnlyError: raised by
        remove_readonly_transport_decorator() if the transport is
        read-only and cannot be unwrapped.
    """
    # Migrate older cache formats
    repo_transport = remove_readonly_transport_decorator(repo_transport)
    has_sqlite = repo_transport.has("git.db")
    has_tdb = repo_transport.has("git.tdb")
    # Nothing to migrate when neither old file exists. (The previous test,
    # "not has_sqlite or has_tdb", also returned whenever git.tdb existed,
    # which made the tdb branch below unreachable.)
    if not (has_sqlite or has_tdb):
        return
    try:
        repo_transport.mkdir("git")
    except bzrlib.errors.FileExists:
        # A 'git' directory already exists; leave it alone.
        return
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if has_sqlite:
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif has_tdb:
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
985
986
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
987
def remove_readonly_transport_decorator(transport):
    """Return a transport without its read-only wrapper.

    A transport that does not report itself as read-only is returned
    unchanged. For a read-only one, its ``_decorated`` attribute is
    returned.

    :raises bzrlib.errors.ReadOnlyError: if the transport is read-only
        and has no ``_decorated`` attribute.
    """
    if not transport.is_readonly():
        return transport
    try:
        return transport._decorated
    except AttributeError:
        raise bzrlib.errors.ReadOnlyError(transport)
994
995
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
996
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the users cache directory
    (typically ~/.cache/bazaar/git/)

    Before opening, any old-style cache file found through the
    repository's ``_transport`` is migrated; a read-only transport makes
    that step a no-op.

    :param repository: A repository object
    """
    repo_transport = getattr(repository, "_transport", None)
    if repo_transport is None:
        return BzrGitCacheFormat.from_repository(repository)
    try:
        migrate_ancient_formats(repo_transport)
    except bzrlib.errors.ReadOnlyError:
        # Not much we can do
        pass
    return BzrGitCacheFormat.from_repository(repository)