/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.200.1594 by Jelmer Vernooij
Use absolute_import everywhere.
19
from __future__ import absolute_import
20
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
21
from dulwich.objects import (
22
    sha_to_hex,
23
    hex_to_sha,
24
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
25
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
26
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
27
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
28
from dulwich.objects import (
29
    ShaFile,
30
    )
31
0.200.1641 by Jelmer Vernooij
Use relative imports where possible.
32
from ... import (
33
    errors as bzr_errors,
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
34
    osutils,
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
35
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
36
    trace,
0.200.1648 by Jelmer Vernooij
Fix compatibility with newer versions of breezy.
37
    )
38
from ...bzr import (
39
    btree_index as _mod_btree_index,
40
    index as _mod_index,
0.254.31 by Jelmer Vernooij
Initial work on CHKMap support.
41
    versionedfile,
0.200.528 by Jelmer Vernooij
Fix import.
42
    )
0.200.1641 by Jelmer Vernooij
Use relative imports where possible.
43
from ...transport import (
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
44
    get_transport,
45
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
46
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
47
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
48
def get_cache_dir():
    """Return the directory used for bzr-git caches, creating it if needed.

    The location is ``<xdg_cache_home>/bazaar/git`` when the python ``xdg``
    module can be imported, and ``<config_dir()>/git`` otherwise.

    :return: Path of the cache directory
    :raise OSError: if the directory does not exist and cannot be created
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from ...config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    if not os.path.isdir(ret):
        try:
            os.makedirs(ret)
        except OSError:
            # Another thread or process may have created the directory
            # between the isdir() check and makedirs(); only propagate the
            # error when the directory is really still missing.
            if not os.path.isdir(ret):
                raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
59
60
0.200.1221 by Jelmer Vernooij
Support cache for non-local transport properly.
61
def get_remote_cache_transport(repository):
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    The cache lives in the directory returned by get_cache_dir(); when the
    repository has a ``uuid`` attribute that is not None, a sub-directory
    named after it is used (and created if missing).

    :param repository: Repository to find a cache location for
    :return: Transport for the cache directory
    """
    uuid = getattr(repository, "uuid", None)
    if uuid is None:
        path = get_cache_dir()
    else:
        path = os.path.join(get_cache_dir(), uuid)
        if not os.path.isdir(path):
            try:
                os.mkdir(path)
            except OSError:
                # Tolerate a concurrent creator winning the race between
                # the isdir() check and mkdir().
                if not os.path.isdir(path):
                    raise
    return get_transport(path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
73
74
0.200.228 by Jelmer Vernooij
Split out map.
75
def check_pysqlite_version(sqlite3):
    """Verify that a sqlite module is backed by a recent enough library.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raise BzrError: (after emitting a warning) if the library version is
        lower than 3.3
    """
    version = sqlite3.sqlite_version_info
    major = version[0]
    too_old = major < 3 or (major == 3 and version[1] < 3)
    if not too_old:
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzr_errors.BzrError("incompatible sqlite library")
0.200.228 by Jelmer Vernooij
Split out map.
84
85
# Locate a usable sqlite binding: prefer the standard library module and fall
# back to the external pysqlite2 package when it is missing or too old.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzr_errors.BzrError):
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
except Exception:
    # "except Exception" rather than a bare "except" so that
    # KeyboardInterrupt and SystemExit are not turned into a BzrError.
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzr_errors.BzrError("missing sqlite library")
0.200.228 by Jelmer Vernooij
Split out map.
96
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
97
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
98
# Per-thread storage; each thread gets its own ``cache`` dictionary.
_mapdbs = threading.local()


def mapdbs():
    """Return the calling thread's dictionary of cached db connections.

    The dictionary is created the first time a thread asks for it.
    """
    cache = getattr(_mapdbs, "cache", None)
    if cache is None:
        cache = _mapdbs.cache = {}
    return cache
106
107
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
108
class GitShaMap(object):
    """Git<->Bzr revision id mapping database.

    Abstract interface: the lookup and listing methods raise
    NotImplementedError here, while the write-group hooks are no-ops.
    """

    def lookup_git_sha(self, sha):
        """Find the Bazaar objects recorded for a Git sha.

        :param sha: Git object sha
        :return: list with (type, type_data) tuples with type_data:
            commit: revid, tree_sha, verifiers
            blob: fileid, revid
            tree: fileid, revid
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Return the Git blob SHA for a file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Return the Git tree SHA for a file id.

        :param file_id: File id to look up
        :param revision: Revision to look the file id up in
        """
        raise NotImplementedError(self.lookup_tree_id)

    def lookup_commit(self, revid):
        """Return the Git commit SHA for a Bazaar revision id.

        :param revid: Bazaar revision id
        """
        raise NotImplementedError(self.lookup_commit)

    def revids(self):
        """Enumerate the revision ids stored in this map."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Compute which of the given revisions are absent from the map.

        :param revids: Iterable of revision ids to check
        :return: Set of those revision ids not reported by revids()
        """
        known = set(self.revids())
        wanted = revids if isinstance(revids, set) else set(revids)
        return wanted - known

    def sha1s(self):
        """Enumerate the SHA1s stored in this map."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Begin a group of changes (no-op in the base class)."""

    def commit_write_group(self):
        """Make pending changes permanent (no-op in the base class)."""

    def abort_write_group(self):
        """Discard pending changes (no-op in the base class)."""
162
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
163
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
164
class ContentCache(object):
    """Interface for a store that caches Git objects."""

    def add(self, object):
        """Store a single object; must be provided by subclasses."""
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Store every object in an iterable by calling add() on each.

        :param objects: Iterable of objects to add
        """
        for item in objects:
            self.add(item)

    def __getitem__(self, sha):
        """Look up a cached object by its SHA; must be provided by
        subclasses.
        """
        raise NotImplementedError(self.__getitem__)
179
180
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
181
class BzrGitCacheFormat(object):
    """Bazaar-Git Cache Format."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Create a new instance of this cache format at transport.

        Writes the format string to a file named 'format' on the transport.
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        The format is looked up in the ``formats`` registry using the
        contents of the 'format' file. When that file does not exist the
        'default' format is used and initialized on the transport first.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzr_errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        This will use the repository's transport to store the cache file, or
        use the users global cache directory if the repository has no
        local, writable transport associated with it.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        from ...transport.local import LocalTransport
        transport = None
        repo_transport = getattr(repository, "_transport", None)
        # isinstance() is False for None, so a separate None check is
        # unnecessary.
        if isinstance(repo_transport, LocalTransport):
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            try:
                repo_transport = remove_readonly_transport_decorator(
                    repo_transport)
            except bzr_errors.ReadOnlyError:
                # Fall through to the global cache directory below.
                pass
            else:
                try:
                    repo_transport.mkdir('git')
                except bzr_errors.FileExists:
                    pass
                transport = repo_transport.clone('git')
        if transport is None:
            transport = get_remote_cache_transport(repository)
        return cls.from_transport(transport)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
243
244
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
245
class CacheUpdater(object):
    """Abstract interface for objects that record entries in a bzr-git
    cache.
    """

    def add_object(self, obj, bzr_key_data, path):
        """Record one Git object.

        :param obj: Git object to add; the updaters in this file dispatch
            on its ``type_name`` ("commit", "blob" or "tree")
        :param bzr_key_data: bzr key store data or testament_sha in case
            of commit
        :param path: Path of the object (optional)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; must be provided by subclasses."""
        raise NotImplementedError(self.finish)
260
261
262
class BzrGitCache(object):
    """Caching backend bundling an id map, a content cache and the class
    used to build updaters for them.
    """

    def __init__(self, idmap, content_cache, cache_updater_klass):
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Obtain an object that implements the CacheUpdater interface for
        updating this cache.

        :param rev: Revision passed on to the updater class
        :return: Result of calling the updater class with (self, rev)
        """
        make_updater = self._cache_updater_klass
        return make_updater(self, rev)
275
276
277
def DictBzrGitCache():
    """Build a BzrGitCache backed by a DictGitShaMap, without a content
    cache.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
278
279
280
class DictCacheUpdater(CacheUpdater):
    """Cache updater for dict-based caches."""

    def __init__(self, cache, rev):
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, bzr_key_data, path):
        """Record a Git object in the dictionaries of the cache's id map.

        :param obj: Git object; dispatched on ``obj.type_name``
        :param bzr_key_data: dict of verifiers for a commit; for a blob or
            tree an indexable whose first item is the file id (and, for a
            blob, whose second item is the revision), or None to skip the
            object
        :param path: Path of the object (unused here)
        :raise AssertionError: for any other object type
        """
        if obj.type_name == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            key = self.revid
            type_data = (self.revid, self._commit.tree, bzr_key_data)
            self.cache.idmap._by_revid[self.revid] = obj.id
        elif obj.type_name in ("blob", "tree"):
            if bzr_key_data is None:
                # Nothing to map this object to. Previously this fell
                # through to the code below and failed on the unbound
                # 'type_data' / 'key' locals.
                return
            if obj.type_name == "blob":
                revision = bzr_key_data[1]
            else:
                revision = self.revid
            key = type_data = (bzr_key_data[0], revision)
            self.cache.idmap._by_fileid.setdefault(type_data[1], {})[type_data[0]] = obj.id
        else:
            raise AssertionError
        entry = (obj.type_name, type_data)
        self.cache.idmap._by_sha.setdefault(obj.id, {})[key] = entry

    def finish(self):
        """Return the commit object that was added.

        :raise AssertionError: if no commit object has been added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
314
315
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
316
class DictGitShaMap(GitShaMap):
    """Git SHA map that uses a dictionary."""

    def __init__(self):
        # sha -> {key: (type, type_data)}
        self._by_sha = {}
        # revision -> {fileid: sha}
        self._by_fileid = {}
        # revid -> commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raise KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Yield the (type, type_data) entries stored for a Git sha.

        This is a generator, so the KeyError for an unknown sha is raised
        when iteration starts rather than when the method is called.
        """
        # .values() instead of the Python-2-only .itervalues().
        for entry in self._by_sha[sha].values():
            yield entry

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raise KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Return the commit sha stored for a revision id.

        :raise KeyError: if the revision id is unknown
        """
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision id of every stored commit entry."""
        for entries in self._by_sha.values():
            for (kind, type_data) in entries.values():
                if kind == "commit":
                    yield type_data[0]

    def sha1s(self):
        """Return an iterator over the known SHA1s."""
        # iter() over the dict replaces the Python-2-only .iterkeys().
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
345
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
346
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
347
class SqliteCacheUpdater(CacheUpdater):
    """Cache updater that collects rows and writes them to the sqlite
    database of the cache's id map when finished.
    """

    def __init__(self, cache, rev):
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        # Pending (sha1, fileid, revid) rows, written by finish().
        self._trees = []
        self._blobs = []

    def add_object(self, obj, bzr_key_data, path):
        """Queue a Git object for insertion.

        Blobs and trees whose bzr_key_data is None are ignored.

        :raise AssertionError: for object types other than commit, tree
            and blob
        """
        kind = obj.type_name
        if kind == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            self._testament3_sha1 = bzr_key_data.get("testament3-sha1")
            return
        if kind not in ("tree", "blob"):
            raise AssertionError
        if bzr_key_data is None:
            return
        if kind == "tree":
            row = (obj.id, bzr_key_data[0], self.revid)
            self._trees.append(row)
        else:
            row = (obj.id, bzr_key_data[0], bzr_key_data[1])
            self._blobs.append(row)

    def finish(self):
        """Write the queued rows and the commit row, then return the commit.

        :raise AssertionError: if no commit object has been added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        db = self.db
        db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        commit_row = (commit.id, self.revid, commit.tree,
                      self._testament3_sha1)
        db.execute(
            "replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
            commit_row)
        return commit
384
385
386
def SqliteBzrGitCache(p):
    """Build a BzrGitCache backed by a SqliteGitShaMap opened on path p,
    without a content cache.
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
387
388
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
389
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a sqlite database file."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the 'idmap.db' database for a transport.

        The file lives in the transport's local directory, or in the
        directory returned by get_cache_dir() when the transport has no
        local path.
        """
        try:
            directory = transport.local_abspath(".")
        except bzr_errors.NotLocalUrl:
            directory = get_cache_dir()
        db_path = os.path.join(directory, "idmap.db")
        return SqliteBzrGitCache(db_path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
400
401
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
402
class SqliteGitShaMap(GitShaMap):
    """Bazaar GIT Sha map that uses a sqlite database for storage."""

    def __init__(self, path=None):
        """Open (and if necessary create) the sha map database.

        :param path: Path of the database file, or None for an in-memory
            database. Connections to files are shared per thread through
            mapdbs().
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            connections = mapdbs()
            # 'in' replaces dict.has_key(), which no longer exists in
            # Python 3.
            if path not in connections:
                connections[path] = sqlite3.connect(path)
            self.db = connections[path]
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")
        # Add the testament3_sha1 column to commits tables that were
        # created without it.
        try:
            self.db.executescript(
                "ALTER TABLE commits ADD testament3_sha1 TEXT;")
        except sqlite3.OperationalError:
            pass # Column already exists.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the Git commit sha stored for a Bazaar revision id.

        :raise KeyError: if the revision id is not in the commits table
        """
        cursor = self.db.execute("select sha1 from commits where revid = ?",
            (revid,))
        row = cursor.fetchone()
        if row is not None:
            return row[0]
        # Include the key, as lookup_blob_id() and lookup_tree_id() do.
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit the current database transaction."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for (fileid, revision).

        :raise KeyError: if there is no such row
        """
        row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for (fileid, revision).

        :raise KeyError: if there is no such row
        """
        row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator: the KeyError for an unknown sha is raised
        during iteration, after all three tables have been searched.

        :param sha: Git object sha
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers
            tree: fileid, revid
            blob: fileid, revid
        :raise KeyError: if the sha is in none of the tables
        """
        found = False
        cursor = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            if row[2] is not None:
                verifiers = {"testament3-sha1": row[2]}
            else:
                verifiers = {}
            yield ("commit", (row[0], row[1], verifiers))
        cursor = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("blob", row)
        cursor = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("tree", row)
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (row for (row,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # The table names come from this fixed tuple, never from callers,
        # so interpolating them into the statement is safe.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
507
508
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
509
class TdbCacheUpdater(CacheUpdater):
    """Cache updater that records one revision's git objects in a tdb map."""

    def __init__(self, cache, rev):
        """Remember the cache, its id map database and the revision's ids."""
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, bzr_key_data, path):
        """Record a git object together with the bzr data it maps to.

        Blobs and trees whose bzr_key_data is None are skipped. Any object
        type other than commit, blob or tree raises AssertionError.
        """
        binsha = obj.sha().digest()
        kind = obj.type_name
        if kind == "commit":
            self.db["commit\0" + self.revid] = "\0".join((binsha, obj.tree))
            assert type(bzr_key_data) is dict, "was %r" % bzr_key_data
            try:
                testament = (bzr_key_data["testament3-sha1"],)
            except KeyError:
                # No testament verifier was supplied for this commit.
                testament = ()
            fields = (self.revid, obj.tree) + testament
            self._commit = obj
        elif kind == "blob":
            if bzr_key_data is None:
                return
            blob_key = "\0".join(("blob", bzr_key_data[0], bzr_key_data[1]))
            self.db[blob_key] = binsha
            fields = bzr_key_data
        elif kind == "tree":
            if bzr_key_data is None:
                return
            (file_id, ) = bzr_key_data
            fields = (file_id, self.revid)
        else:
            raise AssertionError
        line = "\0".join((kind, ) + fields) + "\n"
        git_key = "git\0" + binsha
        try:
            existing = self.db[git_key]
        except KeyError:
            # First entry recorded for this git sha.
            self.db[git_key] = line
            return
        # Append to the earlier entries, keeping one entry per line.
        if existing[-1] != "\n":
            existing = existing + "\n"
        self.db[git_key] = "".join([existing, line])

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object was added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        return commit
559
560
561
def TdbBzrGitCache(p):
    """Build a BzrGitCache backed by a TdbGitShaMap opened at path ``p``.

    The cache has no content cache (None) and uses TdbCacheUpdater.
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
562
0.200.1140 by Jelmer Vernooij
Update now that the control dir formats are no longer in __init__.
563
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
564
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format for tdb-based caches."""

    def get_format_string(self):
        """Return the marker string identifying this cache format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the tdb cache ("idmap.tdb") for the given transport.

        When the transport has no local path, the directory returned by
        get_cache_dir() is used instead.

        :raises ImportError: if opening the cache raised ImportError
        """
        try:
            local_dir = transport.local_abspath(".")
        except bzr_errors.NotLocalUrl:
            basepath = get_cache_dir()
        else:
            basepath = local_dir.encode(osutils._fs_enc)
        assert isinstance(basepath, str)
        db_path = os.path.join(basepath, "idmap.tdb")
        try:
            return TdbBzrGitCache(db_path)
        except ImportError:
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
582
583
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
584
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (fields within keys and values are joined with NUL bytes):

    "git <sha1>" -> "<type> <type-data1> <type-data2>"
    "commit revid" -> "<sha1> <tree-id>"
    "tree fileid revid" -> "<sha1>"
    "blob fileid revid" -> "<sha1>"
    """

    TDB_MAP_VERSION = 3
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open (or create) the database at path.

        :param path: Path of the tdb file; when None a plain dict is used
            as the store.
        """
        import tdb
        self.path = path
        if path is None:
            self.db = {}
        else:
            assert isinstance(path, str)
            # Reuse an already opened database for this path if there is one.
            if path not in mapdbs():
                mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
                                          os.O_RDWR|os.O_CREAT)
            self.db = mapdbs()[path]
        try:
            if int(self.db["version"]) not in (2, 3):
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              self.db["version"], self.TDB_MAP_VERSION)
                self.db.clear()
        except KeyError:
            # No version recorded yet; it is written below.
            pass
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Start writing changes."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit any pending changes."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Abort any pending changes."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex sha of the commit stored for revid.

        :raises KeyError: if there is no entry for revid
        """
        try:
            # The first 20 bytes of the value are the binary commit sha.
            return sha_to_hex(self.db["commit\0" + revid][:20])
        except KeyError:
            raise KeyError("No cache entry for %r" % revid)

    def lookup_blob_id(self, fileid, revision):
        """Return the hex sha of the blob stored for (fileid, revision)."""
        return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator; a missing sha raises KeyError when iteration
        starts.

        :param sha: Git object sha (40-character hex or binary)
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        value = self.db["git\0" + sha]
        for data in value.splitlines():
            data = data.split("\0")
            if data[0] == "commit":
                if len(data) == 3:
                    yield (data[0], (data[1], data[2], {}))
                else:
                    yield (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
            elif data[0] in ("tree", "blob"):
                yield (data[0], tuple(data[1:]))
            else:
                raise AssertionError("unknown type %r" % data[0])

    def missing_revisions(self, revids):
        """Return the set of revids that have no commit entry."""
        ret = set()
        for revid in revids:
            if self.db.get("commit\0" + revid) is None:
                ret.add(revid)
        return ret

    def revids(self):
        """List the revision ids known."""
        for key in self.db.iterkeys():
            if key.startswith("commit\0"):
                # Strip the "commit\0" prefix.
                yield key[7:]

    def sha1s(self):
        """List the SHA1s."""
        for key in self.db.iterkeys():
            if key.startswith("git\0"):
                # Strip the "git\0" prefix; the rest is the binary sha.
                yield sha_to_hex(key[4:])
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
684
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
685
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
686
class VersionedFilesContentCache(ContentCache):
    """Content cache that stores git objects in a versioned files store."""

    def __init__(self, vf):
        """Wrap the versioned files object ``vf``."""
        self._vf = vf

    def add(self, obj):
        """Insert obj, keyed by (obj.id,), as legacy-format object chunks."""
        self._vf.insert_record_stream(
            [versionedfile.ChunkedContentFactory((obj.id,), [], None,
                obj.as_legacy_object_chunks())])

    def __getitem__(self, sha):
        """Return the git object stored under sha.

        :raises KeyError: if the store reports the record as absent
        """
        stream = self._vf.get_record_stream([(sha,)], 'unordered', True)
        # Builtin next() works for both Python 2 and Python 3 iterators.
        entry = next(stream)
        if entry.storage_kind == 'absent':
            raise KeyError(sha)
        return ShaFile._parse_legacy_object(entry.get_bytes_as('fulltext'))
702
703
0.254.52 by Jelmer Vernooij
Merge trunk, use git objects to cache tree objects.
704
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates to a git object store."""

    def __init__(self, store):
        """Use ``store`` as the backing object store."""
        self.store = store

    def add_multi(self, objs):
        """Hand all of objs to the store's add_objects()."""
        backing = self.store
        backing.add_objects(objs)

    def add(self, obj, path):
        """Add a single object to the store; path is not used."""
        backing = self.store
        backing.add_object(obj)

    def __getitem__(self, sha):
        """Return whatever the store holds under sha."""
        backing = self.store
        return backing[sha]
717
718
0.254.46 by Jelmer Vernooij
Merge trunk.
719
class IndexCacheUpdater(CacheUpdater):
    """Cache updater that feeds an index-based sha map and a content cache."""

    def __init__(self, cache, rev):
        """Remember the cache and the revision's ids; nothing is written yet."""
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []
        self._cache_objs = set()

    def add_object(self, obj, bzr_key_data, path):
        """Record a git object in the id map.

        Commits and trees are also queued for the content cache, which is
        written by finish(). Any object type other than commit, blob or
        tree raises AssertionError.
        """
        kind = obj.type_name
        if kind not in ("commit", "blob", "tree"):
            raise AssertionError
        idmap = self.cache.idmap
        if kind == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            commit_data = (self.revid, obj.tree, bzr_key_data)
            idmap._add_git_sha(obj.id, "commit", commit_data)
            commit_value = " ".join((obj.id, obj.tree))
            idmap._add_node(("commit", self.revid, "X"), commit_value)
            self._cache_objs.add((obj, path))
        elif kind == "blob":
            idmap._add_git_sha(obj.id, "blob", bzr_key_data)
            blob_key = ("blob", bzr_key_data[0], bzr_key_data[1])
            idmap._add_node(blob_key, obj.id)
        else:
            tree_data = (bzr_key_data[0], self.revid)
            idmap._add_git_sha(obj.id, "tree", tree_data)
            self._cache_objs.add((obj, path))

    def finish(self):
        """Write the queued objects to the content cache; return the commit."""
        content_cache = self.cache.content_cache
        content_cache.add_multi(self._cache_objs)
        return self._commit
752
753
754
class IndexBzrGitCache(BzrGitCache):
    """Cache combining an index-based sha map with a git object store."""

    def __init__(self, transport=None):
        """Open the cache rooted at transport.

        The sha map lives in the 'index' subdirectory and the cached git
        objects in the 'objects' subdirectory.
        """
        shamap = IndexGitShaMap(transport.clone('index'))
        from .transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
0.254.46 by Jelmer Vernooij
Merge trunk.
766
767
0.254.43 by Jelmer Vernooij
Merge trunk.
768
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format using an index-based sha map plus a git object cache."""

    def get_format_string(self):
        """Return the marker string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the 'index' and 'objects' directories and init the store."""
        super(IndexGitCacheFormat, self).initialize(transport)
        for subdir in ('index', 'objects'):
            transport.mkdir(subdir)
        from .transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Return an IndexBzrGitCache for the given transport."""
        return IndexBzrGitCache(transport)
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
782
783
784
class IndexGitShaMap(GitShaMap):
    """SHA Map that uses the Bazaar APIs to store a cache.

    BTree Index file with the following contents (keys always have three
    elements; "X" is a filler where no third element is needed):

    ("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
    ("commit", <revid>, "X") -> "<sha1> <tree-id>"
    ("blob", <fileid>, <revid>) -> <sha1>

    """

    def __init__(self, transport=None):
        """Open the ".rix" index files found on transport.

        :param transport: Transport holding the index files; when None an
            in-memory index is used, which also serves as the builder.
        """
        if transport is None:
            self._transport = None
            self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
            self._builder = self._index
        else:
            self._builder = None
            self._transport = transport
            self._index = _mod_index.CombinedGraphIndex([])
            for name in self._transport.list_dir("."):
                if not name.endswith(".rix"):
                    continue
                x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
                    self._transport.stat(name).st_size)
                self._index.insert_index(0, x)

    @classmethod
    def from_repository(cls, repository):
        """Open the sha map for a repository.

        Uses the 'git' directory below the repository's transport when it
        has one, and the directory from get_cache_dir() otherwise.
        """
        transport = getattr(repository, "_transport", None)
        if transport is not None:
            try:
                transport.mkdir('git')
            except bzr_errors.FileExists:
                pass
            return cls(transport.clone('git'))
        from ...transport import get_transport
        return cls(get_transport(get_cache_dir()))

    def __repr__(self):
        if self._transport is not None:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)
        else:
            return "%s()" % (self.__class__.__name__)

    def repack(self):
        """Rewrite all entries into one new index file.

        The previously existing ".rix" files are renamed to "*.rix.old".
        """
        assert self._builder is None
        self.start_write_group()
        for _, key, value in self._index.iter_all_entries():
            self._builder.add_node(key, value)
        # Collect the old files before the new one is written.
        to_remove = []
        for name in self._transport.list_dir('.'):
            if name.endswith('.rix'):
                to_remove.append(name)
        self.commit_write_group()
        # commit_write_group() inserted the new index at position 0.
        del self._index.indices[1:]
        for name in to_remove:
            self._transport.rename(name, name + '.old')

    def start_write_group(self):
        """Start collecting new entries in a fresh builder."""
        assert self._builder is None
        self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
        # Hash of the added content; its hex digest names the index file.
        self._name = osutils.sha()

    def commit_write_group(self):
        """Write the collected entries to a new ".rix" file and load it."""
        assert self._builder is not None
        stream = self._builder.finish()
        name = self._name.hexdigest() + ".rix"
        size = self._transport.put_file(name, stream)
        index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
        self._index.insert_index(0, index)
        self._builder = None
        self._name = None

    def abort_write_group(self):
        """Discard the entries collected since start_write_group()."""
        assert self._builder is not None
        self._builder = None
        self._name = None

    def _add_node(self, key, value):
        """Add a node to the builder.

        :return: True if the key was already present, False otherwise
        """
        try:
            self._builder.add_node(key, value)
        except bzr_errors.BadIndexDuplicateKey:
            # Multiple bzr objects can have the same contents
            return True
        else:
            return False

    def _get_entry(self, key):
        """Return the value stored for key.

        The committed index is consulted first, then the active builder.

        :raises KeyError: if the key is in neither
        """
        entries = self._index.iter_entries([key])
        try:
            return next(entries)[2]
        except StopIteration:
            if self._builder is None:
                raise KeyError(key)
            entries = self._builder.iter_entries([key])
            try:
                return next(entries)[2]
            except StopIteration:
                raise KeyError(key)

    def _iter_entries_prefix(self, prefix):
        """Yield (key, value) for entries matching prefix.

        Entries from the committed index come first, followed by those of
        the active builder, if any.
        """
        for entry in self._index.iter_entries_prefix([prefix]):
            yield (entry[1], entry[2])
        if self._builder is not None:
            for entry in self._builder.iter_entries_prefix([prefix]):
                yield (entry[1], entry[2])

    def lookup_commit(self, revid):
        """Return the hex sha of the commit stored for revid."""
        # The value is "<sha1> <tree-id>"; the first 40 characters are the sha.
        return self._get_entry(("commit", revid, "X"))[:40]

    def _add_git_sha(self, hexsha, type, type_data):
        """Record the bzr data for a git object and update the index name."""
        if hexsha is not None:
            self._name.update(hexsha)
            if type == "commit":
                td = (type_data[0], type_data[1])
                try:
                    td += (type_data[2]["testament3-sha1"],)
                except KeyError:
                    # No testament verifier; the field is left out entirely.
                    pass
            else:
                td = type_data
            self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
        else:
            # This object is not represented in Git - perhaps an empty
            # directory?
            self._name.update(type + " ".join(type_data))

    def lookup_blob_id(self, fileid, revision):
        """Return the sha stored for the blob (fileid, revision)."""
        return self._get_entry(("blob", fileid, revision))

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the index.

        This is a generator.

        :param sha: Git object sha (hex, or 20-byte binary)
        :return: iterator over (type, type_data); for commits type_data is
            (revid, tree sha, verifiers dict), otherwise the stored fields
        :raises KeyError: once iteration finishes, if nothing was found
        """
        if len(sha) == 20:
            sha = sha_to_hex(sha)
        found = False
        for key, value in self._iter_entries_prefix(("git", sha, None)):
            found = True
            data = value.split(" ", 3)
            if data[0] == "commit":
                # _add_git_sha() omits the testament field when no verifier
                # was given, so it may be absent here.
                if len(data) > 3 and data[3]:
                    verifiers = {"testament3-sha1": data[3]}
                else:
                    verifiers = {}
                yield ("commit", (data[1], data[2], verifiers))
            else:
                yield (data[0], tuple(data[1:]))
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        for key, value in self._iter_entries_prefix(("commit", None, None)):
            yield key[1]

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present."""
        missing_revids = set(revids)
        # Build the keys from the set so that revids may be a one-shot
        # iterable.
        keys = [("commit", revid, "X") for revid in missing_revids]
        for _, key, value in self._index.iter_entries(keys):
            missing_revids.discard(key[1])
        return missing_revids

    def sha1s(self):
        """List the SHA1s."""
        for key, value in self._iter_entries_prefix(("git", None, None)):
            yield key[1]
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
950
951
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
952
# Registry of the known cache formats, keyed by their format string.
formats = registry.Registry()
for _format_class in (TdbGitCacheFormat, SqliteGitCacheFormat,
                      IndexGitCacheFormat):
    formats.register(_format_class().get_format_string(), _format_class())
del _format_class
# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())
try:
    import tdb
except ImportError:
    # tdb is unavailable; use the sqlite format as the default.
    formats.register('default', SqliteGitCacheFormat())
else:
    formats.register('default', TdbGitCacheFormat())
967
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
968
969
970
def migrate_ancient_formats(repo_transport):
    """Move a legacy "git.db" or "git.tdb" cache into the "git" directory.

    Nothing is done when neither legacy file exists, or when the "git"
    directory already exists.

    :param repo_transport: Transport of the repository; a read-only
        decorator is stripped first, which may raise ReadOnlyError.
    """
    # Migrate older cache formats
    repo_transport = remove_readonly_transport_decorator(repo_transport)
    has_sqlite = repo_transport.has("git.db")
    has_tdb = repo_transport.has("git.tdb")
    # Parenthesised on purpose: "not a or b" would skip every repository
    # that has a git.tdb file and make the tdb branch below unreachable.
    if not (has_sqlite or has_tdb):
        return
    try:
        repo_transport.mkdir("git")
    except bzr_errors.FileExists:
        return
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if has_sqlite:
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    else:
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
989
990
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
991
def remove_readonly_transport_decorator(transport):
    """Return the transport, unwrapped if it is a read-only decorator.

    A transport that is not read-only is returned unchanged. For a
    read-only one its ``_decorated`` attribute is returned.

    :raises ReadOnlyError: if a read-only transport has no ``_decorated``
    """
    if not transport.is_readonly():
        return transport
    try:
        return transport._decorated
    except AttributeError:
        raise bzr_errors.ReadOnlyError(transport)
998
999
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
1000
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the user's cache directory
    (typically ~/.cache/bazaar/git/)

    :param repository: A repository object
    """
    transport = getattr(repository, "_transport", None)
    if transport is None:
        return BzrGitCacheFormat.from_repository(repository)
    try:
        migrate_ancient_formats(transport)
    except bzr_errors.ReadOnlyError:
        # Not much we can do
        pass
    return BzrGitCacheFormat.from_repository(repository)