/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.200.1594 by Jelmer Vernooij
Use absolute_import everywhere.
19
from __future__ import absolute_import
20
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
21
from dulwich.objects import (
22
    sha_to_hex,
23
    hex_to_sha,
24
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
25
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
26
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
27
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
28
from dulwich.objects import (
29
    ShaFile,
30
    )
31
0.200.228 by Jelmer Vernooij
Split out map.
32
import bzrlib
0.200.528 by Jelmer Vernooij
Fix import.
33
from bzrlib import (
0.254.2 by jelmer
use btree indexes
34
    btree_index as _mod_btree_index,
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
35
    index as _mod_index,
36
    osutils,
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
37
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
38
    trace,
0.254.31 by Jelmer Vernooij
Initial work on CHKMap support.
39
    versionedfile,
0.200.528 by Jelmer Vernooij
Fix import.
40
    )
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
41
from bzrlib.transport import (
42
    get_transport,
43
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
44
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
45
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
46
def get_cache_dir():
    """Return the directory used for bzr-git cache files, creating it if needed.

    When the ``xdg`` module can be imported the directory is
    ``<xdg_cache_home>/bazaar/git``; otherwise it is ``git`` inside the
    directory returned by ``bzrlib.config.config_dir()``.

    :return: Path of the cache directory
    :raises OSError: if the directory is missing and cannot be created
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from bzrlib.config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    if not os.path.isdir(ret):
        try:
            os.makedirs(ret)
        except OSError:
            # Another thread or process may have created the directory
            # between the isdir() check and makedirs(); only propagate the
            # error when the directory really is still missing.
            if not os.path.isdir(ret):
                raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
57
58
0.200.1221 by Jelmer Vernooij
Support cache for non-local transport properly.
59
def get_remote_cache_transport(repository):
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    :param repository: Repository to find a cache location for. If it has a
        ``uuid`` attribute that is not None, a subdirectory of the cache
        directory named after that uuid is used (and created if missing);
        otherwise the cache directory itself is used.
    :return: Result of ``get_transport`` for the chosen directory
    :raises OSError: if the uuid subdirectory is missing and cannot be
        created
    """
    uuid = getattr(repository, "uuid", None)
    if uuid is None:
        path = get_cache_dir()
    else:
        path = os.path.join(get_cache_dir(), uuid)
        if not os.path.isdir(path):
            try:
                os.mkdir(path)
            except OSError:
                # Tolerate losing a creation race with another thread or
                # process; re-raise only if the directory is still absent.
                if not os.path.isdir(path):
                    raise
    return get_transport(path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
71
72
0.200.228 by Jelmer Vernooij
Split out map.
73
def check_pysqlite_version(sqlite3):
    """Ensure the given sqlite module is backed by SQLite 3.3 or newer.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raises bzrlib.errors.BzrError: if the reported SQLite version is
        older than 3.3 (a warning is emitted first)
    """
    major = sqlite3.sqlite_version_info[0]
    too_old = major < 3 or (major == 3 and sqlite3.sqlite_version_info[1] < 3)
    if not too_old:
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
82
83
# Locate a usable sqlite module: prefer the standard library's sqlite3 and
# fall back to the external pysqlite2 package when it is missing or too old.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzrlib.errors.BzrError):
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
except (ImportError, bzrlib.errors.BzrError):
    # Only the failures that mean "no suitable sqlite library" are
    # translated; anything else (e.g. KeyboardInterrupt) propagates as-is.
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzrlib.errors.BzrError("missing sqlite library")
94
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
95
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
96
_mapdbs = threading.local()


def mapdbs():
    """Return the calling thread's dictionary of cached db connections.

    The dictionary is created empty the first time a thread asks for it and
    the same object is returned on later calls from that thread.
    """
    if not hasattr(_mapdbs, "cache"):
        _mapdbs.cache = {}
    return _mapdbs.cache
104
105
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
106
class GitShaMap(object):
    """Interface for a database mapping between Git SHAs and Bazaar ids.

    Lookup and listing methods must be provided by subclasses; the write
    group hooks do nothing unless overridden.
    """

    def lookup_git_sha(self, sha):
        """Find what a Git sha maps to.

        :param sha: Git object sha
        :return: list with (type, type_data) tuples with type_data:
            commit: revid, tree_sha, verifiers
            blob: fileid, revid
            tree: fileid, revid
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Return the Git blob SHA recorded for a file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Return the Git tree SHA recorded for a file id.

        :param file_id: File id to look up
        :param revision: Revision to look the file id up in
        """
        raise NotImplementedError(self.lookup_tree_id)

    def lookup_commit(self, revid):
        """Return the Git commit SHA recorded for a Bazaar revision id.

        :param revid: Bazaar revision id
        """
        raise NotImplementedError(self.lookup_commit)

    def revids(self):
        """Iterate over the revision ids present in the map."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return the subset of ``revids`` that this map does not know.

        :param revids: Iterable of revision ids to check
        :return: Set of the given revision ids not listed by ``revids()``
        """
        known = set(self.revids())
        wanted = revids if isinstance(revids, set) else set(revids)
        return wanted - known

    def sha1s(self):
        """Iterate over the SHA1s present in the map."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Hook called before a batch of changes; no-op by default."""

    def commit_write_group(self):
        """Hook called to persist pending changes; no-op by default."""

    def abort_write_group(self):
        """Hook called to discard pending changes; no-op by default."""
160
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
161
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
162
class ContentCache(object):
    """Interface for a store that caches Git objects."""

    def add(self, object):
        """Store a single object in the cache."""
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Store every object from an iterable, one ``add`` call each."""
        for item in objects:
            self.add(item)

    def __getitem__(self, sha):
        """Look up a cached object by its SHA."""
        raise NotImplementedError(self.__getitem__)
177
178
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
179
class BzrGitCacheFormat(object):
    """Base class describing an on-disk bzr-git cache format."""

    def get_format_string(self):
        """Return the single-line string that uniquely names this format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open a cache of this format located at transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Mark transport as holding a new cache of this format.

        Writes the format string to the ``format`` file on the transport.
        """
        format_string = self.get_format_string()
        transport.put_bytes('format', format_string)

    @classmethod
    def from_transport(cls, transport):
        """Open the cache found on a transport, initializing one if absent.

        The ``format`` file on the transport selects the format; when that
        file does not exist the 'default' format is initialized there first.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open the cache belonging to a repository.

        The cache lives in a ``git`` directory on the repository's own
        transport when that transport is local and can be made writable;
        otherwise the transport returned by ``get_remote_cache_transport``
        is used.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        from bzrlib.transport.local import LocalTransport
        repo_transport = getattr(repository, "_transport", None)
        transport = None
        # None is never a LocalTransport, so this also covers repositories
        # without a transport.
        if isinstance(repo_transport, LocalTransport):
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            try:
                writable = remove_readonly_transport_decorator(repo_transport)
            except bzrlib.errors.ReadOnlyError:
                pass
            else:
                try:
                    writable.mkdir('git')
                except bzrlib.errors.FileExists:
                    pass
                transport = writable.clone('git')
        if transport is None:
            transport = get_remote_cache_transport(repository)
        return cls.from_transport(transport)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
241
242
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
243
class CacheUpdater(object):
    """Interface for objects that record new entries in a bzr-git cache."""

    def add_object(self, obj, bzr_key_data, path):
        """Record one Git object.

        :param obj: Git object to record (a commit, blob or tree)
        :param bzr_key_data: bzr key store data or testament_sha in case
            of commit
        :param path: Path of the object (optional)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; must be implemented by subclasses."""
        raise NotImplementedError(self.finish)
258
259
260
class BzrGitCache(object):
    """Caching backend: an id map, a content cache and an updater class."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Create an object implementing the CacheUpdater interface for
        updating this cache.

        :param rev: Passed through to the updater class
        :return: ``cache_updater_klass(self, rev)``
        """
        updater_factory = self._cache_updater_klass
        return updater_factory(self, rev)
273
274
275
def DictBzrGitCache():
    """Create a BzrGitCache using a DictGitShaMap, no content cache and
    DictCacheUpdater as the updater class.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
276
277
278
class DictCacheUpdater(CacheUpdater):
    """Cache updater for dict-based caches."""

    def __init__(self, cache, rev):
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, bzr_key_data, path):
        """Record a Git object in the dictionaries of the cache's idmap.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree"
        :param bzr_key_data: For commits a dict of verifiers; for blobs and
            trees a sequence whose first item is the file id (and, for
            blobs, whose second item is the revision), or None when there
            is nothing to record
        :param path: Path of the object (unused here)
        :raises AssertionError: if the object has any other type
        """
        idmap = self.cache.idmap
        if obj.type_name == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            key = self.revid
            type_data = (self.revid, self._commit.tree, bzr_key_data)
            idmap._by_revid[self.revid] = obj.id
        elif obj.type_name in ("blob", "tree"):
            if bzr_key_data is None:
                # Without key data there is no entry to store; returning
                # here avoids falling through with key/type_data unbound.
                return
            if obj.type_name == "blob":
                revision = bzr_key_data[1]
            else:
                # Trees are always keyed on the revision being updated.
                revision = self.revid
            key = type_data = (bzr_key_data[0], revision)
            idmap._by_fileid.setdefault(revision, {})[bzr_key_data[0]] = obj.id
        else:
            raise AssertionError
        entry = (obj.type_name, type_data)
        idmap._by_sha.setdefault(obj.id, {})[key] = entry

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object has been added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
312
313
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
314
class DictGitShaMap(GitShaMap):
    """Git SHA map that uses a dictionary."""

    def __init__(self):
        # sha -> {key: (type, type_data)}
        self._by_sha = {}
        # revision -> {fileid: sha}
        self._by_fileid = {}
        # revid -> commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Return the sha stored for fileid in revision.

        :raises KeyError: if no such entry exists
        """
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Yield each (type, type_data) entry stored for a Git sha.

        This is a generator, so a KeyError for an unknown sha is only
        raised once iteration starts.
        """
        for entry in self._by_sha[sha].values():
            yield entry

    def lookup_tree_id(self, fileid, revision):
        """Return the sha stored for fileid in revision.

        :raises KeyError: if no such entry exists
        """
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Return the commit sha stored for a revision id.

        :raises KeyError: if the revision id is unknown
        """
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision id of every commit entry in the map."""
        for entries in self._by_sha.values():
            for (kind, type_data) in entries.values():
                if kind == "commit":
                    yield type_data[0]

    def sha1s(self):
        """Return an iterator over the known SHA1s."""
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
343
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
344
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
345
class SqliteCacheUpdater(CacheUpdater):
    """Cache updater that collects rows and writes them to the sqlite
    database of the cache's idmap when finished.
    """

    def __init__(self, cache, rev):
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        self._trees = []
        self._blobs = []

    def add_object(self, obj, bzr_key_data, path):
        """Remember an object so that ``finish`` can store it.

        :param obj: Git object; its ``type_name`` must be "commit", "tree"
            or "blob"
        :param bzr_key_data: For commits a dict of verifiers; for trees and
            blobs a sequence starting with the file id, or None to skip
        :param path: Path of the object (unused here)
        :raises AssertionError: if the object has any other type
        """
        kind = obj.type_name
        if kind == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            self._testament3_sha1 = bzr_key_data.get("testament3-sha1")
            return
        if kind == "tree":
            if bzr_key_data is not None:
                # Trees are recorded against the revision being updated.
                tree_row = (obj.id, bzr_key_data[0], self.revid)
                self._trees.append(tree_row)
            return
        if kind == "blob":
            if bzr_key_data is not None:
                blob_row = (obj.id, bzr_key_data[0], bzr_key_data[1])
                self._blobs.append(blob_row)
            return
        raise AssertionError

    def finish(self):
        """Write the collected rows to the database and return the commit.

        :raises AssertionError: if no commit object has been added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        db = self.db
        db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        commit_row = (commit.id, self.revid, commit.tree,
                      self._testament3_sha1)
        db.execute(
            "replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
            commit_row)
        return commit
382
383
384
def SqliteBzrGitCache(p):
    """Create a BzrGitCache using a SqliteGitShaMap for path ``p``, no
    content cache and SqliteCacheUpdater as the updater class.
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
385
386
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
387
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a sqlite database."""

    def get_format_string(self):
        """Return the format string identifying the sqlite cache format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the ``idmap.db`` sqlite cache for a transport.

        The database lives in the transport's local directory, or in the
        directory returned by ``get_cache_dir`` when the transport has no
        local path.
        """
        try:
            directory = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            directory = get_cache_dir()
        db_path = os.path.join(directory, "idmap.db")
        return SqliteBzrGitCache(db_path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
398
399
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
400
class SqliteGitShaMap(GitShaMap):
    """Bazaar GIT Sha map that uses a sqlite database for storage."""

    def __init__(self, path=None):
        """Open (and if necessary create) the sha map database.

        :param path: Path of the sqlite database file, or None to use a
            private in-memory database. Connections to a path are shared
            between all maps created by the same thread.
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            dbs = mapdbs()
            if path not in dbs:
                dbs[path] = sqlite3.connect(path)
            self.db = dbs[path]
        # Return TEXT columns as plain str (byte strings on Python 2)
        # instead of decoding each value to unicode.
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")
        # Upgrade databases created before the testament3_sha1 column
        # was introduced.
        try:
            self.db.executescript(
                "ALTER TABLE commits ADD testament3_sha1 TEXT;")
        except sqlite3.OperationalError:
            pass # Column already exists.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the Git commit sha stored for a Bazaar revision id.

        :param revid: Bazaar revision id
        :raises KeyError: if the revision id is not in the database
        """
        cursor = self.db.execute("select sha1 from commits where revid = ?",
            (revid,))
        row = cursor.fetchone()
        if row is not None:
            return row[0]
        # Include the key, like the other lookup methods do.
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit the pending changes on the database connection."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Return the blob sha stored for fileid in revision.

        :raises KeyError: if no such blob is recorded
        """
        row = self.db.execute("select sha1 from blobs where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Return the tree sha stored for fileid in revision.

        :raises KeyError: if no such tree is recorded
        """
        row = self.db.execute("select sha1 from trees where fileid = ? and revid = ?", (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator: matches are yielded lazily, and the KeyError
        for an unknown sha is raised during iteration, after all three
        tables have been searched.

        :param sha: Git object sha
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers
            tree: fileid, revid
            blob: fileid, revid
        :raises KeyError: if the sha is not found in any table
        """
        found = False
        cursor = self.db.execute("select revid, tree_sha, testament3_sha1 from commits where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            if row[2] is not None:
                verifiers = {"testament3-sha1": row[2]}
            else:
                verifiers = {}
            yield ("commit", (row[0], row[1], verifiers))
        cursor = self.db.execute("select fileid, revid from blobs where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("blob", row)
        cursor = self.db.execute("select fileid, revid from trees where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("tree", row)
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (row for (row,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # The table names come from this fixed tuple, never from callers,
        # so interpolating them into the statement is safe.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
505
506
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
507
class TdbCacheUpdater(CacheUpdater):
    """Cache updater for tdb-based caches.

    Collects the Git objects generated for a single Bazaar revision and
    writes the matching lookup entries straight into the tdb database of
    the cache's id map.
    """

    def __init__(self, cache, rev):
        """Create an updater.

        :param cache: Cache whose ``idmap.db`` mapping is written to
        :param rev: Revision being converted; its ``revision_id`` and
            ``parent_ids`` are recorded
        """
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, bzr_key_data, path):
        """Record a Git object created for this revision.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree"
        :param bzr_key_data: For commits a dict of verifiers, for blobs a
            (fileid, revid) tuple, for trees a (fileid,) tuple. Blobs and
            trees passed with None are ignored.
        :param path: Unused by this updater
        :raises AssertionError: if the object type is not supported
        """
        sha = obj.sha().digest()
        if obj.type_name == "commit":
            self.db["commit\0" + self.revid] = "\0".join((sha, obj.tree))
            assert type(bzr_key_data) is dict, "was %r" % bzr_key_data
            type_data = (self.revid, obj.tree)
            # The testament is optional; commits without one are stored
            # with two fields only.
            try:
                type_data += (bzr_key_data["testament3-sha1"],)
            except KeyError:
                pass
            self._commit = obj
        elif obj.type_name == "blob":
            if bzr_key_data is None:
                return
            self.db["\0".join(("blob", bzr_key_data[0], bzr_key_data[1]))] = sha
            type_data = bzr_key_data
        elif obj.type_name == "tree":
            if bzr_key_data is None:
                return
            (file_id, ) = bzr_key_data
            type_data = (file_id, self.revid)
        else:
            raise AssertionError("unknown object type %r" % (obj.type_name,))
        # Several Bazaar objects can map to one Git sha, so the value under
        # "git\0<sha>" is a list of newline-terminated entries.
        entry = "\0".join((obj.type_name, ) + type_data) + "\n"
        key = "git\0" + sha
        try:
            oldval = self.db[key]
        except KeyError:
            self.db[key] = entry
        else:
            # Older values may lack the trailing newline; an empty value
            # (which used to raise IndexError here) needs no separator.
            if oldval and not oldval.endswith("\n"):
                oldval += "\n"
            self.db[key] = oldval + entry

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
557
558
559
# Factory for a tdb-backed cache: wraps a TdbGitShaMap opened at path ``p``,
# passes None as the second BzrGitCache argument (the slot IndexBzrGitCache
# fills with a content cache) and uses TdbCacheUpdater.
TdbBzrGitCache = lambda p: BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
560
0.200.1140 by Jelmer Vernooij
Update now that the control dir formats are no longer in __init__.
561
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
562
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format for tdb-based caches."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the tdb cache stored as ``idmap.tdb``.

        The database lives under the transport's local path, or under the
        directory returned by get_cache_dir() when the transport is not
        local.

        :raises ImportError: if the tdb database cannot be opened because
            the 'tdb' module is not installed
        """
        try:
            local_path = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        else:
            basepath = local_path.encode(osutils._fs_enc)
        assert isinstance(basepath, str)
        db_path = os.path.join(basepath, "idmap.tdb")
        try:
            cache = TdbBzrGitCache(db_path)
        except ImportError:
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
        return cache
580
581
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
582
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (fields are joined with NUL bytes):

    "git <sha1>" -> "<type> <type-data1> <type-data2>"
    "commit revid" -> "<sha1> <tree-id>"
    "tree fileid revid" -> "<sha1>"
    "blob fileid revid" -> "<sha1>"

    A "git" value holds one newline-terminated line per Bazaar object that
    maps to that sha.
    """

    TDB_MAP_VERSION = 3
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open (creating if necessary) the map stored at ``path``.

        :param path: Path of the tdb file, or None for a plain in-memory
            dict
        :raises ImportError: if the tdb module is not available
        """
        # Imported even for the in-memory case so that a missing tdb module
        # is always reported through ImportError.
        import tdb
        self.path = path
        if path is None:
            self.db = {}
        else:
            assert isinstance(path, str)
            # Database handles are shared through mapdbs().
            if path not in mapdbs():
                mapdbs()[path] = tdb.Tdb(path, self.TDB_HASH_SIZE, tdb.DEFAULT,
                                          os.O_RDWR|os.O_CREAT)
            self.db = mapdbs()[path]
        try:
            stored_version = self.db["version"]
        except KeyError:
            # Fresh database: nothing to validate.
            pass
        else:
            # A version that is not even a number is as unusable as an
            # unsupported one; rebuild instead of failing with ValueError.
            try:
                compatible = int(stored_version) in (2, 3)
            except ValueError:
                compatible = False
            if not compatible:
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              stored_version, self.TDB_MAP_VERSION)
                self.db.clear()
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Start writing changes."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit any pending changes."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Abort any pending changes."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex sha of the commit stored for ``revid``.

        :raises KeyError: if there is no entry for the revision
        """
        try:
            # The value starts with the 20-byte binary sha.
            return sha_to_hex(self.db["commit\0" + revid][:20])
        except KeyError:
            raise KeyError("No cache entry for %r" % revid)

    def lookup_blob_id(self, fileid, revision):
        """Return the hex sha of the blob stored for (fileid, revision).

        :raises KeyError: if there is no such entry
        """
        return sha_to_hex(self.db["\0".join(("blob", fileid, revision))])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator; a missing sha surfaces as KeyError when the
        result is first iterated.

        :param sha: Git object sha, 40-character hex or 20-byte binary
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        value = self.db["git\0" + sha]
        for data in value.splitlines():
            data = data.split("\0")
            if data[0] == "commit":
                if len(data) == 3:
                    yield (data[0], (data[1], data[2], {}))
                else:
                    yield (data[0], (data[1], data[2], {"testament3-sha1": data[3]}))
            elif data[0] in ("tree", "blob"):
                yield (data[0], tuple(data[1:]))
            else:
                raise AssertionError("unknown type %r" % data[0])

    def missing_revisions(self, revids):
        """Return the set of ``revids`` that have no commit entry."""
        ret = set()
        for revid in revids:
            if self.db.get("commit\0" + revid) is None:
                ret.add(revid)
        return ret

    def revids(self):
        """List the revision ids known."""
        prefix = "commit\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield key[len(prefix):]

    def sha1s(self):
        """List the SHA1s."""
        prefix = "git\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield sha_to_hex(key[len(prefix):])
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
682
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
683
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
684
class VersionedFilesContentCache(ContentCache):
    """Content cache that keeps Git objects in a versioned files store."""

    def __init__(self, vf):
        self._vf = vf

    def add(self, obj):
        """Insert ``obj`` as a single record keyed by its id."""
        record = versionedfile.ChunkedContentFactory(
            (obj.id,), [], None, obj.as_legacy_object_chunks())
        self._vf.insert_record_stream([record])

    def __getitem__(self, sha):
        """Return the object stored under ``sha``.

        :raises KeyError: if the store reports the record as absent
        """
        records = self._vf.get_record_stream([(sha,)], 'unordered', True)
        record = next(records)
        if record.storage_kind == 'absent':
            raise KeyError(sha)
        fulltext = record.get_bytes_as('fulltext')
        return ShaFile._parse_legacy_object(fulltext)
700
701
0.254.52 by Jelmer Vernooij
Merge trunk, use git objects to cache tree objects.
702
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates to a Git object store."""

    def __init__(self, store):
        self.store = store

    def add_multi(self, objs):
        """Hand ``objs`` to the store's add_objects() in one call."""
        self.store.add_objects(objs)

    def add(self, obj, path):
        """Add a single object to the store; ``path`` is not used."""
        self.store.add_object(obj)

    def __getitem__(self, sha):
        """Return the object the store holds for ``sha``."""
        return self.store[sha]
715
716
0.254.46 by Jelmer Vernooij
Merge trunk.
717
class IndexCacheUpdater(CacheUpdater):
    """Cache updater for index-based caches.

    Lookup entries are written to the cache's id map as objects are added;
    commit and tree objects are also collected and handed to the content
    cache in one batch by finish().
    """

    def __init__(self, cache, rev):
        """Create an updater.

        :param cache: Cache providing ``idmap`` and ``content_cache``
        :param rev: Revision being converted; its ``revision_id`` and
            ``parent_ids`` are recorded
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []
        self._cache_objs = set()

    def add_object(self, obj, bzr_key_data, path):
        """Record a Git object created for this revision.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree"
        :param bzr_key_data: For commits a dict of verifiers, for blobs a
            (fileid, revid) tuple, for trees a tuple whose first element is
            the file id. May be None for blobs and trees, in which case no
            id map entry is written.
        :param path: Path stored alongside the object for the content cache
        :raises AssertionError: if the object type is not supported
        """
        idmap = self.cache.idmap
        if obj.type_name == "commit":
            self._commit = obj
            assert type(bzr_key_data) is dict
            idmap._add_git_sha(obj.id, "commit",
                (self.revid, obj.tree, bzr_key_data))
            idmap._add_node(("commit", self.revid, "X"),
                " ".join((obj.id, obj.tree)))
            self._cache_objs.add((obj, path))
        elif obj.type_name == "blob":
            # Like TdbCacheUpdater, ignore blobs without key data instead
            # of failing with a TypeError.
            if bzr_key_data is None:
                return
            idmap._add_git_sha(obj.id, "blob", bzr_key_data)
            idmap._add_node(("blob", bzr_key_data[0],
                bzr_key_data[1]), obj.id)
        elif obj.type_name == "tree":
            # A tree without key data has no id map entry to write, but the
            # object itself is still worth keeping in the content cache.
            if bzr_key_data is not None:
                idmap._add_git_sha(obj.id, "tree",
                    (bzr_key_data[0], self.revid))
            self._cache_objs.add((obj, path))
        else:
            raise AssertionError("unknown object type %r" % (obj.type_name,))

    def finish(self):
        """Flush collected objects to the content cache.

        :return: The commit object that was added, or None if there was none
        """
        self.cache.content_cache.add_multi(self._cache_objs)
        return self._commit
750
751
752
class IndexBzrGitCache(BzrGitCache):
    """Cache combining an index-based sha map with a Git object store."""

    def __init__(self, transport=None):
        """Open the cache rooted at ``transport``.

        The sha map lives in the 'index' subdirectory and the Git objects
        in the 'objects' subdirectory.

        :param transport: Transport of the cache directory. Although the
            parameter has a default of None, it is cloned unconditionally,
            so a real transport is needed.
        """
        shamap = IndexGitShaMap(transport.clone('index'))
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
0.254.46 by Jelmer Vernooij
Merge trunk.
764
765
0.254.43 by Jelmer Vernooij
Merge trunk.
766
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format using an index-based sha map and a Git object store."""

    def get_format_string(self):
        """Return the format string identifying this cache format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the 'index' and 'objects' directories of a new cache.

        The object store inside 'objects' is initialised as well.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for subdir in ('index', 'objects'):
            transport.mkdir(subdir)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Open the cache found at ``transport``."""
        return IndexBzrGitCache(transport)
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
780
781
782
class IndexGitShaMap(GitShaMap):
    """SHA Map that uses the Bazaar APIs to store a cache.

    BTree Index file with the following contents (values are
    space-separated; "X" is a filler so that every key has three elements):

    ("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
    ("commit", <revid>, "X") -> "<sha1> <tree-id>"
    ("blob", <fileid>, <revid>) -> <sha1>

    """

    def __init__(self, transport=None):
        """Open the map.

        :param transport: Directory holding the ``*.rix`` index files, or
            None for a purely in-memory map
        """
        if transport is None:
            self._transport = None
            self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
            # The in-memory index doubles as a permanently open builder.
            self._builder = self._index
        else:
            self._builder = None
            self._transport = transport
            self._index = _mod_index.CombinedGraphIndex([])
            for name in self._transport.list_dir("."):
                if not name.endswith(".rix"):
                    continue
                x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
                    self._transport.stat(name).st_size)
                self._index.insert_index(0, x)

    @classmethod
    def from_repository(cls, repository):
        """Open the map for ``repository``.

        Uses the 'git' subdirectory of the repository's ``_transport`` when
        it has one, otherwise the directory returned by get_cache_dir().
        """
        transport = getattr(repository, "_transport", None)
        if transport is not None:
            try:
                transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            return cls(transport.clone('git'))
        from bzrlib.transport import get_transport
        return cls(get_transport(get_cache_dir()))

    def __repr__(self):
        if self._transport is not None:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)
        else:
            return "%s()" % (self.__class__.__name__)

    def repack(self):
        """Rewrite all entries into one new index file.

        The previously existing ``*.rix`` files are renamed to ``*.old``.
        """
        assert self._builder is None
        self.start_write_group()
        for _, key, value in self._index.iter_all_entries():
            self._builder.add_node(key, value)
        # Collect the old files before the new index file is written.
        to_remove = []
        for name in self._transport.list_dir('.'):
            if name.endswith('.rix'):
                to_remove.append(name)
        self.commit_write_group()
        # NOTE(review): relies on the combined index exposing an ``indices``
        # list - confirm against bzrlib.index.CombinedGraphIndex.
        del self._index.indices[1:]
        for name in to_remove:
            self._transport.rename(name, name + '.old')

    def start_write_group(self):
        """Start collecting new entries in a fresh builder."""
        assert self._builder is None
        self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
        # Hash over everything added; it names the resulting index file.
        self._name = osutils.sha()

    def commit_write_group(self):
        """Write the collected entries to a new index file and load it."""
        assert self._builder is not None
        stream = self._builder.finish()
        name = self._name.hexdigest() + ".rix"
        size = self._transport.put_file(name, stream)
        index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
        self._index.insert_index(0, index)
        self._builder = None
        self._name = None

    def abort_write_group(self):
        """Discard the entries collected since start_write_group()."""
        assert self._builder is not None
        self._builder = None
        self._name = None

    def _add_node(self, key, value):
        """Add a node to the open builder.

        :return: True if the key was already present, False if it was added
        """
        try:
            self._builder.add_node(key, value)
        except bzrlib.errors.BadIndexDuplicateKey:
            # Multiple bzr objects can have the same contents
            return True
        else:
            return False

    def _get_entry(self, key):
        """Return the value for ``key`` from the index or the open builder.

        :raises KeyError: if neither has the key
        """
        entries = self._index.iter_entries([key])
        try:
            return next(entries)[2]
        except StopIteration:
            if self._builder is None:
                raise KeyError(key)
            entries = self._builder.iter_entries([key])
            try:
                return next(entries)[2]
            except StopIteration:
                raise KeyError(key)

    def _iter_entries_prefix(self, prefix):
        """Yield (key, value) for entries matching ``prefix``.

        Covers the stored index and, when one is open, the builder.
        """
        for entry in self._index.iter_entries_prefix([prefix]):
            yield (entry[1], entry[2])
        if self._builder is not None:
            for entry in self._builder.iter_entries_prefix([prefix]):
                yield (entry[1], entry[2])

    def lookup_commit(self, revid):
        """Return the hex sha of the commit stored for ``revid``.

        :raises KeyError: if there is no entry for the revision
        """
        # The value is "<sha1> <tree-id>"; the hex sha is 40 characters.
        return self._get_entry(("commit", revid, "X"))[:40]

    def _add_git_sha(self, hexsha, type, type_data):
        """Add a ("git", hexsha, "X") entry describing a Bazaar object.

        :param hexsha: Hex sha of the Git object, or None if the object has
            no Git representation
        :param type: "commit", "blob" or "tree"
        :param type_data: For commits (revid, tree sha, verifiers dict),
            otherwise a tuple of strings
        """
        if hexsha is not None:
            self._name.update(hexsha)
            if type == "commit":
                td = (type_data[0], type_data[1])
                # The testament is optional; without it the stored value
                # has three fields instead of four.
                try:
                    td += (type_data[2]["testament3-sha1"],)
                except KeyError:
                    pass
            else:
                td = type_data
            self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
        else:
            # This object is not represented in Git - perhaps an empty
            # directory?
            self._name.update(type + " ".join(type_data))

    def lookup_blob_id(self, fileid, revision):
        """Return the sha stored for the blob (fileid, revision).

        :raises KeyError: if there is no such entry
        """
        return self._get_entry(("blob", fileid, revision))

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the index.

        This is a generator; a missing sha surfaces as KeyError once the
        result has been iterated.

        :param sha: Git object sha, 40-character hex or 20-byte binary
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob/tree: the remaining fields of the stored value
        """
        if len(sha) == 20:
            sha = sha_to_hex(sha)
        found = False
        for key, value in self._iter_entries_prefix(("git", sha, None)):
            found = True
            data = value.split(" ", 3)
            if data[0] == "commit":
                # Commits stored without a testament have no fourth field;
                # indexing it unconditionally raised IndexError.
                if len(data) > 3 and data[3]:
                    verifiers = {"testament3-sha1": data[3]}
                else:
                    verifiers = {}
                yield ("commit", (data[1], data[2], verifiers))
            else:
                yield (data[0], tuple(data[1:]))
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        for key, value in self._iter_entries_prefix(("commit", None, None)):
            yield key[1]

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present."""
        missing_revids = set(revids)
        # Build the keys from the set so that a one-shot iterable passed as
        # ``revids`` is only consumed once.
        keys = [("commit", revid, "X") for revid in missing_revids]
        for _, key, value in self._index.iter_entries(keys):
            missing_revids.discard(key[1])
        return missing_revids

    def sha1s(self):
        """List the SHA1s."""
        for key, value in self._iter_entries_prefix(("git", None, None)):
            yield key[1]
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
948
949
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
950
# Registry of the known cache formats, keyed by their format string.
formats = registry.Registry()
formats.register(
    TdbGitCacheFormat().get_format_string(), TdbGitCacheFormat())
formats.register(
    SqliteGitCacheFormat().get_format_string(), SqliteGitCacheFormat())
formats.register(
    IndexGitCacheFormat().get_format_string(), IndexGitCacheFormat())
# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())
# Until then the default is tdb when the module can be imported and sqlite
# otherwise.
try:
    import tdb
except ImportError:
    formats.register('default', SqliteGitCacheFormat())
else:
    formats.register('default', TdbGitCacheFormat())
965
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
966
967
968
def migrate_ancient_formats(repo_transport):
    """Move a legacy ``git.db`` / ``git.tdb`` file into a ``git`` directory.

    Does nothing when neither legacy file exists or when the ``git``
    directory is already present.

    :param repo_transport: Transport of the repository directory
    :raises bzrlib.errors.ReadOnlyError: if the transport is read-only and
        cannot be unwrapped
    """
    # Migrate older cache formats
    repo_transport = remove_readonly_transport_decorator(repo_transport)
    has_sqlite = repo_transport.has("git.db")
    has_tdb = repo_transport.has("git.tdb")
    # Only bail out when there is nothing to migrate. The previous test,
    # "not has_sqlite or has_tdb", also returned whenever git.tdb existed,
    # which made the tdb branch below unreachable.
    if not (has_sqlite or has_tdb):
        return
    try:
        repo_transport.mkdir("git")
    except bzrlib.errors.FileExists:
        return
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if has_sqlite:
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif has_tdb:
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
987
988
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
989
def remove_readonly_transport_decorator(transport):
    """Return a transport without its read-only wrapper.

    :param transport: Transport to unwrap
    :return: ``transport`` itself when it is not read-only, otherwise its
        ``_decorated`` attribute
    :raises bzrlib.errors.ReadOnlyError: if the transport is read-only and
        has no ``_decorated`` attribute
    """
    if not transport.is_readonly():
        return transport
    try:
        inner = transport._decorated
    except AttributeError:
        raise bzrlib.errors.ReadOnlyError(transport)
    return inner
996
997
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
998
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the users cache directory
    (typically ~/.cache/bazaar/git/)

    Legacy cache files found on the repository transport are migrated
    first; a read-only transport is silently left as it is.

    :param repository: A repository object
    """
    transport = getattr(repository, "_transport", None)
    if transport is None:
        return BzrGitCacheFormat.from_repository(repository)
    try:
        migrate_ancient_formats(transport)
    except bzrlib.errors.ReadOnlyError:
        pass # Not much we can do
    return BzrGitCacheFormat.from_repository(repository)