/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.235.1 by Jelmer Vernooij
Store sha map more efficiently.
19
from dulwich.objects import (
20
    sha_to_hex,
21
    hex_to_sha,
22
    )
0.200.292 by Jelmer Vernooij
Fix formatting.
23
import os
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
24
import threading
0.200.292 by Jelmer Vernooij
Fix formatting.
25
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
26
from dulwich.objects import (
27
    ShaFile,
28
    )
29
0.200.228 by Jelmer Vernooij
Split out map.
30
import bzrlib
0.200.528 by Jelmer Vernooij
Fix import.
31
from bzrlib import (
0.254.2 by jelmer
use btree indexes
32
    btree_index as _mod_btree_index,
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
33
    index as _mod_index,
34
    osutils,
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
35
    registry,
0.200.528 by Jelmer Vernooij
Fix import.
36
    trace,
0.254.31 by Jelmer Vernooij
Initial work on CHKMap support.
37
    versionedfile,
0.200.528 by Jelmer Vernooij
Fix import.
38
    )
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
39
from bzrlib.transport import (
40
    get_transport,
41
    )
0.200.230 by Jelmer Vernooij
Implement sha cache.
42
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
43
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
44
def get_cache_dir():
    """Return the directory that holds the bzr-git caches, creating it if needed.

    When the python ``xdg`` module can be imported the directory is
    "bazaar/git" below the XDG cache home; otherwise it is the "git"
    subdirectory of the Bazaar configuration directory.

    :return: Path of the cache directory (which exists on return)
    """
    try:
        from xdg.BaseDirectory import xdg_cache_home
    except ImportError:
        from bzrlib.config import config_dir
        ret = os.path.join(config_dir(), "git")
    else:
        ret = os.path.join(xdg_cache_home, "bazaar", "git")
    if not os.path.isdir(ret):
        try:
            os.makedirs(ret)
        except OSError:
            # The directory may have been created by someone else between
            # the isdir() check and makedirs(); only propagate the error if
            # the directory is really still missing.
            if not os.path.isdir(ret):
                raise
    return ret
0.200.534 by Jelmer Vernooij
Use XDG cache directory if the python xdg module is available.
55
56
0.200.1221 by Jelmer Vernooij
Support cache for non-local transport properly.
57
def get_remote_cache_transport(repository):
    """Retrieve the transport to use when accessing (unwritable) remote
    repositories.

    :param repository: Repository to find a cache location for. If it has
        a ``uuid`` attribute that is not None, a subdirectory of the cache
        directory named after that uuid is used (and created if missing);
        otherwise the cache directory itself is used.
    :return: The result of get_transport() for the chosen directory
    """
    uuid = getattr(repository, "uuid", None)
    if uuid is None:
        path = get_cache_dir()
    else:
        path = os.path.join(get_cache_dir(), uuid)
        if not os.path.isdir(path):
            try:
                os.mkdir(path)
            except OSError:
                # Tolerate the directory having been created between the
                # isdir() check and mkdir(); re-raise any other failure.
                if not os.path.isdir(path):
                    raise
    return get_transport(path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
69
70
0.200.228 by Jelmer Vernooij
Split out map.
71
def check_pysqlite_version(sqlite3):
    """Verify that the sqlite library behind a DB-API module is new enough.

    :param sqlite3: Module object exposing ``sqlite_version_info``
    :raise bzrlib.errors.BzrError: if the sqlite version is older than 3.3
        (a warning is emitted first)
    """
    major_minor = tuple(sqlite3.sqlite_version_info[:2])
    if major_minor >= (3, 3):
        return
    trace.warning('Needs at least sqlite 3.3.x')
    raise bzrlib.errors.BzrError("incompatible sqlite library")
80
81
# Locate a usable sqlite DB-API module: prefer the standard library's sqlite3
# and fall back to the external pysqlite2 package when sqlite3 is missing or
# fails the version check.
try:
    try:
        import sqlite3
        check_pysqlite_version(sqlite3)
    except (ImportError, bzrlib.errors.BzrError):
        from pysqlite2 import dbapi2 as sqlite3
        check_pysqlite_version(sqlite3)
except Exception:
    # Deliberately not a bare "except:": on Python >= 2.5 this keeps
    # KeyboardInterrupt and SystemExit from being reported as a missing
    # sqlite library.
    trace.warning('Needs at least Python2.5 or Python2.4 with the pysqlite2 '
            'module')
    raise bzrlib.errors.BzrError("missing sqlite library")
92
0.200.226 by Jelmer Vernooij
Merge thin-pack work.
93
0.200.365 by Jelmer Vernooij
Share sha map cache connections inside threads.
94
_mapdbs = threading.local()
def mapdbs():
    """Return the calling thread's dictionary of cached db connections.

    The dictionary lives on a threading.local() object and is created the
    first time a thread asks for it.
    """
    if not hasattr(_mapdbs, "cache"):
        _mapdbs.cache = {}
    return _mapdbs.cache
102
103
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
104
class GitShaMap(object):
    """Mapping database between Git object shas and Bazaar identifiers."""

    def lookup_git_sha(self, sha):
        """Find out what a Git sha corresponds to.

        :param sha: Git object sha
        :return: list with (type, type_data) tuples with type_data:
            commit: revid, tree_sha, verifiers
            blob: fileid, revid
            tree: fileid, revid
        """
        raise NotImplementedError(self.lookup_git_sha)

    def lookup_blob_id(self, file_id, revision):
        """Look up the Git blob SHA for a file id.

        :param file_id: File id of the file/symlink
        :param revision: revision in which the file was last changed.
        """
        raise NotImplementedError(self.lookup_blob_id)

    def lookup_tree_id(self, file_id, revision):
        """Look up the Git tree SHA for a file id.

        :param file_id: File id to look up
        :param revision: Revision to look the file id up in
        """
        raise NotImplementedError(self.lookup_tree_id)

    def lookup_commit(self, revid):
        """Look up the Git commit SHA for a Bazaar revision id.

        :param revid: Bazaar revision id
        """
        raise NotImplementedError(self.lookup_commit)

    def revids(self):
        """Iterate over the revision ids this map knows about."""
        raise NotImplementedError(self.revids)

    def missing_revisions(self, revids):
        """Return the subset of revids that is not present in this map.

        :param revids: Iterable of revision ids to check
        :return: set of the revision ids not returned by revids()
        """
        known = set(self.revids())
        if isinstance(revids, set):
            wanted = revids
        else:
            wanted = set(revids)
        return wanted - known

    def sha1s(self):
        """Iterate over the SHA1s this map knows about."""
        raise NotImplementedError(self.sha1s)

    def start_write_group(self):
        """Begin a group of changes; a no-op in this base class."""

    def commit_write_group(self):
        """Make pending changes permanent; a no-op in this base class."""

    def abort_write_group(self):
        """Discard pending changes; a no-op in this base class."""
158
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
159
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
160
class ContentCache(object):
    """Interface for a store that caches Git objects."""

    def add(self, object):
        """Store a single object; must be provided by subclasses."""
        raise NotImplementedError(self.add)

    def add_multi(self, objects):
        """Store every object of an iterable by calling add() on each one."""
        for item in objects:
            self.add(item)

    def __getitem__(self, sha):
        """Fetch a cached object by SHA; must be provided by subclasses."""
        raise NotImplementedError(self.__getitem__)
175
176
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
177
class BzrGitCacheFormat(object):
    """Bazaar-Git Cache Format."""

    def get_format_string(self):
        """Return a single-line unique format string for this cache format."""
        raise NotImplementedError(self.get_format_string)

    def open(self, transport):
        """Open this format on a transport."""
        raise NotImplementedError(self.open)

    def initialize(self, transport):
        """Create a new instance of this cache format at transport.

        This writes the format string to the 'format' file on the transport.

        :param transport: Transport to initialize the cache on
        """
        transport.put_bytes('format', self.get_format_string())

    @classmethod
    def from_transport(cls, transport):
        """Open a cache file present on a transport, or initialize one.

        The contents of the 'format' file select the format to open. If
        that file does not exist, the format registered as 'default' is
        initialized on the transport first.

        :param transport: Transport to use
        :return: A BzrGitCache instance
        """
        # "formats" is the module-level format registry; it is not defined
        # in this part of the file.
        try:
            format_name = transport.get_bytes('format')
            cache_format = formats.get(format_name)
        except bzrlib.errors.NoSuchFile:
            cache_format = formats.get('default')
            cache_format.initialize(transport)
        return cache_format.open(transport)

    @classmethod
    def from_repository(cls, repository):
        """Open a cache file for a repository.

        If the repository has a ``_transport`` that is a LocalTransport,
        the cache is stored in a 'git' directory on that transport.
        Otherwise the location returned by get_remote_cache_transport()
        (below the user's global cache directory) is used.

        :param repository: Repository to open the cache for
        :return: A `BzrGitCache`
        """
        from bzrlib.transport.local import LocalTransport
        repo_transport = getattr(repository, "_transport", None)
        if (repo_transport is not None and
            isinstance(repo_transport, LocalTransport)):
            # Even if we don't write to this repo, we should be able
            # to update its cache.
            repo_transport = remove_readonly_transport_decorator(repo_transport)
            try:
                repo_transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            transport = repo_transport.clone('git')
        else:
            transport = get_remote_cache_transport(repository)
        return cls.from_transport(transport)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
233
234
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
235
class CacheUpdater(object):
    """Interface for objects that write entries into a bzr-git cache."""

    def add_object(self, obj, ie, path):
        """Record one Git object.

        :param obj: Git object to record; the implementations in this file
            read its ``type_name`` ("commit", "blob" or "tree") and ``id``
        :param ie: Inventory entry (for blob/tree) or dictionary with
            verifiers in case of commit
        :param path: Path of the object (optional)
        """
        raise NotImplementedError(self.add_object)

    def finish(self):
        """Complete the update; must be provided by subclasses."""
        raise NotImplementedError(self.finish)
250
251
252
class BzrGitCache(object):
    """Caching backend: an id map, a content cache and an updater class."""

    def __init__(self, idmap, content_cache, cache_updater_klass):
        self._cache_updater_klass = cache_updater_klass
        self.content_cache = content_cache
        self.idmap = idmap

    def get_updater(self, rev):
        """Create an object implementing the CacheUpdater interface for
        updating this cache.

        :param rev: Revision passed on to the updater class
        :return: The result of calling the updater class with (self, rev)
        """
        make_updater = self._cache_updater_klass
        return make_updater(self, rev)
265
266
267
def DictBzrGitCache():
    """Create a BzrGitCache that keeps everything in dictionaries.

    The cache uses a new DictGitShaMap as id map, has no content cache and
    is updated through DictCacheUpdater.
    """
    return BzrGitCache(DictGitShaMap(), None, DictCacheUpdater)
268
269
270
class DictCacheUpdater(CacheUpdater):
    """Cache updater for dict-based caches."""

    def __init__(self, cache, rev):
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie, path):
        """Record a Git object in the dictionaries of the cache's id map.

        :param obj: Git object; its ``type_name`` must be "commit", "blob"
            or "tree"
        :param ie: Dictionary with verifiers for a commit; inventory entry
            (or None) for a blob or tree
        :param path: Path of the object (unused here)
        :raise AssertionError: if the object type is not one of the above
        """
        idmap = self.cache.idmap
        if obj.type_name == "commit":
            assert isinstance(ie, dict), "was %r" % (ie,)
            self._commit = obj
            key = self.revid
            type_data = (self.revid, self._commit.tree, ie)
            idmap._by_revid[self.revid] = obj.id
        elif obj.type_name in ("blob", "tree"):
            if ie is None:
                # Without an inventory entry there is nothing to map the
                # object to. Falling through would hit unbound "key" and
                # "type_data" below, so stop here.
                return
            if obj.type_name == "blob":
                revision = ie.revision
            else:
                revision = self.revid
            key = type_data = (ie.file_id, revision)
            idmap._by_fileid.setdefault(revision, {})[ie.file_id] = obj.id
        else:
            raise AssertionError
        entry = (obj.type_name, type_data)
        idmap._by_sha.setdefault(obj.id, {})[key] = entry

    def finish(self):
        """Return the commit object that was added.

        :raise AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        return self._commit
304
305
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
306
class DictGitShaMap(GitShaMap):
    """Git SHA map that uses a dictionary."""

    def __init__(self):
        # Git sha -> {key: (type, type_data)}
        self._by_sha = {}
        # revision -> {fileid: Git sha}
        self._by_fileid = {}
        # revid -> Git commit sha
        self._by_revid = {}

    def lookup_blob_id(self, fileid, revision):
        """Return the Git blob sha stored for (fileid, revision).

        :raise KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_git_sha(self, sha):
        """Yield the (type, type_data) entries stored for a Git sha.

        This is a generator, so the KeyError for an unknown sha is raised
        when iteration starts rather than when the method is called.
        """
        for entry in self._by_sha[sha].values():
            yield entry

    def lookup_tree_id(self, fileid, revision):
        """Return the Git tree sha stored for (fileid, revision).

        :raise KeyError: if there is no such entry
        """
        return self._by_fileid[revision][fileid]

    def lookup_commit(self, revid):
        """Return the Git commit sha stored for a revision id.

        :raise KeyError: if the revision id is not known
        """
        return self._by_revid[revid]

    def revids(self):
        """Yield the revision id of every commit entry in the map."""
        for entries in self._by_sha.values():
            for (kind, type_data) in entries.values():
                if kind == "commit":
                    yield type_data[0]

    def sha1s(self):
        """Return an iterator over the Git shas known to the map."""
        return iter(self._by_sha)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
335
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
336
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
337
class SqliteCacheUpdater(CacheUpdater):
    """Cache updater for sqlite-based caches.

    Objects are collected by add_object() and handed to the database of the
    cache's id map by finish(). finish() does not commit by itself.
    """

    def __init__(self, cache, rev):
        self.cache = cache
        self.db = self.cache.idmap.db
        self.revid = rev.revision_id
        self._commit = None
        self._testament3_sha1 = None
        # Rows of (sha1, fileid, revid) waiting to be written by finish().
        self._trees = []
        self._blobs = []

    def add_object(self, obj, ie, path):
        """Remember a Git object so that finish() can store it.

        :param obj: Git object; its ``type_name`` must be "commit", "tree"
            or "blob"
        :param ie: Dictionary with verifiers for a commit; inventory entry
            (or None, in which case the object is skipped) for a tree or blob
        :param path: Path of the object (unused here)
        :raise AssertionError: if the object type is not one of the above
        """
        if obj.type_name == "commit":
            # Check the type before using ie; the original asserted only
            # after already having called ie.get().
            assert isinstance(ie, dict), "was %r" % (ie,)
            self._commit = obj
            self._testament3_sha1 = ie.get("testament3-sha1")
        elif obj.type_name == "tree":
            if ie is not None:
                self._trees.append((obj.id, ie.file_id, self.revid))
        elif obj.type_name == "blob":
            if ie is not None:
                self._blobs.append((obj.id, ie.file_id, ie.revision))
        else:
            raise AssertionError

    def finish(self):
        """Write the collected trees, blobs and commit to the database.

        :return: The commit object that was added
        :raise AssertionError: if no commit object was added
        """
        if self._commit is None:
            raise AssertionError("No commit object added")
        self.db.executemany(
            "replace into trees (sha1, fileid, revid) values (?, ?, ?)",
            self._trees)
        self.db.executemany(
            "replace into blobs (sha1, fileid, revid) values (?, ?, ?)",
            self._blobs)
        self.db.execute(
            "replace into commits (sha1, revid, tree_sha, testament3_sha1) values (?, ?, ?, ?)",
            (self._commit.id, self.revid, self._commit.tree, self._testament3_sha1))
        return self._commit
374
375
376
def SqliteBzrGitCache(p):
    """Create a BzrGitCache backed by a sqlite database.

    :param p: Path handed to SqliteGitShaMap
    :return: BzrGitCache with a SqliteGitShaMap id map, no content cache,
        and SqliteCacheUpdater as updater class
    """
    return BzrGitCache(SqliteGitShaMap(p), None, SqliteCacheUpdater)
377
378
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
379
class SqliteGitCacheFormat(BzrGitCacheFormat):
    """Cache format that keeps the sha map in a sqlite database."""

    def get_format_string(self):
        """Return the format string identifying this format."""
        return 'bzr-git sha map version 1 using sqlite\n'

    def open(self, transport):
        """Open the "idmap.db" database for a transport.

        The database lives in the transport's local directory; if the
        transport is not local, the global cache directory is used.
        """
        try:
            directory = transport.local_abspath(".")
        except bzrlib.errors.NotLocalUrl:
            directory = get_cache_dir()
        db_path = os.path.join(directory, "idmap.db")
        return SqliteBzrGitCache(db_path)
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
390
391
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
392
class SqliteGitShaMap(GitShaMap):
    """Bazaar GIT Sha map that uses a sqlite database for storage."""

    def __init__(self, path=None):
        """Open (and if necessary create) the sha map database.

        :param path: Path of the database file, or None to use a private
            in-memory database. Connections to a path are shared between
            all maps created by the same thread.
        """
        self.path = path
        if path is None:
            self.db = sqlite3.connect(":memory:")
        else:
            connections = mapdbs()
            if path not in connections:
                connections[path] = sqlite3.connect(path)
            self.db = connections[path]
        self.db.text_factory = str
        self.db.executescript("""
        create table if not exists commits(
            sha1 text not null check(length(sha1) == 40),
            revid text not null,
            tree_sha text not null check(length(tree_sha) == 40)
        );
        create index if not exists commit_sha1 on commits(sha1);
        create unique index if not exists commit_revid on commits(revid);
        create table if not exists blobs(
            sha1 text not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create index if not exists blobs_sha1 on blobs(sha1);
        create unique index if not exists blobs_fileid_revid on blobs(fileid, revid);
        create table if not exists trees(
            sha1 text unique not null check(length(sha1) == 40),
            fileid text not null,
            revid text not null
        );
        create unique index if not exists trees_sha1 on trees(sha1);
        create unique index if not exists trees_fileid_revid on trees(fileid, revid);
""")
        # Databases created before the testament3_sha1 column existed get
        # it added here.
        try:
            self.db.executescript(
                "ALTER TABLE commits ADD testament3_sha1 TEXT;")
        except sqlite3.OperationalError:
            pass # Column already exists.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the Git commit sha stored for a Bazaar revision id.

        :raise KeyError: if the revision id is not in the database
        """
        cursor = self.db.execute("select sha1 from commits where revid = ?",
            (revid,))
        row = cursor.fetchone()
        if row is not None:
            return row[0]
        # Include the key, like lookup_blob_id() and lookup_tree_id() do.
        raise KeyError(revid)

    def commit_write_group(self):
        """Commit the pending changes on the database connection."""
        self.db.commit()

    def lookup_blob_id(self, fileid, revision):
        """Return the Git blob sha stored for (fileid, revision).

        :raise KeyError: if there is no such row
        """
        row = self.db.execute(
            "select sha1 from blobs where fileid = ? and revid = ?",
            (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_tree_id(self, fileid, revision):
        """Return the Git tree sha stored for (fileid, revision).

        :raise KeyError: if there is no such row
        """
        row = self.db.execute(
            "select sha1 from trees where fileid = ? and revid = ?",
            (fileid, revision)).fetchone()
        if row is not None:
            return row[0]
        raise KeyError(fileid)

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        This is a generator: commits, blobs and trees with the given sha
        are yielded in that order, and the KeyError for an unknown sha is
        only raised once the (empty) result is iterated.

        :param sha: Git object sha
        :return: iterator over (type, type_data) tuples with type_data:
            commit: revid, tree sha, verifiers
            tree: fileid, revid
            blob: fileid, revid
        :raise KeyError: if the sha is in none of the tables
        """
        found = False
        cursor = self.db.execute(
            "select revid, tree_sha, testament3_sha1 from commits where sha1 = ?",
            (sha,))
        for row in cursor.fetchall():
            found = True
            if row[2] is not None:
                verifiers = {"testament3-sha1": row[2]}
            else:
                verifiers = {}
            yield ("commit", (row[0], row[1], verifiers))
        cursor = self.db.execute(
            "select fileid, revid from blobs where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("blob", row)
        cursor = self.db.execute(
            "select fileid, revid from trees where sha1 = ?", (sha,))
        for row in cursor.fetchall():
            found = True
            yield ("tree", row)
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        return (revid for (revid,) in self.db.execute("select revid from commits"))

    def sha1s(self):
        """List the SHA1s."""
        # The table names are fixed constants, so building the statement
        # with string formatting is safe here.
        for table in ("blobs", "commits", "trees"):
            for (sha,) in self.db.execute("select sha1 from %s" % table):
                yield sha
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
497
498
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
499
class TdbCacheUpdater(CacheUpdater):
    """Cache updater for tdb-based caches.

    Writes the entries for the git objects of one revision straight into
    the database of the cache's id map.
    """

    def __init__(self, cache, rev):
        """Create an updater for a single revision.

        :param cache: Cache whose ``idmap.db`` receives the entries
        :param rev: Revision object; its ``revision_id`` and ``parent_ids``
            are kept on the updater
        """
        self.cache = cache
        self.db = cache.idmap.db
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []

    def add_object(self, obj, ie, path):
        """Record a git object in the database.

        :param obj: Git object; ``type_name`` must be "commit", "blob" or
            "tree"
        :param ie: For commits a dict of verifiers (only the
            "testament3-sha1" key is read); for blobs and trees an object
            with ``file_id`` (and, for blobs, ``revision``), or None to
            skip the object
        :param path: Not used by this updater
        :raises AssertionError: if the object type is not recognised
        """
        binsha = obj.sha().digest()
        kind = obj.type_name
        if kind == "commit":
            self.db["commit\0" + self.revid] = "\0".join((binsha, obj.tree))
            assert type(ie) is dict, "was %r" % ie
            base_data = (self.revid, obj.tree)
            try:
                type_data = base_data + (ie["testament3-sha1"],)
            except KeyError:
                # No testament verifier available: store the short form.
                type_data = base_data
            self._commit = obj
        elif kind == "blob":
            if ie is None:
                return
            blob_key = "\0".join(("blob", ie.file_id, ie.revision))
            self.db[blob_key] = binsha
            type_data = (ie.file_id, ie.revision)
        elif kind == "tree":
            if ie is None:
                return
            type_data = (ie.file_id, self.revid)
        else:
            raise AssertionError
        # One sha can map to several bzr objects; each gets its own
        # newline-terminated line under the same "git" key.
        entry = "\0".join((obj.type_name, ) + type_data) + "\n"
        key = "git\0" + binsha
        try:
            existing = self.db[key]
        except KeyError:
            self.db[key] = entry
            return
        # Values that lack a trailing newline get one before the new
        # line is appended.
        if existing[-1] == "\n":
            separator = ""
        else:
            separator = "\n"
        self.db[key] = "".join([existing, separator, entry])

    def finish(self):
        """Return the commit object that was added.

        :raises AssertionError: if no commit object was added
        """
        commit = self._commit
        if commit is None:
            raise AssertionError("No commit object added")
        return commit
548
549
550
def TdbBzrGitCache(p):
    """Build a BzrGitCache on a TdbGitShaMap opened at ``p``.

    The cache has no content cache (None) and uses TdbCacheUpdater.
    """
    return BzrGitCache(TdbGitShaMap(p), None, TdbCacheUpdater)
0.200.479 by Jelmer Vernooij
Version tdb sha map.
551
0.200.1140 by Jelmer Vernooij
Update now that the control dir formats are no longer in __init__.
552
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
553
class TdbGitCacheFormat(BzrGitCacheFormat):
    """Cache format for tdb-based caches."""

    def get_format_string(self):
        """Return the marker string that identifies this format."""
        return 'bzr-git sha map version 3 using tdb\n'

    def open(self, transport):
        """Open the tdb cache located at ``transport``.

        :param transport: Transport of the cache directory; when it has no
            local path the directory from ``get_cache_dir()`` is used
        :return: The cache built by ``TdbBzrGitCache`` on ``idmap.tdb``
        :raises ImportError: if opening the cache fails with ImportError
        """
        try:
            local_path = transport.local_abspath(".")
            basepath = local_path.encode(osutils._fs_enc)
        except bzrlib.errors.NotLocalUrl:
            basepath = get_cache_dir()
        assert isinstance(basepath, str)
        idmap_path = os.path.join(basepath, "idmap.tdb")
        try:
            return TdbBzrGitCache(idmap_path)
        except ImportError:
            # Re-raise with a message that explains what is missing.
            raise ImportError(
                "Unable to open existing bzr-git cache because 'tdb' is not "
                "installed.")
571
572
0.200.475 by Jelmer Vernooij
Add Tdb database backend.
573
class TdbGitShaMap(GitShaMap):
    """SHA Map that uses a TDB database.

    Entries (fields are joined with NUL bytes; sha1s are stored in binary
    form):

    "git <sha1>" -> "<type> <type-data1> <type-data2>"
    "commit revid" -> "<sha1> <tree-id>"
    "tree fileid revid" -> "<sha1>"
    "blob fileid revid" -> "<sha1>"
    """

    # Version written to the "version" key of the database.
    TDB_MAP_VERSION = 3
    # Hash size handed to tdb.Tdb when a database file is opened.
    TDB_HASH_SIZE = 50000

    def __init__(self, path=None):
        """Open the map.

        :param path: Path of the tdb file, or None for a plain in-memory
            dictionary. Opened databases are shared through ``mapdbs()``,
            keyed by path.
        """
        import tdb
        self.path = path
        if path is None:
            self.db = {}
        else:
            assert isinstance(path, str)
            if path not in mapdbs():
                mapdbs()[path] = tdb.Tdb(
                    path, self.TDB_HASH_SIZE, tdb.DEFAULT,
                    os.O_RDWR|os.O_CREAT)
            self.db = mapdbs()[path]
        try:
            stored_version = self.db["version"]
        except KeyError:
            # Fresh database: nothing to validate.
            pass
        else:
            if int(stored_version) not in (2, 3):
                trace.warning("SHA Map is incompatible (%s -> %d), rebuilding database.",
                              stored_version, self.TDB_MAP_VERSION)
                self.db.clear()
        self.db["version"] = str(self.TDB_MAP_VERSION)

    def start_write_group(self):
        """Start writing changes."""
        self.db.transaction_start()

    def commit_write_group(self):
        """Commit any pending changes."""
        self.db.transaction_commit()

    def abort_write_group(self):
        """Abort any pending changes."""
        self.db.transaction_cancel()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.path)

    def lookup_commit(self, revid):
        """Return the hex sha of the commit stored for ``revid``.

        :raises KeyError: if there is no entry for the revision
        """
        try:
            value = self.db["commit\0" + revid]
        except KeyError:
            raise KeyError("No cache entry for %r" % revid)
        # The value starts with the 20-byte binary sha.
        return sha_to_hex(value[:20])

    def lookup_blob_id(self, fileid, revision):
        """Return the hex sha of the blob stored for (fileid, revision).

        :raises KeyError: if there is no entry for that pair
        """
        blob_key = "\0".join(("blob", fileid, revision))
        return sha_to_hex(self.db[blob_key])

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the database.

        :param sha: Git object sha (40-character hex or binary form)
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        :raises KeyError: on iteration, if the sha is not in the database
        """
        if len(sha) == 40:
            sha = hex_to_sha(sha)
        for line in self.db["git\0" + sha].splitlines():
            fields = line.split("\0")
            kind = fields[0]
            if kind == "commit":
                if len(fields) == 3:
                    # Stored without a testament sha.
                    verifiers = {}
                else:
                    verifiers = {"testament3-sha1": fields[3]}
                yield (kind, (fields[1], fields[2], verifiers))
            elif kind in ("tree", "blob"):
                yield (kind, tuple(fields[1:]))
            else:
                raise AssertionError("unknown type %r" % kind)

    def missing_revisions(self, revids):
        """Return the set of revids that have no "commit" entry."""
        return set(
            revid for revid in revids
            if self.db.get("commit\0" + revid) is None)

    def revids(self):
        """List the revision ids known."""
        prefix = "commit\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield key[len(prefix):]

    def sha1s(self):
        """List the SHA1s."""
        prefix = "git\0"
        for key in self.db.iterkeys():
            if key.startswith(prefix):
                yield sha_to_hex(key[len(prefix):])
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
673
0.200.750 by Jelmer Vernooij
Remove unused tree code, add mechanism for migrating between sha maps.
674
0.254.44 by Jelmer Vernooij
Add knit-based content cache for trees.
675
class VersionedFilesContentCache(ContentCache):
    """Content cache that keeps git objects in a versioned files store."""

    def __init__(self, vf):
        """:param vf: Versioned files object used as the backing store"""
        self._vf = vf

    def add(self, obj):
        """Insert ``obj`` into the store, keyed by ``(obj.id,)``."""
        record = versionedfile.ChunkedContentFactory(
            (obj.id,), [], None, obj.as_legacy_object_chunks())
        self._vf.insert_record_stream([record])

    def __getitem__(self, sha):
        """Return the git object stored under ``sha``.

        :raises KeyError: if the store reports the record as absent
        """
        records = self._vf.get_record_stream([(sha,)], 'unordered', True)
        # Exactly one key was requested, so only the first record matters.
        record = records.next()
        if record.storage_kind == 'absent':
            raise KeyError(sha)
        fulltext = record.get_bytes_as('fulltext')
        return ShaFile._parse_legacy_object(fulltext)
691
692
0.254.52 by Jelmer Vernooij
Merge trunk, use git objects to cache tree objects.
693
class GitObjectStoreContentCache(ContentCache):
    """Content cache that delegates to a git object store."""

    def __init__(self, store):
        """:param store: Object store that holds the cached objects"""
        self.store = store

    def add_multi(self, objs):
        """Add several objects at once via ``store.add_objects``."""
        self.store.add_objects(objs)

    def add(self, obj, path):
        """Add a single object; ``path`` is accepted but not used."""
        self.store.add_object(obj)

    def __getitem__(self, sha):
        """Return ``store[sha]``."""
        return self.store[sha]
706
707
0.254.46 by Jelmer Vernooij
Merge trunk.
708
class IndexCacheUpdater(CacheUpdater):
    """Cache updater for index-based caches.

    Index entries are added as objects arrive; the objects whose content
    should be cached are collected and handed to the content cache in
    ``finish``.
    """

    def __init__(self, cache, rev):
        """Create an updater for a single revision.

        :param cache: Cache providing ``idmap`` and ``content_cache``
        :param rev: Revision object; its ``revision_id`` and ``parent_ids``
            are kept on the updater
        """
        self.cache = cache
        self.revid = rev.revision_id
        self.parent_revids = rev.parent_ids
        self._commit = None
        self._entries = []
        self._cache_objs = set()

    def add_object(self, obj, ie, path):
        """Record a git object in the id map.

        :param obj: Git object; ``type_name`` must be "commit", "blob" or
            "tree"
        :param ie: For commits the verifiers dict; for blobs and trees an
            object with ``file_id`` (plus ``revision`` and ``kind`` for
            blobs)
        :param path: Stored together with the object when its content is
            queued for the content cache
        :raises AssertionError: if the object type is not recognised
        """
        kind = obj.type_name
        if kind not in ("commit", "blob", "tree"):
            raise AssertionError
        idmap = self.cache.idmap
        if kind == "commit":
            self._commit = obj
            assert type(ie) is dict
            idmap._add_git_sha(obj.id, "commit", (self.revid, obj.tree, ie))
            idmap._add_node(
                ("commit", self.revid, "X"), " ".join((obj.id, obj.tree)))
            keep_content = True
        elif kind == "blob":
            idmap._add_git_sha(obj.id, "blob", (ie.file_id, ie.revision))
            idmap._add_node(("blob", ie.file_id, ie.revision), obj.id)
            # Of the blobs, only symlinks have their content cached.
            keep_content = (ie.kind == "symlink")
        else:
            idmap._add_git_sha(obj.id, "tree", (ie.file_id, self.revid))
            keep_content = True
        if keep_content:
            self._cache_objs.add((obj, path))

    def finish(self):
        """Flush the queued objects to the content cache.

        :return: The commit object that was added, or None if there was
            none
        """
        self.cache.content_cache.add_multi(self._cache_objs)
        return self._commit
743
744
745
class IndexBzrGitCache(BzrGitCache):
    """Cache made of an index-based sha map and a git object store."""

    def __init__(self, transport=None):
        """Open the cache below ``transport``.

        The sha map lives in the ``index`` subdirectory and the object
        store in ``objects``.

        :param transport: Transport of the cache directory. The body calls
            ``transport.clone`` unconditionally, so the None default is not
            usable as-is.
        """
        shamap = IndexGitShaMap(transport.clone('index'))
        # Imported here rather than at module level, as in the rest of
        # this file.
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        store = TransportObjectStore(transport.clone('objects'))
        content_cache = GitObjectStoreContentCache(store)
        super(IndexBzrGitCache, self).__init__(shamap, content_cache,
                IndexCacheUpdater)
0.254.46 by Jelmer Vernooij
Merge trunk.
757
758
0.254.43 by Jelmer Vernooij
Merge trunk.
759
class IndexGitCacheFormat(BzrGitCacheFormat):
    """Cache format that pairs a sha map index with a git object cache."""

    def get_format_string(self):
        """Return the marker string that identifies this format."""
        return 'bzr-git sha map with git object cache version 1\n'

    def initialize(self, transport):
        """Create the on-disk layout of a new cache.

        After the base class initialisation this creates the ``index`` and
        ``objects`` directories and initialises an object store in
        ``objects``.
        """
        super(IndexGitCacheFormat, self).initialize(transport)
        for dirname in ('index', 'objects'):
            transport.mkdir(dirname)
        from bzrlib.plugins.git.transportgit import TransportObjectStore
        objects_transport = transport.clone('objects')
        TransportObjectStore.init(objects_transport)

    def open(self, transport):
        """Return an IndexBzrGitCache for ``transport``."""
        return IndexBzrGitCache(transport)
0.254.1 by Jelmer Vernooij
Add trivial index-based sha map.
773
774
775
class IndexGitShaMap(GitShaMap):
    """SHA Map that uses the Bazaar APIs to store a cache.

    BTree Index file with the following contents. Every key has three
    elements ("X" pads the shorter ones) and value fields are separated
    by spaces:

    ("git", <sha1>, "X") -> "<type> <type-data1> <type-data2>"
    ("commit", <revid>, "X") -> "<sha1> <tree-id>"
    ("blob", <fileid>, <revid>) -> <sha1>

    """

    def __init__(self, transport=None):
        """Open the map.

        :param transport: Transport of the directory holding the ``.rix``
            index files, or None for a purely in-memory index
        """
        if transport is None:
            self._transport = None
            self._index = _mod_index.InMemoryGraphIndex(0, key_elements=3)
            # In memory the index doubles as its own builder.
            self._builder = self._index
        else:
            self._builder = None
            self._transport = transport
            self._index = _mod_index.CombinedGraphIndex([])
            for name in self._transport.list_dir("."):
                if not name.endswith(".rix"):
                    continue
                x = _mod_btree_index.BTreeGraphIndex(self._transport, name,
                    self._transport.stat(name).st_size)
                self._index.insert_index(0, x)

    @classmethod
    def from_repository(cls, repository):
        """Open the map belonging to ``repository``.

        Uses the ``git`` subdirectory of the repository transport when the
        repository has a ``_transport`` attribute, otherwise the directory
        returned by ``get_cache_dir()``.
        """
        transport = getattr(repository, "_transport", None)
        if transport is not None:
            try:
                transport.mkdir('git')
            except bzrlib.errors.FileExists:
                pass
            return cls(transport.clone('git'))
        from bzrlib.transport import get_transport
        return cls(get_transport(get_cache_dir()))

    def __repr__(self):
        if self._transport is not None:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)
        else:
            return "%s()" % (self.__class__.__name__)

    def repack(self):
        """Rewrite all entries into one new index file.

        The index files that existed before the repack are renamed to
        ``<name>.old``. Must not be called inside a write group.
        """
        assert self._builder is None
        self.start_write_group()
        for _, key, value in self._index.iter_all_entries():
            self._builder.add_node(key, value)
        to_remove = []
        for name in self._transport.list_dir('.'):
            if name.endswith('.rix'):
                to_remove.append(name)
        self.commit_write_group()
        # NOTE(review): confirm that CombinedGraphIndex exposes a public
        # ``indices`` list; left untouched here.
        del self._index.indices[1:]
        for name in to_remove:
            self._transport.rename(name, name + '.old')

    def start_write_group(self):
        """Start a write group with a fresh builder and name hash."""
        assert self._builder is None
        self._builder = _mod_btree_index.BTreeBuilder(0, key_elements=3)
        self._name = osutils.sha()

    def commit_write_group(self):
        """Write the pending entries to a new ``.rix`` file.

        The file name is the hex digest of the name hash; the new index is
        put in front of the combined index.
        """
        assert self._builder is not None
        stream = self._builder.finish()
        name = self._name.hexdigest() + ".rix"
        size = self._transport.put_file(name, stream)
        index = _mod_btree_index.BTreeGraphIndex(self._transport, name, size)
        self._index.insert_index(0, index)
        self._builder = None
        self._name = None

    def abort_write_group(self):
        """Discard the pending entries."""
        assert self._builder is not None
        self._builder = None
        self._name = None

    def _add_node(self, key, value):
        """Add an entry to the builder.

        :return: True if the key was already present (the entry is then
            not added), False otherwise
        """
        try:
            self._builder.add_node(key, value)
        except bzrlib.errors.BadIndexDuplicateKey:
            # Multiple bzr objects can have the same contents
            return True
        else:
            return False

    def _get_entry(self, key):
        """Return the value stored for ``key``.

        The committed index is consulted first, then the builder of an
        open write group.

        :raises KeyError: if the key is in neither
        """
        entries = self._index.iter_entries([key])
        try:
            return entries.next()[2]
        except StopIteration:
            if self._builder is None:
                raise KeyError(key)
            entries = self._builder.iter_entries([key])
            try:
                return entries.next()[2]
            except StopIteration:
                raise KeyError(key)

    def _iter_entries_prefix(self, prefix):
        """Yield (key, value) for all entries that match ``prefix``.

        Entries from the committed index come first, followed by those of
        the builder when a write group is open.
        """
        for entry in self._index.iter_entries_prefix([prefix]):
            yield (entry[1], entry[2])
        if self._builder is not None:
            for entry in self._builder.iter_entries_prefix([prefix]):
                yield (entry[1], entry[2])

    def lookup_commit(self, revid):
        """Return the hex sha of the commit stored for ``revid``.

        :raises KeyError: if there is no entry for the revision
        """
        # The value starts with the 40-character hex sha.
        return self._get_entry(("commit", revid, "X"))[:40]

    def _add_git_sha(self, hexsha, type, type_data):
        """Add a ("git", sha) entry and feed the index name hash.

        :param hexsha: Hex sha of the git object, or None if the bzr
            object has no git counterpart
        :param type: "commit", "blob" or "tree"
        :param type_data: For commits (revid, tree sha, verifiers dict);
            otherwise a tuple of strings
        """
        if hexsha is not None:
            self._name.update(hexsha)
            if type == "commit":
                td = (type_data[0], type_data[1])
                try:
                    td += (type_data[2]["testament3-sha1"],)
                except KeyError:
                    # No testament: the value carries two data fields only.
                    pass
            else:
                td = type_data
            self._add_node(("git", hexsha, "X"), " ".join((type,) + td))
        else:
            # This object is not represented in Git - perhaps an empty
            # directory?
            self._name.update(type + " ".join(type_data))

    def lookup_blob_id(self, fileid, revision):
        """Return the sha of the blob stored for (fileid, revision).

        :raises KeyError: if there is no entry for that pair
        """
        return self._get_entry(("blob", fileid, revision))

    def lookup_git_sha(self, sha):
        """Lookup a Git sha in the index.

        :param sha: Git object sha (hex, or 20-byte binary form)
        :return: iterator over (type, type_data) with type_data:
            commit: revid, tree sha, verifiers dict
            blob: fileid, revid
            tree: fileid, revid
        :raises KeyError: on iteration, if no entry matches the sha
        """
        if len(sha) == 20:
            sha = sha_to_hex(sha)
        found = False
        for key, value in self._iter_entries_prefix(("git", sha, None)):
            found = True
            data = value.split(" ", 3)
            if data[0] == "commit":
                # _add_git_sha leaves the testament field out when no
                # verifier is known, so the fourth field may be missing.
                if len(data) > 3 and data[3]:
                    verifiers = {"testament3-sha1": data[3]}
                else:
                    verifiers = {}
                yield ("commit", (data[1], data[2], verifiers))
            else:
                yield (data[0], tuple(data[1:]))
        if not found:
            raise KeyError(sha)

    def revids(self):
        """List the revision ids known."""
        for key, value in self._iter_entries_prefix(("commit", None, None)):
            yield key[1]

    def missing_revisions(self, revids):
        """Return set of all the revisions that are not present.

        Only the committed index is consulted.
        """
        missing_revids = set(revids)
        # Build the lookup keys from the set, not from ``revids`` again,
        # so that a one-shot iterator still works.
        keys = [("commit", revid, "X") for revid in missing_revids]
        for _, key, value in self._index.iter_entries(keys):
            # discard() tolerates a key that is reported more than once.
            missing_revids.discard(key[1])
        return missing_revids

    def sha1s(self):
        """List the SHA1s."""
        for key, value in self._iter_entries_prefix(("git", None, None)):
            yield key[1]
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
941
942
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
943
# Registry of the known cache formats, keyed by format string.
formats = registry.Registry()
formats.register(
    TdbGitCacheFormat().get_format_string(), TdbGitCacheFormat())
formats.register(
    SqliteGitCacheFormat().get_format_string(), SqliteGitCacheFormat())
formats.register(
    IndexGitCacheFormat().get_format_string(), IndexGitCacheFormat())

# In the future, this will become the default:
# formats.register('default', IndexGitCacheFormat())

# The default format is tdb when the module can be imported, and sqlite
# otherwise.
try:
    import tdb
except ImportError:
    formats.register('default', SqliteGitCacheFormat())
else:
    formats.register('default', TdbGitCacheFormat())
958
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
959
960
961
def migrate_ancient_formats(repo_transport):
    """Migrate older cache formats.

    Moves a legacy ``git.db`` (sqlite) or ``git.tdb`` file from the top of
    the repository into a newly created ``git`` directory. Nothing is done
    when neither file exists or when ``git`` already exists.

    :param repo_transport: Transport of the repository; a read-only
        decorator is removed before use
    """
    repo_transport = remove_readonly_transport_decorator(repo_transport)
    has_sqlite = repo_transport.has("git.db")
    has_tdb = repo_transport.has("git.tdb")
    # Parenthesised on purpose: "not has_sqlite or has_tdb" binds as
    # "(not has_sqlite) or has_tdb", which skipped every tdb migration.
    if not (has_sqlite or has_tdb):
        return
    try:
        repo_transport.mkdir("git")
    except bzrlib.errors.FileExists:
        return
    # Prefer migrating git.db over git.tdb, since the latter may not
    # be openable on some platforms.
    if has_sqlite:
        SqliteGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.db", "git/idmap.db")
    elif has_tdb:
        TdbGitCacheFormat().initialize(repo_transport.clone("git"))
        repo_transport.rename("git.tdb", "git/idmap.tdb")
980
981
0.200.865 by Jelmer Vernooij
Support serving without --allow-writes.
982
def remove_readonly_transport_decorator(transport):
    """Return the transport underneath a read-only decorator.

    :param transport: Transport to unwrap
    :return: ``transport._decorated`` when ``transport.is_readonly()`` is
        true, otherwise ``transport`` itself
    """
    return transport._decorated if transport.is_readonly() else transport
986
987
0.254.19 by Jelmer Vernooij
Support upgrading sha maps.
988
def from_repository(repository):
    """Open a cache file for a repository.

    If the repository is remote and there is no transport available from it
    this will use a local file in the users cache directory
    (typically ~/.cache/bazaar/git/)

    Legacy cache files found on the repository transport are migrated
    before the cache is opened.

    :param repository: A repository object
    :return: Whatever ``BzrGitCacheFormat.from_repository`` returns
    """
    transport = getattr(repository, "_transport", None)
    if transport is not None:
        migrate_ancient_formats(transport)
    return BzrGitCacheFormat.from_repository(repository)