/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.56 by Jelmer Vernooij
Switch to using GitPython rather than our own in-house stuff.
19
import git
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
20
import os
0.200.57 by Jelmer Vernooij
Fix more tests.
21
import time
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
22
23
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
24
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
25
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
27
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
28
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
29
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
30
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
31
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
32
    urlutils,
0.200.60 by Jelmer Vernooij
Support signature functions.
33
    versionedfile,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
34
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
36
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
37
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
38
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
39
    ids,
40
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
41
42
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
43
# Open sqlite3 cache connections, keyed by cache file path, so that every
# GitRepository created in this process reuses one connection per file.
cachedbs = {}
44
45
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
46
class GitRepository(repository.Repository):
47
    """An adapter to git repositories for bzr."""
48
0.200.41 by David Allouche
Define _serializer = None in GitRepository.
49
    _serializer = None
50
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
51
    def __init__(self, gitdir, lockfiles):
        """Open the git repository that lives under ``gitdir``.

        :param gitdir: control directory object; its ``root_transport``
            supplies both the repository base and the local path that is
            handed to GitPython.
        :param lockfiles: object stored as ``control_files``.
        """
        self.base = gitdir.root_transport.base
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = git.repo.Repo(gitdir.root_transport.local_abspath("."))
        # In-memory caches, keyed by bzr revision id or by git object id.
        self._revision_cache = {}
        self._blob_cache = {}
        self._blob_info_cache = {}
        # _fetch_blob() consults this attribute; None means that no
        # inventory is currently collecting file data.
        self._building_inventory = None
        cache_dir = cache.create_cache_dir()
        # NOTE(review): this transport is never used afterwards; the call is
        # kept in case constructing it has side effects -- confirm and drop.
        cachedir_transport = get_transport(cache_dir)
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Reuse the connection already opened for this cache file, if any.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self.texts = None
        self.signatures = versionedfile.VirtualSignatureTexts(self)
        self.revisions = None
        self._format = GitFormat()
        self._fallback_repositories = []
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
71
72
    def _init_cachedb(self):
73
        self.cachedb.executescript("""
74
        create table if not exists inventory (
75
            revid blob);
76
        create unique index if not exists inventory_revid
77
            on inventory (revid);
78
        create table if not exists entry_revision (
79
            inventory blob,
80
            path blob,
81
            gitid blob,
82
            executable integer,
83
            revision blob);
84
        create unique index if not exists entry_revision_revid_path
85
            on entry_revision (inventory, path);
86
        """)
87
        self.cachedb.commit()
88
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
89
    def is_shared(self):
        """Report this repository as shared; the answer is always True."""
        return True
91
0.200.40 by David Allouche
GitRepository.supports_rich_root() => False
92
    def supports_rich_root(self):
        """Report that rich roots are not supported; always False."""
        return False
94
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
95
    def get_ancestry(self, revision_id):
        """Return the ancestry of ``revision_id`` as a list of bzr ids.

        The list starts with None, followed by the converted ids in the
        order in which the underlying git object reports them.
        """
        git_ids = self._git.get_ancestry(
            [ids.convert_revision_id_bzr_to_git(revision_id)])
        ancestry = [None]
        for git_id in git_ids:
            ancestry.append(ids.convert_revision_id_git_to_bzr(git_id))
        return ancestry
102
103
    def get_signature_text(self, revision_id):
        """Always fail: no signature text is ever returned.

        :raises errors.NoSuchRevision: unconditionally.
        """
        raise errors.NoSuchRevision(self, revision_id)
105
0.200.60 by Jelmer Vernooij
Support signature functions.
106
    def has_signature_for_revision_id(self, revision_id):
        """Report that ``revision_id`` has no signature; always False."""
        return False
108
0.200.57 by Jelmer Vernooij
Fix more tests.
109
    def get_parent_map(self, revision_ids):
        """Map every id in ``revision_ids`` to a tuple of its parent ids.

        :return: dict of bzr revision id -> tuple of bzr parent ids.
        """
        parent_map = {}
        for revid in revision_ids:
            git_id = ids.convert_revision_id_bzr_to_git(revid)
            parents = self._git.commit(git_id).parents
            parent_map[revid] = tuple(
                ids.convert_revision_id_git_to_bzr(parent.id)
                for parent in parents)
        return parent_map
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
115
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
116
    def get_revision(self, revision_id):
117
        if revision_id in self._revision_cache:
118
            return self._revision_cache[revision_id]
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
119
        git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
0.200.57 by Jelmer Vernooij
Fix more tests.
120
        commit = self._git.commit(git_commit_id)
0.204.5 by James Westby
Lose the debuggin prints.
121
        # print "fetched revision:", git_commit_id
0.200.57 by Jelmer Vernooij
Fix more tests.
122
        revision = self._parse_rev(commit)
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
123
        self._revision_cache[revision_id] = revision
124
        return revision
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
125
126
    def has_revision(self, revision_id):
127
        try:
128
            self.get_revision(revision_id)
129
        except NoSuchRevision:
130
            return False
131
        else:
132
            return True
133
134
    def get_revisions(self, revisions):
135
        return [self.get_revision(r) for r in revisions]
136
0.200.32 by David Allouche
Rewrite GitRepository._parse_rev, with unit tests.
137
    @classmethod
    def _parse_rev(klass, commit):
        """Build a bzr revision out of a git commit object.

        :param commit: git commit providing ``id``, ``parents``,
            ``message``, ``committer``, ``author`` and ``committed_date``.
        :return: a `bzrlib.revision.Revision` object.
        """
        to_bzr = ids.convert_revision_id_git_to_bzr
        rev = revision.Revision(to_bzr(commit.id))
        parent_ids = []
        for parent in commit.parents:
            parent_ids.append(to_bzr(parent.id))
        rev.parent_ids = tuple(parent_ids)
        rev.inventory_sha1 = ""
        rev.message = commit.message
        rev.committer = str(commit.committer)
        rev.properties['author'] = str(commit.author)
        # NOTE(review): time.mktime() reads its argument as local time while
        # the timezone below is recorded as 0 -- confirm what kind of
        # time tuple committed_date holds.
        rev.timestamp = time.mktime(commit.committed_date)
        rev.timezone = 0
        return rev
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
152
153
    def revision_trees(self, revids):
        """Lazily yield the revision tree of each id in ``revids``."""
        for revision_id in revids:
            tree = self.revision_tree(revision_id)
            yield tree
156
157
    def revision_tree(self, revision_id):
        """Return the tree for ``revision_id``.

        The null revision gets a plain RevisionTree over an empty inventory
        created without a root; every other id gets a GitRevisionTree.
        """
        revision_id = revision.ensure_null(revision_id)
        if revision_id != revision.NULL_REVISION:
            return GitRevisionTree(self, revision_id)
        empty_inv = inventory.Inventory(root_id=None)
        empty_inv.revision_id = revision_id
        return revisiontree.RevisionTree(self, empty_inv, revision_id)
166
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
167
    def _fetch_blob(self, git_id):
168
        lines = self._git.cat_file('blob', git_id)
0.204.5 by James Westby
Lose the debuggin prints.
169
        # print "fetched blob:", git_id
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
170
        if self._building_inventory is not None:
171
            self._building_inventory.git_file_data[git_id] = lines
172
        return lines
173
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
174
    def _get_blob(self, git_id):
175
        try:
176
            return self._blob_cache[git_id]
177
        except KeyError:
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
178
            return self._fetch_blob(git_id)
179
180
    def _get_blob_caching(self, git_id):
181
        try:
182
            return self._blob_cache[git_id]
183
        except KeyError:
184
            lines = self._fetch_blob(git_id)
185
            self._blob_cache[git_id] = lines
186
            return lines
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
187
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
188
    def _get_blob_info(self, git_id):
189
        try:
190
            return self._blob_info_cache[git_id]
191
        except KeyError:
192
            lines = self._get_blob(git_id)
193
            size = sum(len(line) for line in lines)
194
            sha1 = osutils.sha_strings(lines)
195
            self._blob_info_cache[git_id] = (size, sha1)
196
            return size, sha1
197
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
198
    def get_inventory(self, revision_id):
0.200.57 by Jelmer Vernooij
Fix more tests.
199
        assert revision_id != None
200
        return self.revision_tree(revision_id).inventory
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
201
202
    def _set_entry_text_info(self, inv, entry, git_id):
203
        if entry.kind == 'directory':
204
            return
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
205
        size, sha1 = self._get_blob_info(git_id)
206
        entry.text_size = size
207
        entry.text_sha1 = sha1
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
208
        if entry.kind == 'symlink':
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
209
            lines = self._get_blob_caching(git_id)
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
210
            entry.symlink_target = ''.join(lines)
211
212
    def _get_file_revision(self, revision_id, path):
        """Return the bzr id of the one commit rev_list reports for ``path``.

        The listing starts at ``revision_id``, is limited to a single
        result and is restricted to ``path``.
        """
        start = ids.convert_revision_id_bzr_to_git(revision_id)
        output = self._git.rev_list(
            [start], max_count=1, topo_order=True, paths=[path])
        # Exactly one line is expected; anything else fails the unpacking.
        (line,) = output
        # The last character of the line is dropped before conversion.
        return ids.convert_revision_id_git_to_bzr(line[:-1])
220
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
221
    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
222
        result = self.cachedb.execute(
223
            "select revision from entry_revision where"
224
            " inventory=? and path=? and gitid=? and executable=?",
225
            (revid, path, git_id, executable)).fetchone()
226
        if result is None:
227
            return None
228
        [revision] = result
229
        return revision
230
231
    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
232
        self.cachedb.execute(
233
            "insert into entry_revision"
234
            " (inventory, path, gitid, executable, revision)"
235
            " values (?, ?, ?, ?, ?)",
236
            (revid, path, git_id, executable, revision))
237
238
    def _all_inventories_in_db(self, revids):
239
        for revid in revids:
240
            result = self.cachedb.execute(
241
                "select count(*) from inventory where revid = ?",
242
                (revid,)).fetchone()
243
            if result is None:
244
                return False
245
        return True
246
0.200.44 by David Allouche
Remove some experimental cruft.
247
    def _set_entry_revision(self, entry, revid, path, git_id):
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
248
        # If a revision is in the cache, we assume it contains entries for the
249
        # whole inventory. So if all parent revisions are in the cache, but no
250
        # parent entry is present, then the entry revision is the current
0.200.44 by David Allouche
Remove some experimental cruft.
251
        # revision. That amortizes the number of _get_file_revision calls for
252
        # large pulls to a "small number".
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
253
        entry_rev = self._get_entry_revision_from_db(
254
            revid, path, git_id, entry.executable)
255
        if entry_rev is not None:
256
            entry.revision = entry_rev
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
257
            return
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
258
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
259
        revision = self.get_revision(revid)
260
        for parent_id in revision.parent_ids:
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
261
            entry_rev = self._get_entry_revision_from_db(
262
                parent_id, path, git_id, entry.executable)
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
263
            if entry_rev is not None:
264
                break
265
        else:
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
266
            if self._all_inventories_in_db(revision.parent_ids):
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
267
                entry_rev = revid
268
            else:
269
                entry_rev = self._get_file_revision(revid, path)
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
270
        self._set_entry_revision_in_db(
271
            revid, path, git_id, entry.executable, entry_rev)
272
        #self.cachedb.commit()
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
273
        entry.revision = entry_rev
274
275
276
def escape_file_id(file_id):
    """Escape a path for use as a file id.

    Underscores are doubled first, then each space becomes ``_s``, so the
    two kinds of character stay distinguishable in the result.
    """
    doubled = file_id.replace('_', '__')
    return doubled.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
278
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
279
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
280
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory is built from a git tree object."""

    # Any execute permission bit (octal 111), spelled so that it does not
    # depend on a particular octal literal syntax.
    _EXECUTABLE_BITS = int('111', 8)

    def __init__(self, repository, revision_id):
        """Load the git tree of ``revision_id`` and build its inventory.

        :param repository: repository object whose ``_git`` attribute gives
            access to the commit.
        :param revision_id: bzr revision id of the commit to expose.
        """
        self._repository = repository
        self.revision_id = revision_id
        git_id = ids.convert_revision_id_bzr_to_git(revision_id)
        self.tree = repository._git.commit(git_id).tree
        self._inventory = inventory.Inventory(revision_id=revision_id)
        self._inventory.root.revision = revision_id
        self._build_inventory(self.tree, self._inventory.root, "")

    def get_file_lines(self, file_id):
        """Return the content lines of ``file_id`` ([] for a directory)."""
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        # NOTE(review): nothing in this module assigns ``git_ids`` or
        # ``git_file_data`` on the inventory built by __init__ -- confirm
        # where they are expected to come from.
        git_id = self._inventory.git_ids[file_id]
        if git_id in self._inventory.git_file_data:
            return self._inventory.git_file_data[git_id]
        return self._repository._get_blob(git_id)

    def _build_inventory(self, tree, ie, path):
        """Recursively add the entries of a git tree below ``ie``.

        :param tree: git tree object whose ``contents`` are added.
        :param ie: inventory entry of the directory that receives them.
        :param path: path of ``ie`` from the tree root, "" for the root
            itself; must be a ``str``.
        :raises AssertionError: when ``path`` is not a ``str``, when a name
            is already present in ``ie`` or when an entry's mode is not
            one of the handled kinds.
        """
        assert isinstance(path, str)
        for b in tree.contents:
            basename = b.name.decode("utf-8")
            if path == "":
                child_path = b.name
            else:
                child_path = urlutils.join(path, b.name)
            # child_path is built from undecoded names, so it is passed on
            # as is: calling .encode() on a Python 2 byte string first
            # decodes it implicitly as ASCII and fails on non-ASCII names.
            file_id = escape_file_id(child_path)
            # The leading mode characters select the kind of entry.
            if b.mode[0] == '0':
                child_ie = inventory.InventoryDirectory(file_id, basename, ie.file_id)
            elif b.mode[0] == '1':
                if b.mode[1] == '0':
                    child_ie = inventory.InventoryFile(file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string(b.data)
                elif b.mode[1] == '2':
                    child_ie = inventory.InventoryLink(file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string("")
                else:
                    raise AssertionError(
                        "Unknown file kind, perms=%r." % (b.mode,))
                child_ie.text_size = b.size
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (b.mode,))
            # The permission digits follow the first three mode characters.
            child_ie.executable = bool(
                int(b.mode[3:], 8) & self._EXECUTABLE_BITS)
            child_ie.revision = self.revision_id
            assert basename not in ie.children
            ie.children[basename] = child_ie
            if b.mode[0] == '0':
                self._build_inventory(b, child_ie, child_path)
330
0.203.1 by Aaron Bentley
Make checkouts work
331
332
class GitFormat(object):
    """Format object that GitRepository stores in its ``_format``."""

    # Tree references are reported as unsupported.
    supports_tree_reference = False