/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.56 by Jelmer Vernooij
Switch to using GitPython rather than our own in-house stuff.
19
import git
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
20
import os
0.200.57 by Jelmer Vernooij
Fix more tests.
21
import time
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
22
23
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
24
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
25
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
27
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
28
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
29
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
30
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
31
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
32
    urlutils,
0.200.60 by Jelmer Vernooij
Support signature functions.
33
    versionedfile,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
34
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
36
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
37
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
38
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
39
    ids,
40
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
41
42
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
43
# Process-wide registry of open sqlite connections, keyed by cache file path.
# GitRepository.__init__ looks a path up here before connecting, so that
# repositories using the same cache file share a single connection.
cachedbs = {}
44
45
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
46
class GitRepository(repository.Repository):
    """An adapter to git repositories for bzr.

    Wraps a GitPython ``Repo`` object (``self._git``) and keeps a sqlite
    cache (``self.cachedb``) that remembers, per inventory and path, the
    revision in which an entry was last changed.
    """

    _serializer = None

    def __init__(self, gitdir, lockfiles):
        """Open the git repository that lives at ``gitdir``.

        :param gitdir: control directory; its ``root_transport`` supplies
            both the base URL and the local path handed to GitPython.
        :param lockfiles: stored unchanged as ``control_files``.
        """
        self.base = gitdir.root_transport.base
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = git.repo.Repo(gitdir.root_transport.local_abspath("."))
        self._blob_cache = {}
        self._blob_info_cache = {}
        # _fetch_blob() reads this attribute on every call, so it has to
        # exist even when no inventory is being built.
        self._building_inventory = None
        cache_dir = cache.create_cache_dir()
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Connections are shared through the module-level registry so that a
        # cache file is only opened once per process.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self.texts = None
        self.signatures = versionedfile.VirtualSignatureTexts(self)
        self.revisions = None
        self._format = GitFormat()
        self._fallback_repositories = []

    def _init_cachedb(self):
        """Create the cache tables and indexes if they do not exist yet."""
        self.cachedb.executescript("""
        create table if not exists inventory (
            revid blob);
        create unique index if not exists inventory_revid
            on inventory (revid);
        create table if not exists entry_revision (
            inventory blob,
            path blob,
            gitid blob,
            executable integer,
            revision blob);
        create unique index if not exists entry_revision_revid_path
            on entry_revision (inventory, path);
        """)
        self.cachedb.commit()

    def is_shared(self):
        """Git repositories are always reported as shared."""
        return True

    def supports_rich_root(self):
        """Rich roots are not supported."""
        return False

    def get_ancestry(self, revision_id):
        """Return the ancestry of ``revision_id`` as bzr revision ids.

        :return: a list starting with ``None`` followed by the converted
            ids, in the order the git backend reports them.
        """
        param = [ids.convert_revision_id_bzr_to_git(revision_id)]
        git_ancestry = self._git.get_ancestry(param)
        return [None] + [
            ids.convert_revision_id_git_to_bzr(git_id)
            for git_id in git_ancestry]

    def get_signature_text(self, revision_id):
        """No signatures are available; always raises.

        :raises errors.NoSuchRevision: unconditionally.
        """
        raise errors.NoSuchRevision(self, revision_id)

    def has_signature_for_revision_id(self, revision_id):
        """Revisions are never reported as signed."""
        return False

    def get_parent_map(self, revision_ids):
        """Map each requested revision id to a tuple of its parent ids.

        :param revision_ids: iterable of bzr revision ids.
        :return: dict of ``revid -> (parent_revid, ...)``.
        """
        ret = {}
        for revid in revision_ids:
            commit = self._git.commit(ids.convert_revision_id_bzr_to_git(revid))
            ret[revid] = tuple(
                ids.convert_revision_id_git_to_bzr(p.id)
                for p in commit.parents)
        return ret

    def get_revision(self, revision_id):
        """Return the bzr ``Revision`` for ``revision_id``."""
        git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        commit = self._git.commit(git_commit_id)
        return self._parse_rev(commit)

    def has_revision(self, revision_id):
        """Tell whether ``revision_id`` can be loaded.

        :return: False when loading it raises ``errors.NoSuchRevision``,
            True otherwise.
        """
        try:
            self.get_revision(revision_id)
        except errors.NoSuchRevision:
            # The exception lives in bzrlib.errors; the unqualified name was
            # never imported and turned a missing revision into a NameError.
            return False
        return True

    def get_revisions(self, revisions):
        """Return the ``Revision`` objects for ``revisions``, in order."""
        return [self.get_revision(r) for r in revisions]

    @classmethod
    def _parse_rev(klass, commit):
        """Convert a git commit to a bzr revision.

        :param commit: commit object exposing ``id``, ``parents``,
            ``message``, ``committer``, ``author`` and ``committed_date``.
        :return: a `bzrlib.revision.Revision` object.
        """
        # Imported here so the module's import block is left untouched.
        import calendar

        rev = revision.Revision(ids.convert_revision_id_git_to_bzr(commit.id))
        rev.parent_ids = tuple(
            ids.convert_revision_id_git_to_bzr(p.id) for p in commit.parents)
        rev.inventory_sha1 = ""
        # git does not record the message encoding, so undecodable bytes are
        # replaced rather than treated as an error.
        rev.message = commit.message.decode("utf-8", "replace")
        rev.committer = str(commit.committer)
        rev.properties['author'] = str(commit.author)
        # The revision is recorded with timezone 0, so the timestamp has to
        # be the UTC epoch. time.mktime() would interpret the time tuple in
        # the *local* zone and shift the result by the machine's UTC offset.
        # NOTE(review): assumes committed_date is a UTC struct_time, as
        # produced by GitPython 0.1.x (time.gmtime) -- confirm.
        rev.timestamp = calendar.timegm(commit.committed_date)
        rev.timezone = 0
        return rev

    def revision_trees(self, revids):
        """Yield the revision tree of every id in ``revids``."""
        for revid in revids:
            yield self.revision_tree(revid)

    def revision_tree(self, revision_id):
        """Return the tree for ``revision_id``.

        The null revision gets an empty bzr ``RevisionTree``; anything else
        is served by ``GitRevisionTree``.
        """
        revision_id = revision.ensure_null(revision_id)

        if revision_id == revision.NULL_REVISION:
            inv = inventory.Inventory(root_id=None)
            inv.revision_id = revision_id
            return revisiontree.RevisionTree(self, inv, revision_id)

        return GitRevisionTree(self, revision_id)

    def _fetch_blob(self, git_id):
        """Read blob ``git_id`` from git, bypassing the in-memory cache.

        When an inventory is being built (``_building_inventory`` is not
        None) the content is also recorded in its ``git_file_data``.
        """
        lines = self._git.cat_file('blob', git_id)
        if self._building_inventory is not None:
            self._building_inventory.git_file_data[git_id] = lines
        return lines

    def _get_blob(self, git_id):
        """Return blob content, using but not filling ``_blob_cache``."""
        try:
            return self._blob_cache[git_id]
        except KeyError:
            return self._fetch_blob(git_id)

    def _get_blob_caching(self, git_id):
        """Return blob content and remember it in ``_blob_cache``."""
        try:
            return self._blob_cache[git_id]
        except KeyError:
            lines = self._fetch_blob(git_id)
            self._blob_cache[git_id] = lines
            return lines

    def _get_blob_info(self, git_id):
        """Return ``(size, sha1)`` of a blob, cached per git id."""
        try:
            return self._blob_info_cache[git_id]
        except KeyError:
            lines = self._get_blob(git_id)
            size = sum(len(line) for line in lines)
            sha1 = osutils.sha_strings(lines)
            self._blob_info_cache[git_id] = (size, sha1)
            return size, sha1

    def get_inventory(self, revision_id):
        """Return the inventory of the tree at ``revision_id``."""
        assert revision_id is not None
        return self.revision_tree(revision_id).inventory

    def _set_entry_text_info(self, inv, entry, git_id):
        """Fill in size, sha1 and (for symlinks) target of ``entry``.

        Directories are left untouched. ``inv`` is accepted but not used.
        """
        if entry.kind == 'directory':
            return
        size, sha1 = self._get_blob_info(git_id)
        entry.text_size = size
        entry.text_sha1 = sha1
        if entry.kind == 'symlink':
            # Only symlink blobs are kept in the in-memory blob cache.
            lines = self._get_blob_caching(git_id)
            entry.symlink_target = ''.join(lines)

    def _get_file_revision(self, revision_id, path):
        """Ask git for the last revision that touched ``path``.

        :return: the bzr revision id of the single commit git reports.
        """
        lines = self._git.rev_list(
            [ids.convert_revision_id_bzr_to_git(revision_id)],
            max_count=1, topo_order=True, paths=[path])
        # Exactly one line is expected; anything else raises ValueError.
        [line] = lines
        # The last character is dropped (presumably the trailing newline).
        return ids.convert_revision_id_git_to_bzr(line[:-1])

    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
        """Look up a cached entry revision.

        :return: the cached revision, or None when no row matches all of
            inventory, path, git id and executable flag.
        """
        result = self.cachedb.execute(
            "select revision from entry_revision where"
            " inventory=? and path=? and gitid=? and executable=?",
            (revid, path, git_id, executable)).fetchone()
        if result is None:
            return None
        [entry_revision] = result
        return entry_revision

    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
        """Record the revision of an entry in the cache (not committed)."""
        self.cachedb.execute(
            "insert into entry_revision"
            " (inventory, path, gitid, executable, revision)"
            " values (?, ?, ?, ?, ?)",
            (revid, path, git_id, executable, revision))

    def _all_inventories_in_db(self, revids):
        """Tell whether every revision in ``revids`` is in the cache.

        :return: False as soon as one revision has no row in the
            ``inventory`` table, True otherwise (including for no revids).
        """
        for revid in revids:
            # count(*) always yields exactly one row, so the value itself
            # must be inspected; testing the row against None never fails.
            (count,) = self.cachedb.execute(
                "select count(*) from inventory where revid = ?",
                (revid,)).fetchone()
            if count == 0:
                return False
        return True

    def _set_entry_revision(self, entry, revid, path, git_id):
        """Set ``entry.revision``, consulting and updating the cache.

        :param entry: inventory entry; ``executable`` is read and
            ``revision`` is written.
        :param revid: revision of the inventory the entry belongs to.
        :param path: path of the entry.
        :param git_id: git id of the entry's content.
        """
        # If a revision is in the cache, we assume it contains entries for the
        # whole inventory. So if all parent revisions are in the cache, but no
        # parent entry is present, then the entry revision is the current
        # revision. That amortizes the number of _get_file_revision calls for
        # large pulls to a "small number".
        entry_rev = self._get_entry_revision_from_db(
            revid, path, git_id, entry.executable)
        if entry_rev is not None:
            entry.revision = entry_rev
            return

        rev = self.get_revision(revid)
        for parent_id in rev.parent_ids:
            entry_rev = self._get_entry_revision_from_db(
                parent_id, path, git_id, entry.executable)
            if entry_rev is not None:
                break
        else:
            if self._all_inventories_in_db(rev.parent_ids):
                entry_rev = revid
            else:
                entry_rev = self._get_file_revision(revid, path)
        # The new row is left uncommitted here.
        self._set_entry_revision_in_db(
            revid, path, git_id, entry.executable, entry_rev)
        entry.revision = entry_rev
270
271
272
def escape_file_id(file_id):
    """Escape underscores and spaces in ``file_id``.

    Every ``_`` becomes ``__`` and every space becomes ``_s``.
    """
    # Underscores are doubled first so that the '_s' written for a space is
    # not itself escaped again.
    doubled = file_id.replace('_', '__')
    return doubled.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
274
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
275
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
276
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory is built from a git commit's tree."""

    # Any of the user/group/other execute permission bits (octal 111),
    # spelled in hex so the literal is valid on every Python version.
    _EXECUTABLE_MASK = 0x49

    def __init__(self, repository, revision_id):
        """Load the git tree of ``revision_id`` and build its inventory.

        :param repository: object whose ``_git`` attribute gives access to
            the commits.
        :param revision_id: bzr revision id of the commit to represent.
        """
        self._repository = repository
        self.revision_id = revision_id
        git_id = ids.convert_revision_id_bzr_to_git(revision_id)
        self.tree = repository._git.commit(git_id).tree
        self._inventory = inventory.Inventory(revision_id=revision_id)
        self._inventory.root.revision = revision_id
        # get_file_lines() reads both mappings, so they have to exist before
        # anything is looked up: file id -> git id, and git id -> content.
        self._inventory.git_ids = {}
        self._inventory.git_file_data = {}
        self._build_inventory(self.tree, self._inventory.root, "")

    def get_file_lines(self, file_id):
        """Return the content of ``file_id``; directories yield ``[]``.

        Content already recorded on the inventory is preferred over asking
        the repository for the blob.
        """
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        git_id = self._inventory.git_ids[file_id]
        if git_id in self._inventory.git_file_data:
            return self._inventory.git_file_data[git_id]
        return self._repository._get_blob(git_id)

    def _build_inventory(self, tree, ie, path):
        """Recursively add the contents of ``tree`` below entry ``ie``.

        :param tree: git tree object; its ``contents`` are iterated.
        :param ie: inventory entry that becomes the parent of the children.
        :param path: byte-string path of ``tree`` ("" for the root).
        :raises AssertionError: on an unrecognised mode string or a
            duplicate child name.
        """
        assert isinstance(path, str)
        for b in tree.contents:
            basename = b.name.decode("utf-8")
            if path == "":
                child_path = b.name
            else:
                child_path = urlutils.join(path, b.name)
            # child_path is already a byte string. Calling .encode() on it
            # would first decode it as ASCII and fail on any non-ASCII name.
            file_id = escape_file_id(child_path)
            is_directory = b.mode[0] == '0'
            if is_directory:
                child_ie = inventory.InventoryDirectory(file_id, basename, ie.file_id)
            elif b.mode[0] == '1':
                if b.mode[1] == '0':
                    child_ie = inventory.InventoryFile(file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string(b.data)
                elif b.mode[1] == '2':
                    child_ie = inventory.InventoryLink(file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string("")
                else:
                    raise AssertionError(
                        "Unknown file kind, perms=%r." % (b.mode,))
                child_ie.text_size = b.size
                # Remember which git object backs this entry so that
                # get_file_lines() can find it.
                # NOTE(review): assumes tree entries expose `.id` the same
                # way commit objects do -- confirm against GitPython.
                self._inventory.git_ids[file_id] = b.id
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (b.mode,))
            # The permission digits start at index 3 of the mode string.
            child_ie.executable = bool(
                int(b.mode[3:], 8) & self._EXECUTABLE_MASK)
            child_ie.revision = self.revision_id
            assert basename not in ie.children
            ie.children[basename] = child_ie
            if is_directory:
                self._build_inventory(b, child_ie, child_path)
326
0.203.1 by Aaron Bentley
Make checkouts work
327
328
class GitFormat(object):
    """Format object assigned to ``GitRepository._format``."""

    # Tree references are not supported by this format.
    supports_tree_reference = False