/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.56 by Jelmer Vernooij
Switch to using GitPython rather than our own in-house stuff.
19
import git
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
20
import os
0.200.57 by Jelmer Vernooij
Fix more tests.
21
import time
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
22
23
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
24
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
25
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
27
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
28
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
29
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
30
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
31
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
32
    urlutils,
0.200.60 by Jelmer Vernooij
Support signature functions.
33
    versionedfile,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
34
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
36
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
37
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
38
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
39
    ids,
40
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
41
42
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
43
# Module-wide registry of open sqlite connections, keyed by cache file path.
# GitRepository.__init__ looks a path up here before connecting, so every
# repository object that resolves to the same cache file reuses a single
# connection.
cachedbs = {}
44
45
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
46
class GitRepository(repository.Repository):
    """An adapter to git repositories for bzr.

    Commits are read through the ``git`` module's ``Repo`` object held in
    ``self._git``.  Blob contents and blob (size, sha1) pairs are memoised in
    per-instance dictionaries, and the revision in which each inventory entry
    last changed is memoised in a sqlite database (``self.cachedb``).
    """

    _serializer = None

    def __init__(self, gitdir, lockfiles):
        """Open the git repository located at ``gitdir``.

        :param gitdir: control directory; its ``root_transport`` provides
            both the base URL and the local path handed to ``git.repo.Repo``.
        :param lockfiles: object stored unchanged as ``control_files``.
        """
        self.base = gitdir.root_transport.base
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = git.repo.Repo(gitdir.root_transport.local_abspath("."))
        self._blob_cache = {}
        self._blob_info_cache = {}
        # _fetch_blob consults this attribute on every call; give it a
        # default so fetching a blob does not fail with AttributeError when
        # nobody has installed an inventory to record file data into.
        self._building_inventory = None
        cache_dir = cache.create_cache_dir()
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Reuse an already-open connection to the same cache file.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self.texts = None
        self.signatures = versionedfile.VirtualSignatureTexts(self)
        self.revisions = None
        self._format = GitFormat()
        self._fallback_repositories = []

    def _init_cachedb(self):
        """Create the cache tables and their unique indexes if missing."""
        self.cachedb.executescript("""
        create table if not exists inventory (
            revid blob);
        create unique index if not exists inventory_revid
            on inventory (revid);
        create table if not exists entry_revision (
            inventory blob,
            path blob,
            gitid blob,
            executable integer,
            revision blob);
        create unique index if not exists entry_revision_revid_path
            on entry_revision (inventory, path);
        """)
        self.cachedb.commit()

    def is_shared(self):
        """Always report the repository as shared."""
        return True

    def supports_rich_root(self):
        """Always report that rich roots are unsupported."""
        return False

    def get_ancestry(self, revision_id):
        """List the revisions reachable from ``revision_id``.

        :param revision_id: bzr revision id; passed through
            ``revision.ensure_null`` first.
        :return: a list whose first element is ``None`` followed by the bzr
            revision ids of the commits returned by git, in the order git
            returns them.  For the null revision the list is ``[None]``.
        """
        revision_id = revision.ensure_null(revision_id)
        ret = []
        if revision_id != revision.NULL_REVISION:
            # The git id does not change between pages; convert it once.
            git_id = ids.convert_revision_id_bzr_to_git(revision_id)
            max_count = 1000
            skip = 0
            cms = None
            # Page through the history until git returns an empty page.
            while cms != []:
                cms = self._git.commits(
                    git_id, max_count=max_count, skip=skip)
                skip += max_count
                ret += [ids.convert_revision_id_git_to_bzr(cm.id)
                        for cm in cms]
        return [None] + ret

    def get_signature_text(self, revision_id):
        """Always raise: no signature text is available.

        :raises errors.NoSuchRevision: unconditionally.
        """
        raise errors.NoSuchRevision(self, revision_id)

    def has_signature_for_revision_id(self, revision_id):
        """Always report that ``revision_id`` has no signature."""
        return False

    def get_parent_map(self, revision_ids):
        """Map each requested revision id to a tuple of its parent ids.

        :param revision_ids: iterable of bzr revision ids.
        :return: dict of revision id -> tuple of parent bzr revision ids.
        """
        ret = {}
        for revid in revision_ids:
            commit = self._git.commit(
                ids.convert_revision_id_bzr_to_git(revid))
            ret[revid] = tuple(
                [ids.convert_revision_id_git_to_bzr(p.id)
                 for p in commit.parents])
        return ret

    def get_revision(self, revision_id):
        """Return the bzr revision object for ``revision_id``.

        :param revision_id: bzr revision id of a git commit.
        :return: the result of `_parse_rev` on the matching git commit.
        """
        git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        commit = self._git.commit(git_commit_id)
        # Named ``rev`` so the ``revision`` module is not shadowed.
        rev = self._parse_rev(commit)
        return rev

    def has_revision(self, revision_id):
        """Tell whether ``revision_id`` can be loaded.

        :return: False when `get_revision` raises
            ``errors.NoSuchRevision``, True when it returns normally.  Any
            other exception propagates.
        """
        try:
            self.get_revision(revision_id)
        # The exception class lives in the ``errors`` module; the bare name
        # ``NoSuchRevision`` is not defined in this file.
        except errors.NoSuchRevision:
            return False
        else:
            return True

    def get_revisions(self, revisions):
        """Return the revision objects for every id in ``revisions``."""
        return [self.get_revision(r) for r in revisions]

    @classmethod
    def _parse_rev(klass, commit):
        """Convert a git commit to a bzr revision.

        :param commit: commit object providing ``id``, ``parents``,
            ``message``, ``committer``, ``author`` and ``committed_date``.
        :return: a `bzrlib.revision.Revision` object.
        """
        import calendar

        rev = revision.Revision(ids.convert_revision_id_git_to_bzr(commit.id))
        rev.parent_ids = tuple(
            [ids.convert_revision_id_git_to_bzr(p.id)
             for p in commit.parents])
        rev.inventory_sha1 = ""
        # Undecodable bytes are replaced rather than raising.
        rev.message = commit.message.decode("utf-8", "replace")
        rev.committer = str(commit.committer)
        rev.properties['author'] = str(commit.author)
        # The revision is recorded with timezone 0, so the struct_time must
        # be read as UTC; time.mktime would read it as local time and shift
        # the timestamp by the machine's UTC offset.
        # NOTE(review): assumes committed_date is a UTC struct_time (as
        # produced by time.gmtime) -- confirm against the git library in use.
        rev.timestamp = float(calendar.timegm(commit.committed_date))
        rev.timezone = 0
        return rev

    def revision_trees(self, revids):
        """Lazily yield `revision_tree` for each id in ``revids``."""
        for revid in revids:
            yield self.revision_tree(revid)

    def revision_tree(self, revision_id):
        """Return the tree for ``revision_id``.

        :return: a plain ``RevisionTree`` over an inventory created with
            ``root_id=None`` for the null revision, otherwise a
            `GitRevisionTree`.
        """
        revision_id = revision.ensure_null(revision_id)

        if revision_id == revision.NULL_REVISION:
            inv = inventory.Inventory(root_id=None)
            inv.revision_id = revision_id
            return revisiontree.RevisionTree(self, inv, revision_id)

        return GitRevisionTree(self, revision_id)

    def _fetch_blob(self, git_id):
        """Read blob ``git_id`` from git, bypassing the in-memory cache.

        When ``self._building_inventory`` is set, the fetched lines are also
        stored in its ``git_file_data`` mapping under ``git_id``.

        :return: whatever ``self._git.cat_file('blob', git_id)`` returns.
        """
        lines = self._git.cat_file('blob', git_id)
        if self._building_inventory is not None:
            self._building_inventory.git_file_data[git_id] = lines
        return lines

    def _get_blob(self, git_id):
        """Return blob ``git_id``, from the cache if present.

        A cache miss fetches the blob but does not add it to the cache.
        """
        try:
            return self._blob_cache[git_id]
        except KeyError:
            return self._fetch_blob(git_id)

    def _get_blob_caching(self, git_id):
        """Return blob ``git_id``, adding it to the cache on a miss."""
        try:
            return self._blob_cache[git_id]
        except KeyError:
            lines = self._fetch_blob(git_id)
            self._blob_cache[git_id] = lines
            return lines

    def _get_blob_info(self, git_id):
        """Return the memoised ``(size, sha1)`` pair for blob ``git_id``.

        ``size`` is the summed length of the blob's lines and ``sha1`` is
        ``osutils.sha_strings`` of those lines.
        """
        try:
            return self._blob_info_cache[git_id]
        except KeyError:
            lines = self._get_blob(git_id)
            size = sum(len(line) for line in lines)
            sha1 = osutils.sha_strings(lines)
            self._blob_info_cache[git_id] = (size, sha1)
            return size, sha1

    def get_inventory(self, revision_id):
        """Return the inventory of the tree at ``revision_id``.

        :param revision_id: must not be None.
        """
        assert revision_id is not None
        return self.revision_tree(revision_id).inventory

    def _set_entry_text_info(self, inv, entry, git_id):
        """Fill in the text size, sha1 and symlink target of ``entry``.

        Directories are left untouched.  ``inv`` is accepted but not used.
        """
        if entry.kind == 'directory':
            return
        size, sha1 = self._get_blob_info(git_id)
        entry.text_size = size
        entry.text_sha1 = sha1
        if entry.kind == 'symlink':
            # Symlink blobs are the only ones kept in the blob cache here.
            lines = self._get_blob_caching(git_id)
            entry.symlink_target = ''.join(lines)

    def _get_file_revision(self, revision_id, path):
        """Ask git for the single newest commit touching ``path``.

        :return: the bzr revision id built from the one line that
            ``rev_list`` returns, minus its final character.
        :raises ValueError: if ``rev_list`` does not return exactly one line.
        """
        lines = self._git.rev_list(
            [ids.convert_revision_id_bzr_to_git(revision_id)],
            max_count=1, topo_order=True, paths=[path])
        [line] = lines
        # presumably the dropped last character is a newline -- TODO confirm
        result = ids.convert_revision_id_git_to_bzr(line[:-1])
        return result

    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
        """Look up the cached entry revision for an exact match.

        :return: the stored revision, or None when no row matches all of
            inventory, path, git id and executable flag.
        """
        result = self.cachedb.execute(
            "select revision from entry_revision where"
            " inventory=? and path=? and gitid=? and executable=?",
            (revid, path, git_id, executable)).fetchone()
        if result is None:
            return None
        return result[0]

    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
        """Insert one entry-revision row into the cache (no commit)."""
        self.cachedb.execute(
            "insert into entry_revision"
            " (inventory, path, gitid, executable, revision)"
            " values (?, ?, ?, ?, ?)",
            (revid, path, git_id, executable, revision))

    def _all_inventories_in_db(self, revids):
        """Tell whether every id in ``revids`` has a row in ``inventory``.

        :return: True when each id is present (also for an empty
            ``revids``), False as soon as one is missing.
        """
        for revid in revids:
            # count(*) always produces exactly one row, so the count itself
            # has to be inspected; testing the row against None never fails.
            (count,) = self.cachedb.execute(
                "select count(*) from inventory where revid = ?",
                (revid,)).fetchone()
            if count == 0:
                return False
        return True

    def _set_entry_revision(self, entry, revid, path, git_id):
        """Set ``entry.revision``, consulting and updating the sqlite cache.

        :param entry: inventory entry; its ``executable`` flag is part of
            the cache key and its ``revision`` attribute is assigned.
        :param revid: bzr revision id of the inventory being built.
        :param path: path of the entry.
        :param git_id: git id of the entry's object.
        """
        # If a revision is in the cache, we assume it contains entries for the
        # whole inventory. So if all parent revisions are in the cache, but no
        # parent entry is present, then the entry revision is the current
        # revision. That amortizes the number of _get_file_revision calls for
        # large pulls to a "small number".
        entry_rev = self._get_entry_revision_from_db(
            revid, path, git_id, entry.executable)
        if entry_rev is not None:
            entry.revision = entry_rev
            return

        rev = self.get_revision(revid)
        for parent_id in rev.parent_ids:
            entry_rev = self._get_entry_revision_from_db(
                parent_id, path, git_id, entry.executable)
            if entry_rev is not None:
                break
        else:
            # No parent carries an identical entry.
            if self._all_inventories_in_db(rev.parent_ids):
                entry_rev = revid
            else:
                entry_rev = self._get_file_revision(revid, path)
        self._set_entry_revision_in_db(
            revid, path, git_id, entry.executable, entry_rev)
        # NOTE(review): the insert is not committed here; the original code
        # had the commit call disabled -- confirm where the commit happens.
        entry.revision = entry_rev
275
276
277
def escape_file_id(file_id):
    """Escape underscores and spaces in ``file_id``.

    Each underscore is doubled first, then each space becomes ``_s``.
    """
    doubled_underscores = file_id.replace('_', '__')
    return doubled_underscores.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
279
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
280
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
281
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory is built from a git commit's tree."""

    # Mask selecting the three execute permission bits (octal 111).  Built
    # with int() so it parses on every Python version; the legacy literal
    # spelling ``0111`` is a syntax error on Python 3.
    _EXEC_MASK = int('111', 8)

    def __init__(self, repository, revision_id):
        """Load the commit for ``revision_id`` and build its inventory.

        :param repository: repository object exposing the git repo as
            ``_git``.
        :param revision_id: bzr revision id of the commit to load.
        """
        self._repository = repository
        self.revision_id = revision_id
        git_id = ids.convert_revision_id_bzr_to_git(revision_id)
        self.tree = repository._git.commit(git_id).tree
        self._inventory = inventory.Inventory(revision_id=revision_id)
        self._inventory.root.revision = revision_id
        self._build_inventory(self.tree, self._inventory.root, "")

    def get_file_lines(self, file_id):
        """Return the content lines of ``file_id`` (``[]`` for directories).

        NOTE(review): this reads ``git_ids`` and ``git_file_data`` from the
        inventory, but nothing in this class assigns either attribute --
        confirm where they are populated before relying on this method.
        """
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        git_id = self._inventory.git_ids[file_id]
        if git_id in self._inventory.git_file_data:
            return self._inventory.git_file_data[git_id]
        return self._repository._get_blob(git_id)

    def _build_inventory(self, tree, ie, path):
        """Recursively add the contents of ``tree`` beneath entry ``ie``.

        :param tree: git tree object; its ``contents`` are iterated and each
            item must provide ``name`` and ``mode`` (files and links also
            ``size``, files also ``data``).
        :param ie: inventory directory entry receiving the children.
        :param path: path of ``tree`` as a ``str``; "" for the root.
        :raises AssertionError: on an unrecognised mode string.
        """
        assert isinstance(path, str)
        for b in tree.contents:
            basename = b.name.decode("utf-8")
            if path == "":
                child_path = b.name
            else:
                child_path = urlutils.join(path, b.name)
            # child_path is already a byte string (it is passed back in as
            # ``path`` and must satisfy the isinstance check above), so it is
            # used as-is.  Calling .encode('utf-8') on it forced an implicit
            # ASCII decode that fails for any non-ASCII file name.
            file_id = escape_file_id(child_path)
            is_directory = b.mode[0] == '0'
            if is_directory:
                child_ie = inventory.InventoryDirectory(
                    file_id, basename, ie.file_id)
            elif b.mode[0] == '1':
                if b.mode[1] == '0':
                    child_ie = inventory.InventoryFile(
                        file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string(b.data)
                elif b.mode[1] == '2':
                    child_ie = inventory.InventoryLink(
                        file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string("")
                else:
                    raise AssertionError(
                        "Unknown file kind, perms=%r." % (b.mode,))
                child_ie.text_size = b.size
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (b.mode,))
            # Everything after the first three mode characters is parsed as
            # octal permission bits.
            child_ie.executable = bool(int(b.mode[3:], 8) & self._EXEC_MASK)
            child_ie.revision = self.revision_id
            assert basename not in ie.children
            ie.children[basename] = child_ie
            if is_directory:
                self._build_inventory(b, child_ie, child_path)
331
0.203.1 by Aaron Bentley
Make checkouts work
332
333
class GitFormat(object):
    """Format object declaring that tree references are not supported."""

    supports_tree_reference = False