/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.56 by Jelmer Vernooij
Switch to using GitPython rather than our own in-house stuff.
19
import git
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
20
import os
0.200.57 by Jelmer Vernooij
Fix more tests.
21
import time
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
22
23
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
24
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
25
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
27
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
28
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
29
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
30
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
31
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
32
    urlutils,
0.200.60 by Jelmer Vernooij
Support signature functions.
33
    versionedfile,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
34
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
36
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
37
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
38
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
39
    ids,
40
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
41
42
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
43
# Open sqlite connections keyed by cache file path, so that repository
# objects which resolve to the same cache file reuse a single connection.
cachedbs = dict()
44
45
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
46
class GitRepository(repository.Repository):
    """An adapter to git repositories for bzr.

    Commits are read through a GitPython ``Repo`` object (``self._git``) and
    presented as bzr revisions.  Per-entry revision information is cached in
    a sqlite database that is shared, through the module-level ``cachedbs``
    mapping, by every instance that resolves to the same cache file.
    """

    _serializer = None

    # Inventory currently being built, if any.  _fetch_blob() records fetched
    # blob data on it.  Nothing in this module assigns it, so a class-level
    # default keeps _fetch_blob() from failing with AttributeError.
    _building_inventory = None

    # Number of commits requested from git per call when walking history.
    _COMMIT_PAGE_SIZE = 1000

    def __init__(self, gitdir, lockfiles):
        """Open the git repository that lives at `gitdir`.

        :param gitdir: control directory object; its ``root_transport`` must
            be local, because the path is resolved with ``local_abspath``.
        :param lockfiles: object stored as ``self.control_files``.
        """
        self.base = gitdir.root_transport.base
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = git.repo.Repo(gitdir.root_transport.local_abspath("."))
        self._blob_cache = {}
        self._blob_info_cache = {}
        cache_dir = cache.create_cache_dir()
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Reuse an already opened connection for this cache file if any.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self.texts = None
        self.signatures = versionedfile.VirtualSignatureTexts(self)
        self.revisions = versionedfile.VirtualRevisionTexts(self)
        self._format = GitFormat()
        self._fallback_repositories = []

    def _init_cachedb(self):
        """Create the cache tables and indexes if they do not exist yet."""
        self.cachedb.executescript("""
        create table if not exists inventory (
            revid blob);
        create unique index if not exists inventory_revid
            on inventory (revid);
        create table if not exists entry_revision (
            inventory blob,
            path blob,
            gitid blob,
            executable integer,
            revision blob);
        create unique index if not exists entry_revision_revid_path
            on entry_revision (inventory, path);
        """)
        self.cachedb.commit()

    def _paged_revision_ids(self, start):
        """List the bzr revision ids of the commits git reports for `start`.

        Commits are requested `_COMMIT_PAGE_SIZE` at a time until git returns
        an empty page.

        :param start: first argument handed to ``self._git.commits`` (a git
            commit id, or an option such as "--all").
        :return: list of bzr revision ids, in the order git returned them.
        """
        revids = []
        skip = 0
        while True:
            cms = self._git.commits(
                start, max_count=self._COMMIT_PAGE_SIZE, skip=skip)
            if not cms:
                break
            revids.extend(
                [ids.convert_revision_id_git_to_bzr(cm.id) for cm in cms])
            skip += self._COMMIT_PAGE_SIZE
        return revids

    def _all_revision_ids(self):
        """Return the set of bzr revision ids of every commit git lists.

        An empty set is returned when the repository has no heads.
        """
        if not self._git.heads:
            return set()
        return set(self._paged_revision_ids("--all"))

    def is_shared(self):
        """Git repositories are always reported as shared."""
        return True

    def supports_rich_root(self):
        """Rich roots are not supported."""
        return False

    def get_ancestry(self, revision_id):
        """Return the ancestry of `revision_id` as a list of revision ids.

        :return: a list starting with None, followed by the revision ids git
            lists for the commit.  Only ``[None]`` for the null revision.
        """
        revision_id = revision.ensure_null(revision_id)
        ancestry = []
        if revision_id != revision.NULL_REVISION:
            ancestry = self._paged_revision_ids(
                ids.convert_revision_id_bzr_to_git(revision_id))
        return [None] + ancestry

    def get_signature_text(self, revision_id):
        """Always raise NoSuchRevision: no signature text is available."""
        raise errors.NoSuchRevision(self, revision_id)

    def has_signature_for_revision_id(self, revision_id):
        """Always False; see get_signature_text()."""
        return False

    def get_parent_map(self, revision_ids):
        """Map each of `revision_ids` to a tuple of its parent revision ids.

        The null revision maps to an empty tuple.
        """
        parent_map = {}
        for revid in revision_ids:
            if revid == revision.NULL_REVISION:
                parent_map[revid] = ()
                continue
            commit = self._git.commit(
                ids.convert_revision_id_bzr_to_git(revid))
            parent_map[revid] = tuple(
                [ids.convert_revision_id_git_to_bzr(p.id)
                 for p in commit.parents])
        return parent_map

    def get_revision(self, revision_id):
        """Return the bzr Revision built from the matching git commit."""
        git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        commit = self._git.commit(git_commit_id)
        return self._parse_rev(commit)

    def has_revision(self, revision_id):
        """Return False if get_revision() raises NoSuchRevision, else True."""
        try:
            self.get_revision(revision_id)
        except errors.NoSuchRevision:
            # The exception class lives in bzrlib.errors; the bare name
            # NoSuchRevision is not defined in this module.
            return False
        else:
            return True

    def get_revisions(self, revisions):
        """Return the Revision object for each revision id in `revisions`."""
        return [self.get_revision(r) for r in revisions]

    @classmethod
    def _parse_rev(cls, commit):
        """Convert a git commit to a bzr revision.

        :param commit: commit object providing ``id``, ``parents``,
            ``message``, ``committer``, ``author`` and ``committed_date``.
        :return: a `bzrlib.revision.Revision` object.
        """
        # Imported here because the module-level imports do not include it.
        import calendar

        rev = revision.Revision(ids.convert_revision_id_git_to_bzr(commit.id))
        rev.parent_ids = tuple(
            [ids.convert_revision_id_git_to_bzr(p.id)
             for p in commit.parents])
        rev.inventory_sha1 = ""
        # Undecodable bytes are replaced rather than raising.
        rev.message = commit.message.decode("utf-8", "replace")
        rev.committer = str(commit.committer)
        rev.properties['author'] = str(commit.author)
        # NOTE(review): relies on GitPython building committed_date with
        # time.gmtime(), i.e. a UTC struct_time.  timegm() is the inverse of
        # gmtime(); mktime() would interpret the value as local time and
        # shift the timestamp by the local UTC offset.
        rev.timestamp = calendar.timegm(commit.committed_date)
        rev.timezone = 0
        return rev

    def revision_trees(self, revids):
        """Yield the revision tree of each revision id in `revids`."""
        for revid in revids:
            yield self.revision_tree(revid)

    def revision_tree(self, revision_id):
        """Return the tree for `revision_id`.

        The null revision gets a plain RevisionTree over an empty inventory;
        anything else gets a GitRevisionTree.
        """
        revision_id = revision.ensure_null(revision_id)

        if revision_id == revision.NULL_REVISION:
            inv = inventory.Inventory(root_id=None)
            inv.revision_id = revision_id
            return revisiontree.RevisionTree(self, inv, revision_id)

        return GitRevisionTree(self, revision_id)

    def _fetch_blob(self, git_id):
        """Read blob `git_id` from git, bypassing the in-memory cache.

        When an inventory is being built, the data is also recorded in its
        ``git_file_data`` mapping.
        """
        lines = self._git.cat_file('blob', git_id)
        if self._building_inventory is not None:
            self._building_inventory.git_file_data[git_id] = lines
        return lines

    def _get_blob(self, git_id):
        """Return blob `git_id`, from the cache if present.

        A freshly fetched blob is NOT added to the cache.
        """
        try:
            return self._blob_cache[git_id]
        except KeyError:
            return self._fetch_blob(git_id)

    def _get_blob_caching(self, git_id):
        """Return blob `git_id`, adding it to the cache when fetched."""
        try:
            return self._blob_cache[git_id]
        except KeyError:
            lines = self._fetch_blob(git_id)
            self._blob_cache[git_id] = lines
            return lines

    def _get_blob_info(self, git_id):
        """Return the cached ``(size, sha1)`` pair for blob `git_id`."""
        try:
            return self._blob_info_cache[git_id]
        except KeyError:
            lines = self._get_blob(git_id)
            size = sum(len(line) for line in lines)
            sha1 = osutils.sha_strings(lines)
            self._blob_info_cache[git_id] = (size, sha1)
            return size, sha1

    def get_inventory(self, revision_id):
        """Return the inventory of the revision tree for `revision_id`."""
        assert revision_id is not None
        return self.revision_tree(revision_id).inventory

    def _set_entry_text_info(self, inv, entry, git_id):
        """Fill in text size and sha1 (and symlink target) on `entry`.

        Directories are left untouched.  `inv` is not used.
        """
        if entry.kind == 'directory':
            return
        size, sha1 = self._get_blob_info(git_id)
        entry.text_size = size
        entry.text_sha1 = sha1
        if entry.kind == 'symlink':
            # Only symlink blobs are kept in the in-memory blob cache.
            lines = self._get_blob_caching(git_id)
            entry.symlink_target = ''.join(lines)

    def _get_file_revision(self, revision_id, path):
        """Ask git for the single commit reported for `path` at `revision_id`.

        :return: the bzr revision id of that commit.
        """
        lines = self._git.rev_list(
            [ids.convert_revision_id_bzr_to_git(revision_id)],
            max_count=1, topo_order=True, paths=[path])
        # Exactly one output line is expected; anything else raises.
        [line] = lines
        # Drop the last character (presumably the trailing newline).
        return ids.convert_revision_id_git_to_bzr(line[:-1])

    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
        """Look up a cached entry revision.

        :return: the cached revision, or None if no row matches.
        """
        row = self.cachedb.execute(
            "select revision from entry_revision where"
            " inventory=? and path=? and gitid=? and executable=?",
            (revid, path, git_id, executable)).fetchone()
        if row is None:
            return None
        [entry_rev] = row
        return entry_rev

    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
        """Insert an entry revision row into the cache (not committed here)."""
        self.cachedb.execute(
            "insert into entry_revision"
            " (inventory, path, gitid, executable, revision)"
            " values (?, ?, ?, ?, ?)",
            (revid, path, git_id, executable, revision))

    def _all_inventories_in_db(self, revids):
        """Return True if every revision id in `revids` is in the cache.

        An empty `revids` yields True.
        """
        for revid in revids:
            row = self.cachedb.execute(
                "select count(*) from inventory where revid = ?",
                (revid,)).fetchone()
            # count(*) always produces a row, so the count itself has to be
            # inspected; testing the row for None can never detect a miss.
            if row is None or row[0] == 0:
                return False
        return True

    def _set_entry_revision(self, entry, revid, path, git_id):
        """Set ``entry.revision``, consulting and updating the sqlite cache."""
        # If a revision is in the cache, we assume it contains entries for the
        # whole inventory. So if all parent revisions are in the cache, but no
        # parent entry is present, then the entry revision is the current
        # revision. That amortizes the number of _get_file_revision calls for
        # large pulls to a "small number".
        entry_rev = self._get_entry_revision_from_db(
            revid, path, git_id, entry.executable)
        if entry_rev is not None:
            entry.revision = entry_rev
            return

        rev = self.get_revision(revid)
        for parent_id in rev.parent_ids:
            entry_rev = self._get_entry_revision_from_db(
                parent_id, path, git_id, entry.executable)
            if entry_rev is not None:
                # An identical entry exists in a parent: inherit its revision.
                break
        else:
            if self._all_inventories_in_db(rev.parent_ids):
                entry_rev = revid
            else:
                entry_rev = self._get_file_revision(revid, path)
        # NOTE(review): the insert is not committed here; presumably a caller
        # commits in bulk -- confirm.
        self._set_entry_revision_in_db(
            revid, path, git_id, entry.executable, entry_rev)
        entry.revision = entry_rev
291
292
293
def escape_file_id(file_id):
    """Escape `file_id`: double every underscore, then turn spaces into '_s'."""
    underscores_doubled = file_id.replace('_', '__')
    return underscores_doubled.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
295
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
296
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
297
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory is built from the tree of a git commit."""

    # Any of the user/group/other execute permission bits (octal 111).
    _EXECUTE_BITS = int("111", 8)

    def __init__(self, repository, revision_id):
        """Build the inventory for `revision_id` from `repository`.

        :param repository: object whose ``_git`` attribute can look up
            commits (a GitRepository).
        :param revision_id: bzr revision id of the commit to expose.
        """
        self._repository = repository
        self.revision_id = revision_id
        git_id = ids.convert_revision_id_bzr_to_git(revision_id)
        self.tree = repository._git.commit(git_id).tree
        self._inventory = inventory.Inventory(revision_id=revision_id)
        self._inventory.root.revision = revision_id
        self._build_inventory(self.tree, self._inventory.root, "")

    def get_revision_id(self):
        """Return the revision id this tree was created for."""
        return self.revision_id

    def get_file_lines(self, file_id):
        """Return the content of `file_id`; an empty list for directories."""
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        # NOTE(review): git_ids / git_file_data are not set on the inventory
        # created in __init__; presumably they belonged to an earlier
        # inventory class -- confirm before relying on this method.
        git_id = self._inventory.git_ids[file_id]
        if git_id in self._inventory.git_file_data:
            return self._inventory.git_file_data[git_id]
        return self._repository._get_blob(git_id)

    def _build_inventory(self, tree, ie, path):
        """Recursively add the contents of git `tree` as children of `ie`.

        :param tree: git tree object whose ``contents`` are iterated.
        :param ie: inventory entry (a directory) receiving the children.
        :param path: byte-string path of `tree` relative to the root, ""
            for the root itself.
        :raises AssertionError: on a mode that is neither a directory, a
            regular file nor a symlink.
        """
        assert isinstance(path, str)
        for b in tree.contents:
            basename = b.name.decode("utf-8")
            if path == "":
                child_path = b.name
            else:
                child_path = urlutils.join(path, b.name)
            # child_path is already a UTF-8 byte string.  Calling .encode()
            # on it would make Python 2 ASCII-decode it first and fail on
            # any non-ASCII name.
            file_id = escape_file_id(child_path)
            is_directory = b.mode[0] == '0'
            if is_directory:
                child_ie = inventory.InventoryDirectory(
                    file_id, basename, ie.file_id)
            elif b.mode[0] == '1':
                kind_digit = b.mode[1]
                if kind_digit == '0':
                    child_ie = inventory.InventoryFile(
                        file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string(b.data)
                elif kind_digit == '2':
                    child_ie = inventory.InventoryLink(
                        file_id, basename, ie.file_id)
                    # The blob of a symlink holds the link target.
                    child_ie.symlink_target = b.data
                    child_ie.text_sha1 = osutils.sha_string("")
                else:
                    raise AssertionError(
                        "Unknown file kind, perms=%r." % (b.mode,))
                child_ie.text_size = b.size
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (b.mode,))
            # The characters after the first three of the mode string are
            # parsed as octal permission bits.
            permissions = int(b.mode[3:], 8)
            child_ie.executable = bool(permissions & self._EXECUTE_BITS)
            child_ie.revision = self.revision_id
            assert basename not in ie.children
            ie.children[basename] = child_ie
            if is_directory:
                self._build_inventory(b, child_ie, child_path)
350
0.203.1 by Aaron Bentley
Make checkouts work
351
352
class GitFormat(object):
    """Format object that GitRepository instances store as ``_format``."""

    # Tree references are not supported by this format.
    supports_tree_reference = False

    def get_format_description(self):
        """Return the human-readable name of this format."""
        description = "Git Repository"
        return description