/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.56 by Jelmer Vernooij
Switch to using GitPython rather than our own in-house stuff.
19
import git
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
20
import os
0.200.57 by Jelmer Vernooij
Fix more tests.
21
import time
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
22
23
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
24
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
25
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
27
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
28
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
29
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
30
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
31
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
32
    urlutils,
0.200.60 by Jelmer Vernooij
Support signature functions.
33
    versionedfile,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
34
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
36
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
37
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
38
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
39
    ids,
40
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
41
42
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
43
# Module-wide registry mapping a sqlite cache file path to its open
# connection.  GitRepository.__init__ looks the path up here first, so every
# repository object using the same cache file shares a single connection.
cachedbs = {}
44
45
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
46
class GitRepository(repository.Repository):
    """An adapter to git repositories for bzr."""

    _serializer = None

    # Inventory currently being populated, if any.  _fetch_blob() records the
    # blobs it reads in this object's ``git_file_data`` mapping.  The class
    # level default keeps _fetch_blob() from raising AttributeError when
    # nothing has assigned the attribute.
    _building_inventory = None

    # Number of commits requested from git per call while walking history.
    _COMMIT_PAGE_SIZE = 1000

    def __init__(self, gitdir, lockfiles):
        """Open the git repository behind `gitdir` and its sqlite cache.

        :param gitdir: control directory object; its ``root_transport`` gives
            both the repository base URL and the local path handed to git.
        :param lockfiles: stored unchanged as ``self.control_files``.
        """
        self.base = gitdir.root_transport.base
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = git.repo.Repo(gitdir.root_transport.local_abspath("."))
        self._blob_cache = {}
        self._blob_info_cache = {}
        cache_dir = cache.create_cache_dir()
        # NOTE(review): the transport returned here was never used; the call
        # is kept in case it has side effects -- confirm and drop if not.
        get_transport(cache_dir)
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Share one connection per cache file between repository objects.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self.texts = None
        self.signatures = versionedfile.VirtualSignatureTexts(self)
        self.revisions = versionedfile.VirtualRevisionTexts(self)
        self._format = GitFormat()
        self._fallback_repositories = []

    def _init_cachedb(self):
        """Create the cache tables and indexes if missing, then commit."""
        self.cachedb.executescript("""
        create table if not exists inventory (
            revid blob);
        create unique index if not exists inventory_revid
            on inventory (revid);
        create table if not exists entry_revision (
            inventory blob,
            path blob,
            gitid blob,
            executable integer,
            revision blob);
        create unique index if not exists entry_revision_revid_path
            on entry_revision (inventory, path);
        """)
        self.cachedb.commit()

    def _iter_commits(self, start):
        """Yield every commit git reports for `start`, page by page.

        :param start: first argument given to ``self._git.commits`` (a git
            commit id, or an option such as "--all").
        """
        skip = 0
        while True:
            page = self._git.commits(
                start, max_count=self._COMMIT_PAGE_SIZE, skip=skip)
            # An empty page means git has nothing further to report.
            if not page:
                return
            for commit in page:
                yield commit
            skip += self._COMMIT_PAGE_SIZE

    def _all_revision_ids(self):
        """Return the set of bzr revision ids for all commits git lists."""
        if not self._git.heads:
            return set()
        return set(ids.convert_revision_id_git_to_bzr(cm.id)
                   for cm in self._iter_commits("--all"))

    def is_shared(self):
        """Always True."""
        return True

    def supports_rich_root(self):
        """Always False."""
        return False

    #def get_revision_delta(self, revision_id):
    #    parent_revid = self.get_revision(revision_id).parent_ids[0]
    #    diff = self._git.diff(ids.convert_revision_id_bzr_to_git(parent_revid),
    #                   ids.convert_revision_id_bzr_to_git(revision_id))

    def get_ancestry(self, revision_id):
        """Return ``[None]`` followed by the revision ids git lists.

        :param revision_id: bzr revision id to start from; None and the null
            revision both give just ``[None]``.
        :return: list starting with None, then the converted ids in the order
            git returned the commits.
        """
        revision_id = revision.ensure_null(revision_id)
        ancestry = [None]
        if revision_id != revision.NULL_REVISION:
            # Convert once instead of on every page request.
            start = ids.convert_revision_id_bzr_to_git(revision_id)
            ancestry.extend(ids.convert_revision_id_git_to_bzr(cm.id)
                            for cm in self._iter_commits(start))
        return ancestry

    def get_signature_text(self, revision_id):
        """Always raise NoSuchRevision: no signature is ever available.

        :raises errors.NoSuchRevision: unconditionally.
        """
        raise errors.NoSuchRevision(self, revision_id)

    def has_signature_for_revision_id(self, revision_id):
        """Always False."""
        return False

    def get_parent_map(self, revision_ids):
        """Map each requested revision id to a tuple of its parent ids.

        The null revision maps to an empty tuple; every other id is looked up
        as a git commit.
        """
        ret = {}
        for revid in revision_ids:
            if revid == revision.NULL_REVISION:
                ret[revid] = ()
            else:
                commit = self._git.commit(
                    ids.convert_revision_id_bzr_to_git(revid))
                ret[revid] = tuple(
                    [ids.convert_revision_id_git_to_bzr(p.id)
                     for p in commit.parents])
        return ret

    def get_revision(self, revision_id):
        """Return the bzr Revision built from the matching git commit."""
        git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        commit = self._git.commit(git_commit_id)
        return self._parse_rev(commit)

    def has_revision(self, revision_id):
        """Return False if get_revision raises NoSuchRevision, else True.

        NOTE(review): whether an unknown id actually surfaces as
        NoSuchRevision depends on helpers not shown in this module -- confirm.
        """
        try:
            self.get_revision(revision_id)
        except errors.NoSuchRevision:
            return False
        return True

    def get_revisions(self, revisions):
        """Return a list of Revision objects, one per id in `revisions`."""
        return [self.get_revision(r) for r in revisions]

    @classmethod
    def _parse_rev(klass, commit):
        """Convert a git commit to a bzr revision.

        :return: a `bzrlib.revision.Revision` object.
        """
        rev = revision.Revision(ids.convert_revision_id_git_to_bzr(commit.id))
        rev.parent_ids = tuple(
            [ids.convert_revision_id_git_to_bzr(p.id) for p in commit.parents])
        rev.inventory_sha1 = ""
        # Undecodable bytes are replaced rather than raising.
        rev.message = commit.message.decode("utf-8", "replace")
        rev.committer = str(commit.committer)
        rev.properties['author'] = str(commit.author)
        # NOTE(review): time.mktime() interprets its argument as local time;
        # if committed_date is a UTC struct_time this is off by the local UTC
        # offset and calendar.timegm() would be the right call -- confirm.
        rev.timestamp = time.mktime(commit.committed_date)
        rev.timezone = 0
        return rev

    def revision_trees(self, revids):
        """Yield the revision tree for each id in `revids`, in order."""
        for revid in revids:
            yield self.revision_tree(revid)

    def revision_tree(self, revision_id):
        """Return the tree for `revision_id`.

        The null revision gets a plain RevisionTree around an inventory
        created with ``root_id=None``; anything else gets a GitRevisionTree.
        """
        revision_id = revision.ensure_null(revision_id)

        if revision_id == revision.NULL_REVISION:
            inv = inventory.Inventory(root_id=None)
            inv.revision_id = revision_id
            return revisiontree.RevisionTree(self, inv, revision_id)

        return GitRevisionTree(self, revision_id)

    def _fetch_blob(self, git_id):
        """Read blob `git_id` from git, bypassing the in-memory cache.

        When an inventory is being built the content is also recorded in its
        ``git_file_data`` mapping.
        """
        lines = self._git.cat_file('blob', git_id)
        building = self._building_inventory
        if building is not None:
            building.git_file_data[git_id] = lines
        return lines

    def _get_blob(self, git_id):
        """Return the blob from the cache if present, else fetch it.

        A fetched blob is not added to the cache.
        """
        try:
            return self._blob_cache[git_id]
        except KeyError:
            return self._fetch_blob(git_id)

    def _get_blob_caching(self, git_id):
        """Like _get_blob, but a fetched blob is stored in the cache."""
        try:
            return self._blob_cache[git_id]
        except KeyError:
            lines = self._fetch_blob(git_id)
            self._blob_cache[git_id] = lines
            return lines

    def _get_blob_info(self, git_id):
        """Return ``(size, sha1)`` for blob `git_id`, memoised per git id."""
        try:
            return self._blob_info_cache[git_id]
        except KeyError:
            lines = self._get_blob(git_id)
            size = sum(len(line) for line in lines)
            sha1 = osutils.sha_strings(lines)
            self._blob_info_cache[git_id] = (size, sha1)
            return size, sha1

    def get_inventory(self, revision_id):
        """Return the inventory of the tree for `revision_id`."""
        assert revision_id is not None
        return self.revision_tree(revision_id).inventory

    def _set_entry_text_info(self, inv, entry, git_id):
        """Fill in text size, sha1 and (for symlinks) target on `entry`.

        Directories are left untouched.  `inv` is accepted but not used.
        """
        if entry.kind == 'directory':
            return
        size, sha1 = self._get_blob_info(git_id)
        entry.text_size = size
        entry.text_sha1 = sha1
        if entry.kind == 'symlink':
            lines = self._get_blob_caching(git_id)
            entry.symlink_target = ''.join(lines)

    def _get_file_revision(self, revision_id, path):
        """Return the bzr id of the commit rev-list reports for `path`.

        rev-list is asked for at most one commit, starting from
        `revision_id` and restricted to `path`.

        :raises ValueError: if git does not return exactly one line.
        """
        lines = self._git.rev_list(
            [ids.convert_revision_id_bzr_to_git(revision_id)],
            max_count=1, topo_order=True, paths=[path])
        [line] = lines
        # Drop the last character of the line (presumably the newline).
        return ids.convert_revision_id_git_to_bzr(line[:-1])

    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
        """Return the cached entry revision, or None when there is no row."""
        result = self.cachedb.execute(
            "select revision from entry_revision where"
            " inventory=? and path=? and gitid=? and executable=?",
            (revid, path, git_id, executable)).fetchone()
        if result is None:
            return None
        [entry_revision] = result
        return entry_revision

    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
        """Insert an entry revision row into the cache (without committing)."""
        self.cachedb.execute(
            "insert into entry_revision"
            " (inventory, path, gitid, executable, revision)"
            " values (?, ?, ?, ?, ?)",
            (revid, path, git_id, executable, revision))

    def _all_inventories_in_db(self, revids):
        """Return True if every id in `revids` has a row in ``inventory``.

        An empty `revids` gives True.
        """
        for revid in revids:
            row = self.cachedb.execute(
                "select count(*) from inventory where revid = ?",
                (revid,)).fetchone()
            # count(*) always yields a row, so the count itself must be
            # inspected; testing the row against None never reports a miss.
            if row is None or row[0] == 0:
                return False
        return True

    def _set_entry_revision(self, entry, revid, path, git_id):
        """Set ``entry.revision``, consulting and updating the sqlite cache."""
        # If a revision is in the cache, we assume it contains entries for the
        # whole inventory. So if all parent revisions are in the cache, but no
        # parent entry is present, then the entry revision is the current
        # revision. That amortizes the number of _get_file_revision calls for
        # large pulls to a "small number".
        entry_rev = self._get_entry_revision_from_db(
            revid, path, git_id, entry.executable)
        if entry_rev is not None:
            entry.revision = entry_rev
            return

        rev = self.get_revision(revid)
        for parent_id in rev.parent_ids:
            entry_rev = self._get_entry_revision_from_db(
                parent_id, path, git_id, entry.executable)
            if entry_rev is not None:
                break
        else:
            # No parent has a matching cached entry.
            if self._all_inventories_in_db(rev.parent_ids):
                entry_rev = revid
            else:
                entry_rev = self._get_file_revision(revid, path)
        # The insert is left uncommitted here; the commit call was already
        # disabled in the original code.
        self._set_entry_revision_in_db(
            revid, path, git_id, entry.executable, entry_rev)
        entry.revision = entry_rev
296
297
298
def escape_file_id(file_id):
    """Return `file_id` with underscores doubled and spaces turned into '_s'.

    Underscores are doubled first, so an '_s' produced for a space is not
    escaped a second time.
    """
    return file_id.replace('_', '__').replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
300
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
301
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
302
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory is built from a git commit's tree."""

    # Any-execute permission bits (octal 111).  Spelled with int() because a
    # bare 0111 literal is a syntax error on Python 3.
    _EXECUTABLE_BITS = int('111', 8)

    def __init__(self, repository, revision_id):
        """Load the git tree for `revision_id` and build its inventory.

        :param repository: object whose ``_git`` attribute gives access to
            git commits (a GitRepository).
        :param revision_id: bzr revision id of the commit to represent.
        """
        self._repository = repository
        self.revision_id = revision_id
        git_id = ids.convert_revision_id_bzr_to_git(revision_id)
        self.tree = repository._git.commit(git_id).tree
        self._inventory = inventory.Inventory(revision_id=revision_id)
        self._inventory.root.revision = revision_id
        self._build_inventory(self.tree, self._inventory.root, "")

    def get_revision_id(self):
        """Return the revision id this tree was created for."""
        return self.revision_id

    def get_file_lines(self, file_id):
        """Return the content lines of `file_id`; directories give ``[]``.

        NOTE(review): nothing visible in this module sets ``git_ids`` or
        ``git_file_data`` on the inventory built by _build_inventory, so the
        lookups below look stale -- confirm where they are populated.
        """
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        git_id = self._inventory.git_ids[file_id]
        if git_id in self._inventory.git_file_data:
            return self._inventory.git_file_data[git_id]
        return self._repository._get_blob(git_id)

    def _build_inventory(self, tree, ie, path):
        """Recursively add the contents of git `tree` below entry `ie`.

        :param tree: git tree object whose ``contents`` are walked.
        :param ie: inventory directory entry receiving the children.
        :param path: path of `tree` from the root, "" for the root; must be
            a ``str``.
        :raises AssertionError: on an unrecognised mode string or a duplicate
            name within one directory.
        """
        assert isinstance(path, str)
        for b in tree.contents:
            basename = b.name.decode("utf-8")
            if path == "":
                child_path = b.name
            else:
                child_path = urlutils.join(path, b.name)
            # NOTE(review): b.name is decoded from UTF-8 above, so it looks
            # like a byte string; calling .encode() on the joined path may
            # fail for non-ASCII names on Python 2 -- confirm.
            file_id = escape_file_id(child_path.encode('utf-8'))
            # The leading mode characters select the entry kind.
            if b.mode[0] == '0':
                child_ie = inventory.InventoryDirectory(
                    file_id, basename, ie.file_id)
            elif b.mode[0] == '1':
                if b.mode[1] == '0':
                    child_ie = inventory.InventoryFile(
                        file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string(b.data)
                elif b.mode[1] == '2':
                    child_ie = inventory.InventoryLink(
                        file_id, basename, ie.file_id)
                    child_ie.text_sha1 = osutils.sha_string("")
                else:
                    raise AssertionError(
                        "Unknown file kind, perms=%r." % (b.mode,))
                child_ie.text_size = b.size
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (b.mode,))
            # Everything after the first three mode characters is parsed as
            # octal permission bits.
            child_ie.executable = bool(
                int(b.mode[3:], 8) & self._EXECUTABLE_BITS)
            child_ie.revision = self.revision_id
            assert basename not in ie.children
            ie.children[basename] = child_ie
            if b.mode[0] == '0':
                self._build_inventory(b, child_ie, child_path)
355
0.203.1 by Aaron Bentley
Make checkouts work
356
357
class GitFormat(object):
    """Format object assigned to ``GitRepository._format``."""

    # Tree references are reported as unsupported.
    supports_tree_reference = False

    def get_format_description(self):
        """Return the human-readable name of this format."""
        return "Git Repository"