/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
19
import os
20
21
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
22
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
23
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
24
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
25
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
27
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
28
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
29
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
30
    urlutils,
31
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
32
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
33
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
34
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
36
    ids,
37
    model,
38
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
39
40
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
41
# Process-wide registry of open sqlite connections, keyed by cache file path.
cachedbs = dict()
42
43
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
44
class GitRepository(repository.Repository):
45
    """An adapter to git repositories for bzr."""
46
0.200.41 by David Allouche
Define _serializer = None in GitRepository.
47
    _serializer = None
48
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
49
    def __init__(self, gitdir, lockfiles):
        """Bind the repository to a git control directory and open its caches.

        :param gitdir: the control directory object; its ``transport``
            attribute is handed to `_make_model` to build the git model.
        :param lockfiles: stored unchanged as ``control_files``.
        """
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = self._make_model(gitdir.transport)
        self._revision_cache = {}
        self._blob_cache = {}
        self._blob_info_cache = {}
        # _fetch_blob reads this attribute, so it must exist even when no
        # inventory has been built yet.
        self._building_inventory = None
        cache_dir = cache.create_cache_dir()
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Keep a single sqlite connection per cache file for the whole
        # process; every repository object using that file shares it.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self._format = GitFormat()
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
64
65
    def _init_cachedb(self):
        """Create the cache schema in ``self.cachedb`` when it is missing.

        Two tables are created: ``inventory`` (one row per revision id, with
        a unique index) and ``entry_revision`` (one row per inventory/path
        pair, with a unique index on that pair). The script only uses
        "if not exists" statements, and the connection is committed after it
        has run.
        """
        schema = """
        create table if not exists inventory (
            revid blob);
        create unique index if not exists inventory_revid
            on inventory (revid);
        create table if not exists entry_revision (
            inventory blob,
            path blob,
            gitid blob,
            executable integer,
            revision blob);
        create unique index if not exists entry_revision_revid_path
            on entry_revision (inventory, path);
        """
        db = self.cachedb
        db.executescript(schema)
        db.commit()
81
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
82
0.202.2 by David Allouche
GitRepository.get_inventory and .revision_tree work for the null revision. Support for testing GitRepository without disk data.
83
    @classmethod
    def _make_model(klass, transport):
        """Build the git model for the directory behind a transport.

        :param transport: transport whose ``local_abspath('.')`` gives the
            git directory.
        :return: a `model.GitModel` for that directory.
        """
        return model.GitModel(transport.local_abspath('.'))
87
88
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
89
    def _ancestor_revisions(self, revision_ids):
        """Yield a parsed revision for each entry of the git ancestry.

        :param revision_ids: bzr revision ids to start from, or None, in
            which case None is passed through to the git model.
        :return: a generator of the objects produced by `_parse_rev`.
        """
        if revision_ids is None:
            git_revisions = None
        else:
            # Use the same id conversion as the rest of this class.
            git_revisions = [ids.convert_revision_id_bzr_to_git(r)
                             for r in revision_ids]
        for lines in self._git.ancestor_lines(git_revisions):
            yield self._parse_rev(lines)
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
97
98
    def is_shared(self):
        """Report this repository as shared; the answer is always True."""
        return True
100
0.200.40 by David Allouche
GitRepository.supports_rich_root() => False
101
    def supports_rich_root(self):
        """Report that rich roots are not supported; always False."""
        return False
103
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
104
    def get_revision_graph(self, revision_id=None):
        """Return the revision graph as a dict of bzr revision ids.

        :param revision_id: bzr revision id to start from, or None, in which
            case None is passed to the git model.
        :return: a dict mapping each bzr revision id to the list of its
            parents' bzr revision ids.
        """
        to_bzr = ids.convert_revision_id_git_to_bzr
        if revision_id is None:
            heads = None
        else:
            heads = [ids.convert_revision_id_bzr_to_git(revision_id)]
        git_graph = self._git.get_revision_graph(heads)
        return dict(
            (to_bzr(git_node), [to_bzr(parent) for parent in git_parents])
            for git_node, git_parents in git_graph.iteritems())
118
0.200.21 by John Arbash Meinel
Fix Repository.get_revision_graph()
119
    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return the revision graph as a `deprecated_graph.Graph`.

        :param revision_ids: bzr revision ids to start from, or None, in
            which case None is passed to the git model.
        :return: a Graph with one node per revision, each added together
            with the bzr ids of its parents.
        """
        to_bzr = ids.convert_revision_id_git_to_bzr
        result = deprecated_graph.Graph()
        git_heads = revision_ids
        if git_heads is not None:
            git_heads = [ids.convert_revision_id_bzr_to_git(bzr_id)
                         for bzr_id in git_heads]
        git_graph = self._git.get_revision_graph(git_heads)
        for git_node, git_parents in git_graph.iteritems():
            node_id = to_bzr(git_node)
            parent_ids = [to_bzr(parent) for parent in git_parents]
            result.add_node(node_id, parent_ids)
        return result
133
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
134
    def get_ancestry(self, revision_id):
        """Return the ancestry of a revision as a list of bzr revision ids.

        :param revision_id: bzr revision id whose ancestry is requested.
        :return: a list whose first item is None, followed by the bzr ids
            of the revisions reported by the git model, in the model's order.
        """
        git_head = ids.convert_revision_id_bzr_to_git(revision_id)
        ancestry = [None]
        for git_id in self._git.get_ancestry([git_head]):
            ancestry.append(ids.convert_revision_id_git_to_bzr(git_id))
        return ancestry
141
142
    def get_signature_text(self, revision_id):
        """Always raise `errors.NoSuchRevision`; no signature is returned.

        :param revision_id: the revision id reported in the error.
        :raises errors.NoSuchRevision: unconditionally.
        """
        raise errors.NoSuchRevision(self, revision_id)
144
145
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
146
    def get_revision(self, revision_id):
        """Return the revision object for a bzr revision id.

        Results are memoised in ``self._revision_cache``; on a miss the
        commit header is read from the git model and parsed by `_parse_rev`.

        :param revision_id: bzr revision id to look up.
        :return: the object built by `_parse_rev`.
        """
        try:
            return self._revision_cache[revision_id]
        except KeyError:
            pass
        commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        header_lines = self._git.rev_list(
            [commit_id], max_count=1, header=True)
        parsed = self._parse_rev(header_lines)
        self._revision_cache[revision_id] = parsed
        return parsed
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
155
156
    def has_revision(self, revision_id):
        """Tell whether a revision can be loaded from this repository.

        :param revision_id: bzr revision id to probe.
        :return: False when `get_revision` raises `errors.NoSuchRevision`,
            True when it returns normally. Any other exception propagates.
        """
        try:
            self.get_revision(revision_id)
        except errors.NoSuchRevision:
            # The exception class lives in bzrlib.errors; a bare
            # "NoSuchRevision" is not a name in this module.
            return False
        return True
163
164
    def get_revisions(self, revisions):
        """Return the revision objects for several bzr revision ids.

        :param revisions: iterable of bzr revision ids.
        :return: a list with the result of `get_revision` for each id, in
            the order given.
        """
        found = []
        for revision_id in revisions:
            found.append(self.get_revision(revision_id))
        return found
166
0.200.32 by David Allouche
Rewrite GitRepository._parse_rev, with unit tests.
167
    @classmethod
    def _parse_rev(klass, raw):
        """Parse a single git revision.

        * The first line is the git commit id.
        * Following lines conform to the 'name value' structure, until the
          first blank line.
        * All lines after the first blank line and until the NULL line have 4
          leading spaces and constitute the commit message.

        The recognised header names are 'parent', 'author', 'committer' and
        'tree'; any other header is ignored. The committer line supplies the
        revision's committer, timestamp and timezone; the author line only
        supplies them when no committer line came before it.

        :param raw: sequence of newline-terminated strings, its last item is a
            single NULL character.
        :return: a `bzrlib.revision.Revision` object.
        """
        message_lines = []
        in_message = False
        committer_was_set = False
        revision_id = ids.convert_revision_id_git_to_bzr(raw[0][:-1])
        rev = revision.Revision(revision_id)
        rev.inventory_sha1 = ""
        assert raw[-1] == '\x00', (
            "Last item of raw was not a single NULL character.")
        for line in raw[1:-1]:
            if in_message:
                assert line[:4] == '    ', (
                    "Unexpected line format in commit message: %r" % line)
                message_lines.append(line[4:])
                continue
            if line == '\n':
                # The first blank line separates headers from the message.
                in_message = True
                continue
            name, value = line[:-1].split(' ', 1)
            if name == 'parent':
                rev.parent_ids.append(
                    ids.convert_revision_id_git_to_bzr(value))
            elif name == 'author':
                author, timestamp, timezone = value.rsplit(' ', 2)
                rev.properties['author'] = author
                rev.properties['git-author-timestamp'] = timestamp
                rev.properties['git-author-timezone'] = timezone
                if not committer_was_set:
                    rev.committer = author
                    rev.timestamp = float(timestamp)
                    rev.timezone = klass._parse_tz(timezone)
            elif name == 'committer':
                committer_was_set = True
                committer, timestamp, timezone = value.rsplit(' ', 2)
                rev.committer = committer
                rev.timestamp = float(timestamp)
                rev.timezone = klass._parse_tz(timezone)
            elif name == 'tree':
                rev.properties['git-tree-id'] = value

        rev.message = ''.join(message_lines)

        # XXX: That should not be needed, but current revision serializers do
        # not know how to handle text that is illegal in xml. Note: when
        # this is fixed, we will need to rev up the revision namespace when
        # removing the escaping code. -- David Allouche 2007-12-30
        rev.message = escape_for_xml(rev.message)
        rev.committer = escape_for_xml(rev.committer)
        # A commit without an author header has no 'author' property; do not
        # fail on it with a KeyError.
        if 'author' in rev.properties:
            rev.properties['author'] = escape_for_xml(
                rev.properties['author'])
        return rev
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
236
0.200.35 by David Allouche
GitRepository._parse_rev sets Revision.timezone to a float instead of a string.
237
    @classmethod
    def _parse_tz(klass, tz):
        """Convert a timezone written as [+|-]HHMM into an offset.

        :param tz: five-character string: a sign followed by two digits of
            hours and two digits of minutes.
        :return: the timezone offset in seconds, negative for a '-' sign.
        """
        assert len(tz) == 5
        direction = {'+': +1, '-': -1}[tz[0]]
        total_minutes = 60 * int(tz[1:3]) + int(tz[3:])
        return direction * 60 * total_minutes
0.200.35 by David Allouche
GitRepository._parse_rev sets Revision.timezone to a float instead of a string.
248
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
249
    def revision_trees(self, revids):
        """Lazily yield a revision tree for each revision id.

        :param revids: iterable of bzr revision ids.
        :return: a generator of the objects returned by `revision_tree`.
        """
        for revision_id in revids:
            tree = self.revision_tree(revision_id)
            yield tree
252
253
    def revision_tree(self, revision_id):
        """Return a `GitRevisionTree` of this repository at revision_id."""
        tree = GitRevisionTree(self, revision_id)
        return tree
255
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
256
    def _fetch_blob(self, git_id):
        """Read a blob from the git model, bypassing the blob cache.

        While `get_inventory` is building an inventory, the lines are also
        recorded in that inventory's ``git_file_data`` so the text does not
        have to be read again.

        :param git_id: git id of the blob.
        :return: the lines returned by the model's ``cat_file``.
        """
        lines = self._git.cat_file('blob', git_id)
        # The attribute is only assigned by get_inventory; tolerate being
        # called before any inventory was ever built.
        building = getattr(self, '_building_inventory', None)
        if building is not None:
            building.git_file_data[git_id] = lines
        return lines
262
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
263
    def _get_blob(self, git_id):
        """Return the lines of a blob without adding them to the cache.

        :param git_id: git id of the blob.
        :return: the cached lines when ``self._blob_cache`` has them,
            otherwise the result of `_fetch_blob`.
        """
        held = self._blob_cache
        if git_id in held:
            return held[git_id]
        return self._fetch_blob(git_id)
268
269
    def _get_blob_caching(self, git_id):
        """Return the lines of a blob, remembering them in the blob cache.

        :param git_id: git id of the blob.
        :return: the lines held in ``self._blob_cache``, fetched through
            `_fetch_blob` first when they were not there yet.
        """
        held = self._blob_cache
        if git_id not in held:
            held[git_id] = self._fetch_blob(git_id)
        return held[git_id]
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
276
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
277
    def _get_blob_info(self, git_id):
        """Return the size and sha1 of a blob, memoised per git id.

        :param git_id: git id of the blob.
        :return: a ``(size, sha1)`` tuple: the summed length of the blob's
            lines and ``osutils.sha_strings`` of those lines.
        """
        known = self._blob_info_cache
        if git_id in known:
            return known[git_id]
        lines = self._get_blob(git_id)
        size = sum(map(len, lines))
        sha1 = osutils.sha_strings(lines)
        known[git_id] = (size, sha1)
        return size, sha1
286
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
287
    def get_inventory(self, revision_id):
        """Build the inventory of a revision from the git tree listing.

        :param revision_id: bzr revision id; None is treated as
            ``revision.NULL_REVISION``.
        :return: an `inventory.Inventory`. For the null revision it is an
            empty inventory with no root id. Otherwise it also carries the
            ``git_ids`` and ``git_file_data`` attributes set up here.
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        if revision_id == revision.NULL_REVISION:
            return inventory.Inventory(
                revision_id=revision_id, root_id=None)

        # First pass at building the inventory. We need this one to get the
        # git ids, so we do not have to cache the entire tree text. Ideally,
        # this should be all we need to do.
        git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
        git_inventory = self._git.get_inventory(git_commit)
        inv = self._parse_inventory(revision_id, git_inventory)

        # Second pass at building the inventory. There we retrieve additional
        # data that bzrlib requires: text sizes, sha1s, symlink targets and
        # revisions that introduced inventory entries
        inv.git_file_data = {}
        self._building_inventory = inv
        try:
            file_ids = sorted(inv.git_ids)
            for file_id in file_ids:
                self._set_entry_text_info(
                    inv, inv[file_id], inv.git_ids[file_id])
            for file_id in file_ids:
                git_id = inv.git_ids[file_id]
                entry = inv[file_id]
                path = inv.id2path(file_id)
                self._set_entry_revision(entry, revision_id, path, git_id)

            # At this point the entry_revision table is fully populated for
            # this revision. So record that we have inventory data for this
            # revision.
            self.cachedb.execute(
                "insert or ignore into inventory (revid) values (?)",
                (revision_id,))
            self.cachedb.commit()
        finally:
            # Always stop recording fetched blobs, even when the second pass
            # fails; otherwise later fetches would keep feeding this
            # inventory.
            self._building_inventory = None
        return inv
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
325
326
    @classmethod
    def _parse_inventory(klass, revid, git_inv):
        """Turn a git tree listing into an inventory.

        :param revid: bzr revision id given to the inventory and recorded as
            the revision of its root entry.
        :param git_inv: iterable of ``(perms, git_kind, git_id, path)``
            tuples.
        :return: an `inventory.Inventory` with an extra ``git_ids`` dict
            mapping each file id to its git id.
        :raises AssertionError: for a git kind other than 'blob' or 'tree',
            or a blob whose second permission character is neither '0' nor
            '2'.
        """
        # Any of the three execute bits marks a file as executable.
        exec_mask = int('111', 8)
        # For now, git inventory do not have root ids. It is not clear that we
        # can reliably support root ids. -- David Allouche 2007-12-28
        result = inventory.Inventory(revision_id=revid)
        result.git_ids = {}
        for perms, git_kind, git_id, path in git_inv:
            is_executable = False
            if git_kind == 'tree':
                kind = 'directory'
            elif git_kind == 'blob':
                blob_type = perms[1]
                if blob_type == '2':
                    kind = 'symlink'
                elif blob_type == '0':
                    kind = 'file'
                    is_executable = bool(int(perms[-3:], 8) & exec_mask)
                else:
                    raise AssertionError(
                        "Unknown blob kind, perms=%r." % (perms,))
            else:
                raise AssertionError(
                    "Unknown git entry kind: %r" % (git_kind,))
            # XXX: Maybe the file id should be prefixed by file kind, so when
            # the kind of path changes, the id changes too.
            # -- David Allouche 2007-12-28.
            file_id = escape_file_id(path.encode('utf-8'))
            entry = result.add_path(path, kind, file_id=file_id)
            entry.executable = is_executable
            result.git_ids[file_id] = git_id
        result.root.revision = revid
        return result
358
359
    def _set_entry_text_info(self, inv, entry, git_id):
        """Fill in the text size, sha1 and symlink target of an entry.

        Directories are left untouched. Symlink blobs are read through the
        caching getter and their joined lines become the symlink target.

        :param inv: the inventory being built (not used here).
        :param entry: the inventory entry to update in place.
        :param git_id: git id of the blob behind the entry.
        """
        if entry.kind == 'directory':
            return
        entry.text_size, entry.text_sha1 = self._get_blob_info(git_id)
        if entry.kind != 'symlink':
            return
        target_lines = self._get_blob_caching(git_id)
        entry.symlink_target = ''.join(target_lines)
368
369
    def _get_file_revision(self, revision_id, path):
        """Ask git for the commit that is reported first for a path.

        :param revision_id: bzr revision id to start the listing from.
        :param path: path the listing is restricted to.
        :return: the bzr revision id for the single line returned by the
            model's ``rev_list`` (called with ``max_count=1``).
        :raises ValueError: when the listing does not contain exactly one
            line.
        """
        git_head = ids.convert_revision_id_bzr_to_git(revision_id)
        (commit_line,) = self._git.rev_list(
            [git_head], max_count=1, topo_order=True, paths=[path])
        # Strip the trailing newline before converting the id.
        return ids.convert_revision_id_git_to_bzr(commit_line[:-1])
377
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
378
    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
        """Look up the cached revision of an inventory entry.

        :param revid: revision id of the inventory holding the entry.
        :param path: path of the entry.
        :param git_id: git id of the entry.
        :param executable: executable flag of the entry.
        :return: the ``revision`` column of the row matching all four
            values, or None when there is no such row.
        """
        query = (
            "select revision from entry_revision where"
            " inventory=? and path=? and gitid=? and executable=?")
        row = self.cachedb.execute(
            query, (revid, path, git_id, executable)).fetchone()
        if row is not None:
            (entry_revision,) = row
            return entry_revision
        return None
387
388
    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
        """Insert one row into the ``entry_revision`` cache table.

        The statement is executed but not committed here.

        :param revid: revision id of the inventory holding the entry.
        :param path: path of the entry.
        :param git_id: git id of the entry.
        :param executable: executable flag of the entry.
        :param revision: revision id to record for the entry.
        """
        statement = (
            "insert into entry_revision"
            " (inventory, path, gitid, executable, revision)"
            " values (?, ?, ?, ?, ?)")
        row = (revid, path, git_id, executable, revision)
        self.cachedb.execute(statement, row)
394
395
    def _all_inventories_in_db(self, revids):
        """Tell whether every given revision has a row in ``inventory``.

        :param revids: iterable of revision ids to look up.
        :return: False as soon as one revision id has no row in the
            ``inventory`` table; True otherwise, including for an empty
            iterable.
        """
        for revid in revids:
            # "select count(*)" always yields exactly one row, so the test
            # has to look at the count itself, not at whether a row came
            # back.
            [count] = self.cachedb.execute(
                "select count(*) from inventory where revid = ?",
                (revid,)).fetchone()
            if count == 0:
                return False
        return True
403
0.200.44 by David Allouche
Remove some experimental cruft.
404
    def _set_entry_revision(self, entry, revid, path, git_id):
        """Set ``entry.revision``, using and feeding the sqlite cache.

        :param entry: the inventory entry to update in place.
        :param revid: revision id of the inventory holding the entry.
        :param path: path of the entry.
        :param git_id: git id of the entry.
        """
        # If a revision is in the cache, we assume it contains entries for the
        # whole inventory. So if all parent revisions are in the cache, but no
        # parent entry is present, then the entry revision is the current
        # revision. That amortizes the number of _get_file_revision calls for
        # large pulls to a "small number".
        executable = entry.executable
        found = self._get_entry_revision_from_db(
            revid, path, git_id, executable)
        if found is not None:
            entry.revision = found
            return

        # Not cached for this revision: an identical entry in a parent
        # inventory carries the revision we want.
        parent_ids = self.get_revision(revid).parent_ids
        for parent_id in parent_ids:
            found = self._get_entry_revision_from_db(
                parent_id, path, git_id, executable)
            if found is not None:
                break
        if found is None:
            if self._all_inventories_in_db(parent_ids):
                found = revid
            else:
                found = self._get_file_revision(revid, path)
        # The row is left uncommitted here.
        self._set_entry_revision_in_db(
            revid, path, git_id, executable, found)
        entry.revision = found
431
432
433
def escape_file_id(file_id):
    """Escape a path for use as a file id.

    Underscores are doubled first, then each space becomes '_s'.
    """
    doubled = file_id.replace('_', '__')
    return doubled.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
435
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
436
437
def escape_for_xml(message):
    """Replace xml-incompatible control characters.

    Each run of characters outside the ranges allowed by the XML
    specification is replaced by its 'unicode_escape' spelling (for
    instance a NUL character becomes the four characters ``\\x00``).

    :param message: the text to escape, or None.
    :return: the escaped text, or None when message is None.
    """
    # Copied from _escape_commit_message from bzr-svn.
    # -- David Allouche 2007-12-29.
    if message is None:
        return None
    import re
    # FIXME: RBC 20060419 this should be done by the revision
    # serialiser not by commit. Then we can also add an unescaper
    # in the deserializer and start roundtripping revision messages
    # precisely. See repository_implementations/test_repository.py

    def _escape(match):
        escaped = match.group(0).encode('unicode_escape')
        # encode() gives the native str on Python 2 but bytes on Python 3,
        # where the replacement must be text: normalise to str.
        if not isinstance(escaped, str):
            escaped = escaped.decode('ascii')
        return escaped

    # Python strings can include characters that can't be
    # represented in well-formed XML; escape characters that
    # aren't listed in the XML specification
    # (http://www.w3.org/TR/REC-xml/#NT-Char).
    message, _ = re.subn(
        u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
        _escape,
        message)
    return message
458
459
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
460
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory is built from a git commit."""

    def __init__(self, repository, revision_id):
        """Load the inventory of revision_id from repository.

        :param repository: object providing ``get_inventory`` and
            ``_get_blob``.
        :param revision_id: bzr revision id; None is treated as
            ``revision.NULL_REVISION``.
        """
        if revision_id is None:
            effective_id = revision.NULL_REVISION
        else:
            effective_id = revision_id
        self._inventory = repository.get_inventory(effective_id)
        self._repository = repository
        self._revision_id = effective_id

    def get_file_lines(self, file_id):
        """Return the text of a file as a list of lines.

        :param file_id: file id of an entry of this tree's inventory.
        :return: an empty list for a directory; otherwise the lines kept in
            the inventory's ``git_file_data`` when present, else the lines
            read through the repository's ``_get_blob``.
        """
        inv = self._inventory
        if inv[file_id].kind == 'directory':
            return []
        git_id = inv.git_ids[file_id]
        preloaded = inv.git_file_data
        if git_id not in preloaded:
            return self._repository._get_blob(git_id)
        return preloaded[git_id]
0.203.1 by Aaron Bentley
Make checkouts work
476
477
478
class GitFormat(object):
    """Format object assigned to ``GitRepository._format``."""

    # Tree references are reported as unsupported.
    supports_tree_reference = False