/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
19
import os
20
21
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
22
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
23
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
24
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
25
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
26
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
27
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
28
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
29
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
30
    urlutils,
31
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
32
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
33
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
34
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
35
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
36
    ids,
37
    model,
38
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
39
40
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
41
# Open sqlite connections keyed by cache file path. GitRepository.__init__
# looks a connection up here before opening a new one, so instances that use
# the same cache file share a single connection.
cachedbs = {}
42
43
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
44
class GitRepository(repository.Repository):
45
    """An adapter to git repositories for bzr."""
46
0.200.41 by David Allouche
Define _serializer = None in GitRepository.
47
    _serializer = None
48
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
49
    def __init__(self, gitdir, lockfiles):
        """Set up the adapter for the git repository behind gitdir.

        :param gitdir: control directory object; stored as self.bzrdir. Its
            transport must provide local_abspath, which is used to find the
            directory handed to model.GitModel.
        :param lockfiles: stored as self.control_files.
        """
        self.bzrdir = gitdir
        self.control_files = lockfiles
        gitdirectory = gitdir.transport.local_abspath('.')
        self.base = gitdirectory
        self._git = model.GitModel(gitdirectory)
        # Per-instance in-memory caches, filled lazily by get_revision,
        # _get_blob and _get_blob_info respectively.
        self._revision_cache = {}
        self._blob_cache = {}
        self._blob_info_cache = {}
        cache_dir = cache.create_cache_dir()
        # NOTE(review): the transport is never used afterwards; the call is
        # kept in case get_transport has side effects -- confirm and remove.
        cachedir_transport = get_transport(cache_dir)
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # Reuse the connection already opened for this cache file, if any.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
65
66
    def _init_cachedb(self):
67
        self.cachedb.executescript("""
68
        create table if not exists inventory (
69
            revid blob);
70
        create unique index if not exists inventory_revid
71
            on inventory (revid);
72
        create table if not exists entry_revision (
73
            inventory blob,
74
            path blob,
75
            gitid blob,
76
            executable integer,
77
            revision blob);
78
        create unique index if not exists entry_revision_revid_path
79
            on entry_revision (inventory, path);
80
        """)
81
        self.cachedb.commit()
82
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
83
84
    def _ancestor_revisions(self, revision_ids):
        """Generate the parsed ancestors of the given revisions.

        :param revision_ids: iterable of bzr revision ids, or None. None is
            passed through unchanged to GitModel.ancestor_lines.
        :return: generator yielding the result of _parse_rev for each item
            produced by GitModel.ancestor_lines.
        """
        if revision_ids is not None:
            # This used to call gitrevid_from_bzr, a name that is neither
            # defined nor imported in this module; use the same converter as
            # the rest of the class.
            git_revisions = [ids.convert_revision_id_bzr_to_git(r)
                             for r in revision_ids]
        else:
            git_revisions = None
        for lines in self._git.ancestor_lines(git_revisions):
            yield self._parse_rev(lines)
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
92
93
    def is_shared(self):
        """Report this repository as shared; the answer is always True."""
        return True
95
0.200.40 by David Allouche
GitRepository.supports_rich_root() => False
96
    def supports_rich_root(self):
        """Report that rich roots are not supported; always False."""
        return False
98
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
99
    def get_revision_graph(self, revision_id=None):
        """Return the revision graph as a plain dict of bzr revision ids.

        :param revision_id: bzr revision id to query for. When None, None is
            handed to GitModel.get_revision_graph.
        :return: dict mapping each bzr revision id to the list of its parent
            bzr revision ids.
        """
        to_bzr = ids.convert_revision_id_git_to_bzr
        graph = {}
        if revision_id is None:
            heads = None
        else:
            heads = [ids.convert_revision_id_bzr_to_git(revision_id)]
        git_graph = self._git.get_revision_graph(heads)
        for git_node, git_parents in git_graph.iteritems():
            node = to_bzr(git_node)
            graph[node] = [to_bzr(parent) for parent in git_parents]
        return graph
113
0.200.21 by John Arbash Meinel
Fix Repository.get_revision_graph()
114
    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return the revision graph as a deprecated_graph.Graph.

        :param revision_ids: iterable of bzr revision ids, or None. None is
            handed unchanged to GitModel.get_revision_graph.
        :return: a deprecated_graph.Graph with one node added per revision,
            each with the list of its parent bzr revision ids.
        """
        to_bzr = ids.convert_revision_id_git_to_bzr
        result = deprecated_graph.Graph()
        if revision_ids is not None:
            revision_ids = [ids.convert_revision_id_bzr_to_git(revid)
                            for revid in revision_ids]
        git_graph = self._git.get_revision_graph(revision_ids)
        for git_node, git_parents in git_graph.iteritems():
            node = to_bzr(git_node)
            node_parents = [to_bzr(parent) for parent in git_parents]
            result.add_node(node, node_parents)
        return result
128
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
129
    def get_ancestry(self, revision_id):
        """Return the ancestry of revision_id as a list of bzr revision ids.

        :return: a list whose first item is None, followed by the converted
            ids in the order GitModel.get_ancestry produces them.
        """
        git_head = ids.convert_revision_id_bzr_to_git(revision_id)
        ancestry = [None]
        for git_id in self._git.get_ancestry([git_head]):
            ancestry.append(ids.convert_revision_id_git_to_bzr(git_id))
        return ancestry
136
137
    def get_signature_text(self, revision_id):
        """Always fail: no signature text is available from this adapter.

        :raises errors.NoSuchRevision: unconditionally.
        """
        raise errors.NoSuchRevision(self, revision_id)
139
140
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
141
    def get_revision(self, revision_id):
        """Return the Revision for revision_id, using the in-memory cache.

        On a cache miss the commit is read through GitModel.rev_list, parsed
        with _parse_rev, and remembered in self._revision_cache.
        """
        try:
            return self._revision_cache[revision_id]
        except KeyError:
            pass
        commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        raw_lines = self._git.rev_list([commit_id], max_count=1, header=True)
        parsed = self._parse_rev(raw_lines)
        self._revision_cache[revision_id] = parsed
        return parsed
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
150
151
    def has_revision(self, revision_id):
        """Tell whether revision_id can be loaded from this repository.

        :return: True when get_revision succeeds, False when it raises
            errors.NoSuchRevision. Any other exception propagates.
        """
        try:
            self.get_revision(revision_id)
        # The exception class lives in the errors module; the bare name
        # NoSuchRevision is not bound in this file and raised NameError.
        except errors.NoSuchRevision:
            return False
        else:
            return True
158
159
    def get_revisions(self, revisions):
        """Return the list of get_revision results, in input order."""
        loaded = []
        for revision_id in revisions:
            loaded.append(self.get_revision(revision_id))
        return loaded
161
0.200.32 by David Allouche
Rewrite GitRepository._parse_rev, with unit tests.
162
    @classmethod
    def _parse_rev(klass, raw):
        """Build a Revision from the raw lines describing one git commit.

        Expected layout of raw:

        * The first item is the git commit id.
        * The following items are 'name value' header lines, up to the first
          blank line.
        * Every item after the blank line, up to the NULL item, starts with 4
          spaces and is part of the commit message.

        Of the headers, 'parent', 'author', 'committer' and 'tree' are
        recorded; any other header is ignored. The author is used as
        committer (with its timestamp and timezone) only as long as no
        committer header has been seen.

        :param raw: sequence of newline-terminated strings, its last item is a
            single NULL character.
        :return: a `bzrlib.revision.Revision` object.
        """
        rev = revision.Revision(
            ids.convert_revision_id_git_to_bzr(raw[0][:-1]))
        rev.inventory_sha1 = ""
        assert raw[-1] == '\x00', (
            "Last item of raw was not a single NULL character.")
        message_lines = []
        headers_done = False
        seen_committer = False
        for line in raw[1:-1]:
            if headers_done:
                assert line[:4] == '    ', (
                    "Unexpected line format in commit message: %r" % line)
                message_lines.append(line[4:])
            elif line == '\n':
                # The blank line separates the headers from the message.
                headers_done = True
            else:
                name, value = line[:-1].split(' ', 1)
                if name == 'parent':
                    rev.parent_ids.append(
                        ids.convert_revision_id_git_to_bzr(value))
                elif name == 'author':
                    author, timestamp, timezone = value.rsplit(' ', 2)
                    rev.properties['author'] = author
                    rev.properties['git-author-timestamp'] = timestamp
                    rev.properties['git-author-timezone'] = timezone
                    if not seen_committer:
                        rev.committer = author
                        rev.timestamp = float(timestamp)
                        rev.timezone = klass._parse_tz(timezone)
                elif name == 'committer':
                    seen_committer = True
                    committer, timestamp, timezone = value.rsplit(' ', 2)
                    rev.committer = committer
                    rev.timestamp = float(timestamp)
                    rev.timezone = klass._parse_tz(timezone)
                elif name == 'tree':
                    rev.properties['git-tree-id'] = value

        message = ''.join(message_lines)

        # XXX: The escaping below should not be needed, but current revision
        # serializers do not know how to handle text that is illegal in xml.
        # Note: when this is fixed, the revision namespace will need to be
        # revved up when removing the escaping code.
        # -- David Allouche 2007-12-30
        rev.message = escape_for_xml(message)
        rev.committer = escape_for_xml(rev.committer)
        rev.properties['author'] = escape_for_xml(rev.properties['author'])
        return rev
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
231
0.200.35 by David Allouche
GitRepository._parse_rev sets Revision.timezone to a float instead of a string.
232
    @classmethod
233
    def _parse_tz(klass, tz):
234
        """Parse a timezone specification in the [+|-]HHMM format.
235
236
        :return: the timezone offset in seconds.
237
        """
238
        assert len(tz) == 5
239
        sign = {'+': +1, '-': -1}[tz[0]]
240
        hours = int(tz[1:3])
241
        minutes = int(tz[3:])
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
242
        return sign * 60 * (60 * hours + minutes)
0.200.35 by David Allouche
GitRepository._parse_rev sets Revision.timezone to a float instead of a string.
243
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
244
    def revision_trees(self, revids):
        """Lazily yield revision_tree(revid) for each id in revids."""
        for revision_id in revids:
            tree = self.revision_tree(revision_id)
            yield tree
247
248
    def revision_tree(self, revision_id):
        """Return a GitRevisionTree of this repository at revision_id."""
        tree = GitRevisionTree(self, revision_id)
        return tree
250
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
251
    def _get_blob(self, git_id):
252
        try:
253
            return self._blob_cache[git_id]
254
        except KeyError:
255
            blob = self._git.cat_file('blob', git_id)
256
            # print "fetched blob:", git_id
257
            self._blob_cache[git_id] = blob
258
            return blob
259
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
260
    def _get_blob_info(self, git_id):
261
        try:
262
            return self._blob_info_cache[git_id]
263
        except KeyError:
264
            lines = self._get_blob(git_id)
265
            size = sum(len(line) for line in lines)
266
            sha1 = osutils.sha_strings(lines)
267
            self._blob_info_cache[git_id] = (size, sha1)
268
            return size, sha1
269
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
270
    def get_inventory(self, revision_id):
        """Build the bzr inventory for revision_id.

        None is treated as revision.NULL_REVISION, for which an empty
        Inventory with root_id=None is returned. Otherwise the inventory is
        built in two passes and, once complete, the revision is recorded in
        the 'inventory' table of the sqlite cache.

        :return: an inventory.Inventory carrying two extra attributes set
            here: git_ids (file id -> git id) and git_file_data (file id ->
            blob lines, for non-directory entries).
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        if revision_id == revision.NULL_REVISION:
            return inventory.Inventory(revision_id=revision_id, root_id=None)

        # First pass: parse what git reports for the commit. This provides
        # the git ids, so the entire tree text does not have to be cached.
        commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        inv = self._parse_inventory(
            revision_id, self._git.get_inventory(commit_id))

        # Second pass: add the data bzrlib requires on top of that: text
        # sizes, sha1s, symlink targets, then the revisions that introduced
        # each inventory entry. All text info is set before any entry
        # revision is looked up.
        inv.git_file_data = {}
        ordered_file_ids = sorted(inv.git_ids)
        for file_id in ordered_file_ids:
            self._set_entry_text_info(inv, inv[file_id], inv.git_ids[file_id])
        for file_id in ordered_file_ids:
            git_id = inv.git_ids[file_id]
            entry = inv[file_id]
            self._set_entry_revision(
                entry, revision_id, inv.id2path(file_id), git_id)

        # The entry_revision table is now fully populated for this revision,
        # so record that inventory data is available for it.
        self.cachedb.execute(
            "insert or ignore into inventory (revid) values (?)",
            (revision_id,))
        self.cachedb.commit()
        return inv
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
306
307
    @classmethod
    def _parse_inventory(klass, revid, git_inv):
        """Turn git tree entries into an Inventory for revid.

        :param revid: bzr revision id given to the Inventory and set as the
            revision of its root entry.
        :param git_inv: iterable of (perms, git_kind, git_id, path) tuples.
        :return: an inventory.Inventory with an extra git_ids attribute
            mapping each file id to its git id.
        :raises AssertionError: for a git_kind other than 'blob' or 'tree',
            or a blob whose perms[1] is neither '0' nor '2'.
        """
        # For now, git inventories do not have root ids. It is not clear
        # that root ids can be reliably supported.
        # -- David Allouche 2007-12-28
        inv = inventory.Inventory(revision_id=revid)
        inv.git_ids = {}
        for perms, git_kind, git_id, path in git_inv:
            executable = False
            if git_kind == 'tree':
                kind = 'directory'
            elif git_kind != 'blob':
                raise AssertionError(
                    "Unknown git entry kind: %r" % (git_kind,))
            elif perms[1] == '0':
                kind = 'file'
                # 0x49 is octal 111: the three execute permission bits.
                executable = bool(int(perms[-3:], 8) & 0x49)
            elif perms[1] == '2':
                kind = 'symlink'
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (perms,))
            # XXX: Maybe the file id should be prefixed by file kind, so
            # that the id changes when the kind of a path changes.
            # -- David Allouche 2007-12-28.
            file_id = escape_file_id(path.encode('utf-8'))
            new_entry = inv.add_path(path, kind, file_id=file_id)
            new_entry.executable = executable
            inv.git_ids[file_id] = git_id
        inv.root.revision = revid
        return inv
339
340
    def _set_entry_text_info(self, inv, entry, git_id):
341
        if entry.kind == 'directory':
342
            return
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
343
        size, sha1 = self._get_blob_info(git_id)
344
        entry.text_size = size
345
        entry.text_sha1 = sha1
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
346
        lines = self._get_blob(git_id)
347
        if entry.kind == 'symlink':
348
            entry.symlink_target = ''.join(lines)
349
        inv.git_file_data[entry.file_id] = lines
350
351
    def _get_file_revision(self, revision_id, path):
        """Ask git which revision to attribute path to, as of revision_id.

        Runs GitModel.rev_list from revision_id, restricted to path and
        limited to one result, and converts that commit id to a bzr
        revision id.

        :raises ValueError: when rev_list does not return exactly one line.
        """
        lines = self._git.rev_list(
            [ids.convert_revision_id_bzr_to_git(revision_id)],
            max_count=1, topo_order=True, paths=[path])
        # Exactly one line is expected; the unpacking enforces it.
        [line] = lines
        # The trailing newline is stripped before conversion. The debug
        # print that used to follow wrote to stdout on every call and has
        # been removed, like the other tracing prints in this module.
        return ids.convert_revision_id_git_to_bzr(line[:-1])
359
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
360
    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
361
        result = self.cachedb.execute(
362
            "select revision from entry_revision where"
363
            " inventory=? and path=? and gitid=? and executable=?",
364
            (revid, path, git_id, executable)).fetchone()
365
        if result is None:
366
            return None
367
        [revision] = result
368
        return revision
369
370
    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
371
        self.cachedb.execute(
372
            "insert into entry_revision"
373
            " (inventory, path, gitid, executable, revision)"
374
            " values (?, ?, ?, ?, ?)",
375
            (revid, path, git_id, executable, revision))
376
377
    def _all_inventories_in_db(self, revids):
378
        for revid in revids:
379
            result = self.cachedb.execute(
380
                "select count(*) from inventory where revid = ?",
381
                (revid,)).fetchone()
382
            if result is None:
383
                return False
384
        return True
385
0.200.44 by David Allouche
Remove some experimental cruft.
386
    def _set_entry_revision(self, entry, revid, path, git_id):
        """Set entry.revision, recording the answer in the sqlite cache.

        Lookup order:

        1. the cached row for (revid, path, git_id, entry.executable);
        2. the cached row for the same path, git id and executable flag in
           one of the parents of revid, first match wins;
        3. revid itself, if _all_inventories_in_db accepts all the parents;
        4. otherwise whatever _get_file_revision reports.

        For cases 2 to 4 the result is inserted in the entry_revision table.
        """
        # If a revision is in the cache, it is assumed to contain entries
        # for the whole inventory. So if all parent revisions are in the
        # cache but no parent entry is present, the entry revision is the
        # current revision. That amortizes the number of _get_file_revision
        # calls for large pulls to a "small number".
        cached = self._get_entry_revision_from_db(
            revid, path, git_id, entry.executable)
        if cached is not None:
            entry.revision = cached
            return

        parent_ids = self.get_revision(revid).parent_ids
        entry_rev = None
        for parent_id in parent_ids:
            entry_rev = self._get_entry_revision_from_db(
                parent_id, path, git_id, entry.executable)
            if entry_rev is not None:
                break
        if entry_rev is None:
            # No parent had a matching cached entry.
            if self._all_inventories_in_db(parent_ids):
                entry_rev = revid
            else:
                entry_rev = self._get_file_revision(revid, path)
        # The insert is not committed here; get_inventory commits once after
        # all entries of the revision have been processed.
        self._set_entry_revision_in_db(
            revid, path, git_id, entry.executable, entry_rev)
        entry.revision = entry_rev
413
414
415
def escape_file_id(file_id):
    """Return file_id with underscores doubled and spaces turned into '_s'.

    Underscores are doubled first, so an escaped space can be told apart
    from an underscore followed by the letter s in the original.
    """
    underscores_doubled = file_id.replace('_', '__')
    return underscores_doubled.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
417
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
418
419
def escape_for_xml(message):
    """Replace xml-incompatible control characters.

    Each run of characters outside the ranges accepted by the XML
    specification (http://www.w3.org/TR/REC-xml/#NT-Char) is replaced by
    its 'unicode_escape' encoding. None is returned unchanged.
    """
    # Copied from _escape_commit_message from bzr-svn.
    # -- David Allouche 2007-12-29.
    if message is None:
        return None
    import re

    # FIXME: RBC 20060419 this should be done by the revision serialiser,
    # not by commit. Then an unescaper can be added to the deserializer and
    # revision messages can roundtrip precisely. See
    # repository_implementations/test_repository.py

    def _escape_run(match):
        return match.group(0).encode('unicode_escape')

    return re.sub(
        u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
        _escape_run,
        message)
440
441
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
442
class GitRevisionTree(revisiontree.RevisionTree):
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
443
444
    def __init__(self, repository, revision_id):
        """Load the inventory of revision_id from repository.

        :param repository: object providing get_inventory; kept as
            self._repository.
        :param revision_id: bzr revision id; None is replaced by
            revision.NULL_REVISION.
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        self._repository = repository
        self._revision_id = revision_id
        self._inventory = repository.get_inventory(revision_id)
0.200.19 by John Arbash Meinel
More refactoring. Add some direct tests for GitModel.
450
451
    def get_file_lines(self, file_id):
        """Return the lines of the entry identified by file_id.

        :return: an empty list for a directory; otherwise the lines stored
            for file_id in the inventory's git_file_data mapping.
        :raises KeyError: from git_file_data when it has no lines recorded
            for file_id.
        """
        entry = self._inventory[file_id]
        if entry.kind == 'directory':
            return []
        # The lines were already fetched while the inventory was built. The
        # cat_file fallback that used to follow this return could never be
        # reached and has been removed.
        return self._inventory.git_file_data[file_id]