/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""An adapter between a Git Repository and a Bazaar Branch"""
18
0.200.56 by Jelmer Vernooij
Switch to using GitPython rather than our own in-house stuff.
19
import git
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
20
import os
21
22
import bzrlib
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
23
from bzrlib import (
0.200.20 by John Arbash Meinel
All tests are passing again
24
    deprecated_graph,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
25
    errors,
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
26
    inventory,
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
27
    osutils,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
28
    repository,
0.200.29 by David Allouche
Smoke test for GitRepository.get_revision, and corresponding fixes.
29
    revision,
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
30
    revisiontree,
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
31
    urlutils,
32
    )
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
33
from bzrlib.transport import get_transport
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
34
0.200.27 by David Allouche
Flat is better than nested, remove the gitlib hierarchy.
35
from bzrlib.plugins.git import (
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
36
    cache,
0.200.20 by John Arbash Meinel
All tests are passing again
37
    ids,
38
    )
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
39
40
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
41
# Module-level map of cache file path -> open sqlite connection.
# GitRepository.__init__ looks its connection up here, so each cache file is
# connected to at most once per process.
cachedbs = {}
42
43
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
44
class GitRepository(repository.Repository):
45
    """An adapter to git repositories for bzr."""
46
0.200.41 by David Allouche
Define _serializer = None in GitRepository.
47
    _serializer = None
48
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
49
    def __init__(self, gitdir, lockfiles):
        """Open the git repository that lives at the root of ``gitdir``.

        :param gitdir: control directory object; stored as ``self.bzrdir``.
            Its ``root_transport`` is used to find the local path of the git
            repository.
        :param lockfiles: stored as ``self.control_files``.
        """
        self.bzrdir = gitdir
        self.control_files = lockfiles
        self._git = git.repo.Repo(gitdir.root_transport.local_abspath("."))
        self._revision_cache = {}
        self._blob_cache = {}
        self._blob_info_cache = {}
        # Holds the inventory under construction while get_inventory() runs.
        # Initialised here so that _fetch_blob() can be used on a fresh
        # repository without raising AttributeError.
        self._building_inventory = None
        cache_dir = cache.create_cache_dir()
        cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
        # One sqlite connection per cache file, shared through the
        # module-level cachedbs mapping.
        if cache_file not in cachedbs:
            cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
        self.cachedb = cachedbs[cache_file]
        self._init_cachedb()
        self.texts = None
        self.signatures = None
        self.revisions = None
        self._format = GitFormat()
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
67
68
    def _init_cachedb(self):
69
        self.cachedb.executescript("""
70
        create table if not exists inventory (
71
            revid blob);
72
        create unique index if not exists inventory_revid
73
            on inventory (revid);
74
        create table if not exists entry_revision (
75
            inventory blob,
76
            path blob,
77
            gitid blob,
78
            executable integer,
79
            revision blob);
80
        create unique index if not exists entry_revision_revid_path
81
            on entry_revision (inventory, path);
82
        """)
83
        self.cachedb.commit()
84
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
85
    def _ancestor_revisions(self, revision_ids):
86
        if revision_ids is not None:
87
            git_revisions = [gitrevid_from_bzr(r) for r in revision_ids]
88
        else:
89
            git_revisions = None
0.200.19 by John Arbash Meinel
More refactoring. Add some direct tests for GitModel.
90
        for lines in self._git.ancestor_lines(git_revisions):
0.200.30 by David Allouche
Rename GitRepository.parse_rev to ._parse_rev.
91
            yield self._parse_rev(lines)
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
92
        # print "fetched ancestors:", git_revisions
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
93
94
    def is_shared(self):
        """Report this repository as shared; the answer is always True."""
        return True
96
0.200.40 by David Allouche
GitRepository.supports_rich_root() => False
97
    def supports_rich_root(self):
        """Report rich-root support; the answer is always False."""
        return False
99
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
100
    def get_revision_graph(self, revision_id=None):
        """Return the revision graph as a plain dictionary.

        :param revision_id: bzr revision id to start from, or None to give
            the git layer no starting point.
        :return: dict mapping each bzr revision id to the list of bzr
            revision ids of its parents.
        """
        if revision_id is None:
            git_heads = None
        else:
            git_heads = [ids.convert_revision_id_bzr_to_git(revision_id)]
        git_graph = self._git.get_revision_graph(git_heads)
        bzr_graph = {}
        for git_node, git_parents in git_graph.iteritems():
            node_id = ids.convert_revision_id_git_to_bzr(git_node)
            bzr_graph[node_id] = [
                ids.convert_revision_id_git_to_bzr(git_parent)
                for git_parent in git_parents]
        return bzr_graph
114
0.200.21 by John Arbash Meinel
Fix Repository.get_revision_graph()
115
    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return the revision graph as a ``deprecated_graph.Graph``.

        :param revision_ids: iterable of bzr revision ids to start from, or
            None to give the git layer no starting points.
        :return: a graph with one node per revision reported by the git
            layer, each added together with its parents' bzr revision ids.
        """
        bzr_graph = deprecated_graph.Graph()
        if revision_ids is None:
            git_heads = None
        else:
            git_heads = [ids.convert_revision_id_bzr_to_git(revid)
                         for revid in revision_ids]
        git_graph = self._git.get_revision_graph(git_heads)
        for git_node, git_parents in git_graph.iteritems():
            node_id = ids.convert_revision_id_git_to_bzr(git_node)
            parent_ids = [ids.convert_revision_id_git_to_bzr(git_parent)
                          for git_parent in git_parents]
            bzr_graph.add_node(node_id, parent_ids)
        return bzr_graph
129
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
130
    def get_ancestry(self, revision_id):
        """Return the ancestry of ``revision_id`` as a list of bzr ids.

        The list starts with None, followed by one converted id per item
        returned by the git layer, in the order the git layer returned them.
        """
        git_head = ids.convert_revision_id_bzr_to_git(revision_id)
        ancestry = [None]
        for git_id in self._git.get_ancestry([git_head]):
            ancestry.append(ids.convert_revision_id_git_to_bzr(git_id))
        return ancestry
137
138
    def get_signature_text(self, revision_id):
        """Always raise ``errors.NoSuchRevision`` for ``revision_id``.

        This adapter never returns a signature text.
        """
        raise errors.NoSuchRevision(self, revision_id)
140
141
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
142
    def get_revision(self, revision_id):
        """Return the revision object for ``revision_id``.

        Results are memoised in ``self._revision_cache``; on a miss the
        commit header is read through ``self._git.rev_list`` and parsed with
        ``_parse_rev``.
        """
        try:
            return self._revision_cache[revision_id]
        except KeyError:
            pass
        commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
        header_lines = self._git.rev_list(
            [commit_id], max_count=1, header=True)
        parsed = self._parse_rev(header_lines)
        self._revision_cache[revision_id] = parsed
        return parsed
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
151
152
    def has_revision(self, revision_id):
        """Tell whether ``revision_id`` can be loaded from this repository.

        :return: True if ``get_revision`` succeeds, False if it raises
            ``errors.NoSuchRevision``.
        """
        try:
            self.get_revision(revision_id)
        except errors.NoSuchRevision:
            # The exception class lives in bzrlib.errors; the bare
            # NoSuchRevision name is not imported in this module and turned
            # every failure into a NameError.
            return False
        else:
            return True
159
160
    def get_revisions(self, revisions):
        """Return the revision objects for ``revisions``, in the same order."""
        loaded = []
        for revid in revisions:
            loaded.append(self.get_revision(revid))
        return loaded
162
0.200.32 by David Allouche
Rewrite GitRepository._parse_rev, with unit tests.
163
    @classmethod
    def _parse_rev(klass, raw):
        """Parse a single git revision.

        * The first line is the git commit id.
        * Following lines conform to the 'name value' structure, until the
          first blank line.
        * All lines after the first blank line and until the NULL line have 4
          leading spaces and constitute the commit message.

        Recognised headers are 'parent', 'author', 'committer' and 'tree';
        any other header is ignored. The author values are also used as the
        committer, timestamp and timezone of the revision unless a committer
        header was seen before the author header; a committer header always
        overrides them.

        :param raw: sequence of newline-terminated strings, its last item is a
            single NULL character.
        :return: a `bzrlib.revision.Revision` object.
        """
        message_lines = []
        in_message = False
        committer_was_set = False
        revision_id = ids.convert_revision_id_git_to_bzr(raw[0][:-1])
        rev = revision.Revision(revision_id)
        rev.inventory_sha1 = ""
        assert raw[-1] == '\x00', (
            "Last item of raw was not a single NULL character.")
        for line in raw[1:-1]:
            if in_message:
                assert line[:4] == '    ', (
                    "Unexpected line format in commit message: %r" % line)
                message_lines.append(line[4:])
                continue
            if line == '\n':
                # The first blank line separates the headers from the message.
                in_message = True
                continue
            name, value = line[:-1].split(' ', 1)
            if name == 'parent':
                rev.parent_ids.append(
                    ids.convert_revision_id_git_to_bzr(value))
            elif name == 'author':
                # The value is "<identity> <timestamp> <timezone>".
                author, timestamp, timezone = value.rsplit(' ', 2)
                rev.properties['author'] = author
                rev.properties['git-author-timestamp'] = timestamp
                rev.properties['git-author-timezone'] = timezone
                if not committer_was_set:
                    rev.committer = author
                    rev.timestamp = float(timestamp)
                    rev.timezone = klass._parse_tz(timezone)
            elif name == 'committer':
                committer_was_set = True
                committer, timestamp, timezone = value.rsplit(' ', 2)
                rev.committer = committer
                rev.timestamp = float(timestamp)
                rev.timezone = klass._parse_tz(timezone)
            elif name == 'tree':
                rev.properties['git-tree-id'] = value

        rev.message = ''.join(message_lines)

        # XXX: That should not be needed, but current revision serializers do
        # not know how to handle text that is illegal in xml. Note: when
        # this is fixed, we will need to rev up the revision namespace when
        # removing the escaping code. -- David Allouche 2007-12-30
        rev.message = escape_for_xml(rev.message)
        rev.committer = escape_for_xml(rev.committer)
        # A commit without an author header has no 'author' property; do not
        # fail with KeyError on it.
        if 'author' in rev.properties:
            rev.properties['author'] = escape_for_xml(
                rev.properties['author'])

        return rev
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
232
0.200.35 by David Allouche
GitRepository._parse_rev sets Revision.timezone to a float instead of a string.
233
    @classmethod
234
    def _parse_tz(klass, tz):
235
        """Parse a timezone specification in the [+|-]HHMM format.
236
237
        :return: the timezone offset in seconds.
238
        """
239
        assert len(tz) == 5
240
        sign = {'+': +1, '-': -1}[tz[0]]
241
        hours = int(tz[1:3])
242
        minutes = int(tz[3:])
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
243
        return sign * 60 * (60 * hours + minutes)
0.200.35 by David Allouche
GitRepository._parse_rev sets Revision.timezone to a float instead of a string.
244
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
245
    def revision_trees(self, revids):
        """Lazily yield one revision tree per id in ``revids``, in order."""
        for revision_id in revids:
            yield self.revision_tree(revision_id)
248
249
    def revision_tree(self, revision_id):
        """Return a ``GitRevisionTree`` of this repository at ``revision_id``."""
        tree = GitRevisionTree(self, revision_id)
        return tree
251
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
252
    def _fetch_blob(self, git_id):
253
        lines = self._git.cat_file('blob', git_id)
0.204.5 by James Westby
Lose the debuggin prints.
254
        # print "fetched blob:", git_id
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
255
        if self._building_inventory is not None:
256
            self._building_inventory.git_file_data[git_id] = lines
257
        return lines
258
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
259
    def _get_blob(self, git_id):
260
        try:
261
            return self._blob_cache[git_id]
262
        except KeyError:
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
263
            return self._fetch_blob(git_id)
264
265
    def _get_blob_caching(self, git_id):
266
        try:
267
            return self._blob_cache[git_id]
268
        except KeyError:
269
            lines = self._fetch_blob(git_id)
270
            self._blob_cache[git_id] = lines
271
            return lines
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
272
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
273
    def _get_blob_info(self, git_id):
274
        try:
275
            return self._blob_info_cache[git_id]
276
        except KeyError:
277
            lines = self._get_blob(git_id)
278
            size = sum(len(line) for line in lines)
279
            sha1 = osutils.sha_strings(lines)
280
            self._blob_info_cache[git_id] = (size, sha1)
281
            return size, sha1
282
0.200.18 by John Arbash Meinel
Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.
283
    def get_inventory(self, revision_id):
        """Build the bzr inventory for ``revision_id``.

        :param revision_id: bzr revision id; None is treated as
            ``revision.NULL_REVISION``, for which an empty inventory without
            a root is returned.
        :return: an ``inventory.Inventory``. For a real revision it carries
            two extra attributes: ``git_ids`` (file id -> git id) and
            ``git_file_data`` (git id -> lines of the blobs that were fetched
            while the inventory was being built).
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        if revision_id == revision.NULL_REVISION:
            return inventory.Inventory(
                revision_id=revision_id, root_id=None)

        # First pass at building the inventory. We need this one to get the
        # git ids, so we do not have to cache the entire tree text. Ideally,
        # this should be all we need to do.
        git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
        git_inventory = self._git.get_inventory(git_commit)
        inv = self._parse_inventory(revision_id, git_inventory)

        # Second pass at building the inventory. There we retrieve additional
        # data that bzrlib requires: text sizes, sha1s, symlink targets and
        # revisions that introduced inventory entries
        inv.git_file_data = {}
        self._building_inventory = inv
        try:
            file_ids = sorted(inv.git_ids.iterkeys())
            for file_id in file_ids:
                self._set_entry_text_info(
                    inv, inv[file_id], inv.git_ids[file_id])
            for file_id in file_ids:
                self._set_entry_revision(
                    inv[file_id], revision_id, inv.id2path(file_id),
                    inv.git_ids[file_id])

            # At this point the entry_revision table is fully populated for
            # this revision. So record that we have inventory data for this
            # revision.
            self.cachedb.execute(
                "insert or ignore into inventory (revid) values (?)",
                (revision_id,))
            self.cachedb.commit()
        finally:
            # Always clear the marker, even when the second pass fails;
            # otherwise later blob fetches would keep being recorded in this
            # abandoned inventory.
            self._building_inventory = None
        return inv
0.200.38 by David Allouche
Reimplement GitRepository.get_inventory, simpler and faster.
321
322
    @classmethod
    def _parse_inventory(klass, revid, git_inv):
        """Turn a git tree listing into a bzr inventory.

        :param revid: bzr revision id given to the inventory and to its root
            entry.
        :param git_inv: iterable of ``(perms, git_kind, git_id, path)``
            tuples.
        :return: an ``inventory.Inventory`` with an extra ``git_ids``
            attribute mapping each file id to its git id.
        :raises AssertionError: on an entry kind other than 'blob' or 'tree',
            or on a blob whose second permission character is neither '0'
            nor '2'.
        """
        # For now, git inventory do not have root ids. It is not clear that we
        # can reliably support root ids. -- David Allouche 2007-12-28
        inv = inventory.Inventory(revision_id=revid)
        inv.git_ids = {}
        # Octal 111: the execute bit for user, group or other.
        any_exec_bit = int('111', 8)
        for perms, git_kind, git_id, path in git_inv:
            executable = False
            if git_kind == 'tree':
                kind = 'directory'
            elif git_kind != 'blob':
                raise AssertionError(
                    "Unknown git entry kind: %r" % (git_kind,))
            elif perms[1] == '0':
                kind = 'file'
                executable = bool(int(perms[-3:], 8) & any_exec_bit)
            elif perms[1] == '2':
                kind = 'symlink'
            else:
                raise AssertionError(
                    "Unknown blob kind, perms=%r." % (perms,))
            # XXX: Maybe the file id should be prefixed by file kind, so when
            # the kind of path changes, the id changes too.
            # -- David Allouche 2007-12-28.
            file_id = escape_file_id(path.encode('utf-8'))
            entry = inv.add_path(path, kind, file_id=file_id)
            entry.executable = executable
            inv.git_ids[file_id] = git_id
        inv.root.revision = revid
        return inv
354
355
    def _set_entry_text_info(self, inv, entry, git_id):
356
        if entry.kind == 'directory':
357
            return
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
358
        size, sha1 = self._get_blob_info(git_id)
359
        entry.text_size = size
360
        entry.text_sha1 = sha1
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
361
        if entry.kind == 'symlink':
0.200.46 by David Allouche
Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.
362
            lines = self._get_blob_caching(git_id)
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
363
            entry.symlink_target = ''.join(lines)
364
365
    def _get_file_revision(self, revision_id, path):
        """Ask git for the revision reported for ``path`` from ``revision_id``.

        Runs ``rev_list`` limited to ``path`` with ``max_count=1`` and
        ``topo_order=True``, and converts the single commit id it returns.

        :return: a bzr revision id.
        :raises ValueError: if git does not return exactly one line.
        """
        start = ids.convert_revision_id_bzr_to_git(revision_id)
        output = self._git.rev_list(
            [start], max_count=1, topo_order=True, paths=[path])
        (commit_line,) = output
        # Drop the trailing newline before converting the id.
        return ids.convert_revision_id_git_to_bzr(commit_line[:-1])
373
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
374
    def _get_entry_revision_from_db(self, revid, path, git_id, executable):
375
        result = self.cachedb.execute(
376
            "select revision from entry_revision where"
377
            " inventory=? and path=? and gitid=? and executable=?",
378
            (revid, path, git_id, executable)).fetchone()
379
        if result is None:
380
            return None
381
        [revision] = result
382
        return revision
383
384
    def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):
385
        self.cachedb.execute(
386
            "insert into entry_revision"
387
            " (inventory, path, gitid, executable, revision)"
388
            " values (?, ?, ?, ?, ?)",
389
            (revid, path, git_id, executable, revision))
390
391
    def _all_inventories_in_db(self, revids):
392
        for revid in revids:
393
            result = self.cachedb.execute(
394
                "select count(*) from inventory where revid = ?",
395
                (revid,)).fetchone()
396
            if result is None:
397
                return False
398
        return True
399
0.200.44 by David Allouche
Remove some experimental cruft.
400
    def _set_entry_revision(self, entry, revid, path, git_id):
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
401
        # If a revision is in the cache, we assume it contains entries for the
402
        # whole inventory. So if all parent revisions are in the cache, but no
403
        # parent entry is present, then the entry revision is the current
0.200.44 by David Allouche
Remove some experimental cruft.
404
        # revision. That amortizes the number of _get_file_revision calls for
405
        # large pulls to a "small number".
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
406
        entry_rev = self._get_entry_revision_from_db(
407
            revid, path, git_id, entry.executable)
408
        if entry_rev is not None:
409
            entry.revision = entry_rev
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
410
            return
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
411
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
412
        revision = self.get_revision(revid)
413
        for parent_id in revision.parent_ids:
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
414
            entry_rev = self._get_entry_revision_from_db(
415
                parent_id, path, git_id, entry.executable)
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
416
            if entry_rev is not None:
417
                break
418
        else:
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
419
            if self._all_inventories_in_db(revision.parent_ids):
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
420
                entry_rev = revid
421
            else:
422
                entry_rev = self._get_file_revision(revid, path)
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
423
        self._set_entry_revision_in_db(
424
            revid, path, git_id, entry.executable, entry_rev)
425
        #self.cachedb.commit()
0.200.43 by David Allouche
Ultra-experimental support for "bzr pull". No test. No sanity.
426
        entry.revision = entry_rev
427
428
429
def escape_file_id(file_id):
    """Escape a path so that it contains no space.

    Underscores are doubled first, then every space becomes ``_s``.
    """
    doubled = file_id.replace('_', '__')
    return doubled.replace(' ', '_s')
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
431
0.200.45 by David Allouche
More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.
432
433
def escape_for_xml(message):
    """Escape the characters of ``message`` that XML cannot represent.

    Each run of characters outside the ranges allowed by the XML
    specification is replaced by its ``unicode_escape`` encoding. None is
    returned unchanged.
    """
    # Copied from _escape_commit_message from bzr-svn.
    # -- David Allouche 2007-12-29.
    if message is None:
        return None
    import re
    # FIXME: RBC 20060419 this should be done by the revision
    # serialiser not by commit. Then we can also add an unescaper
    # in the deserializer and start roundtripping revision messages
    # precisely. See repository_implementations/test_repository.py

    # Python strings can include characters that can't be
    # represented in well-formed XML; escape characters that
    # aren't listed in the XML specification
    # (http://www.w3.org/TR/REC-xml/#NT-Char).
    illegal_run = u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+'

    def _escape(match):
        return match.group(0).encode('unicode_escape')

    return re.sub(illegal_run, _escape, message)
454
455
0.200.39 by David Allouche
Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.
456
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree backed by the inventory of a git repository adapter."""

    def __init__(self, repository, revision_id):
        """Load the inventory of ``revision_id`` from ``repository``.

        :param repository: object providing ``get_inventory`` and
            ``_get_blob``.
        :param revision_id: bzr revision id; None is treated as
            ``revision.NULL_REVISION``.
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        self._inventory = repository.get_inventory(revision_id)
        self._repository = repository
        self._revision_id = revision_id

    def get_file_lines(self, file_id):
        """Return the lines of the file identified by ``file_id``.

        Directories yield an empty list. Blobs recorded in the inventory's
        ``git_file_data`` are served from there; anything else is read
        through the repository.
        """
        inv = self._inventory
        if inv[file_id].kind == 'directory':
            return []
        git_id = inv.git_ids[file_id]
        preloaded = inv.git_file_data
        if git_id not in preloaded:
            return self._repository._get_blob(git_id)
        return preloaded[git_id]
0.203.1 by Aaron Bentley
Make checkouts work
472
473
474
class GitFormat(object):
    """Format object assigned to ``GitRepository._format``."""

    # Tree references are reported as unsupported.
    supports_tree_reference = False