bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
1  | 
# Copyright (C) 2007 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""An adapter between a Git Repository and a Bazaar Branch"""
 | 
|
18  | 
||
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
19  | 
import os  | 
20  | 
||
21  | 
import bzrlib  | 
|
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
22  | 
from bzrlib import (  | 
| 
0.200.20
by John Arbash Meinel
 All tests are passing again  | 
23  | 
deprecated_graph,  | 
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
24  | 
errors,  | 
| 
0.200.38
by David Allouche
 Reimplement GitRepository.get_inventory, simpler and faster.  | 
25  | 
inventory,  | 
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
26  | 
osutils,  | 
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
27  | 
repository,  | 
| 
0.200.29
by David Allouche
 Smoke test for GitRepository.get_revision, and corresponding fixes.  | 
28  | 
revision,  | 
| 
0.200.39
by David Allouche
 Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.  | 
29  | 
revisiontree,  | 
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
30  | 
urlutils,  | 
31  | 
    )
 | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
32  | 
from bzrlib.transport import get_transport  | 
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
33  | 
|
| 
0.200.27
by David Allouche
 Flat is better than nested, remove the gitlib hierarchy.  | 
34  | 
from bzrlib.plugins.git import (  | 
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
35  | 
cache,  | 
| 
0.200.20
by John Arbash Meinel
 All tests are passing again  | 
36  | 
ids,  | 
37  | 
model,  | 
|
38  | 
    )
 | 
|
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
39  | 
|
40  | 
||
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
41  | 
# Open sqlite connections shared by every GitRepository created in this
# process, keyed by the path of the cache database file.
cachedbs = {}
42  | 
||
43  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
44  | 
class GitRepository(repository.Repository):  | 
45  | 
"""An adapter to git repositories for bzr."""  | 
|
46  | 
||
| 
0.200.41
by David Allouche
 Define _serializer = None in GitRepository.  | 
47  | 
_serializer = None  | 
48  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
49  | 
def __init__(self, gitdir, lockfiles):
    """Set up the adapter around an existing git directory.

    :param gitdir: control directory object; its ``transport`` attribute is
        handed to `_make_model` to build the git model.
    :param lockfiles: stored unchanged as ``control_files``.
    """
    self.bzrdir = gitdir
    self.control_files = lockfiles
    self._git = self._make_model(gitdir.transport)
    self._revision_cache = {}
    self._blob_cache = {}
    self._blob_info_cache = {}
    # Inventory currently being built by get_inventory, or None. It has to
    # exist from the start because _fetch_blob reads it on every call.
    self._building_inventory = None
    cache_dir = cache.create_cache_dir()
    cache_file = os.path.join(cache_dir, 'cache-%s' % ids.NAMESPACE)
    # Reuse one sqlite connection per cache file for the whole process.
    if cache_file not in cachedbs:
        cachedbs[cache_file] = cache.sqlite3.connect(cache_file)
    self.cachedb = cachedbs[cache_file]
    self._init_cachedb()
    self._format = GitFormat()
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
64  | 
|
65  | 
def _init_cachedb(self):  | 
|
66  | 
self.cachedb.executescript("""  | 
|
67  | 
        create table if not exists inventory (
 | 
|
68  | 
            revid blob);
 | 
|
69  | 
        create unique index if not exists inventory_revid
 | 
|
70  | 
            on inventory (revid);
 | 
|
71  | 
        create table if not exists entry_revision (
 | 
|
72  | 
            inventory blob,
 | 
|
73  | 
            path blob,
 | 
|
74  | 
            gitid blob,
 | 
|
75  | 
            executable integer,
 | 
|
76  | 
            revision blob);
 | 
|
77  | 
        create unique index if not exists entry_revision_revid_path
 | 
|
78  | 
            on entry_revision (inventory, path);
 | 
|
79  | 
""")  | 
|
80  | 
self.cachedb.commit()  | 
|
81  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
82  | 
|
| 
0.202.2
by David Allouche
 GitRepository.get_inventory and .revision_tree work for the null revision. Support for testing GitRepository without disk data.  | 
83  | 
    @classmethod
    def _make_model(klass, transport):
        """Build the git model for the directory behind `transport`.

        :param transport: object whose ``local_abspath('.')`` gives the path
            of the git directory.
        :return: a `model.GitModel` created from that path.
        """
        return model.GitModel(transport.local_abspath('.'))
|
87  | 
||
88  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
89  | 
def _ancestor_revisions(self, revision_ids):
    """Yield a parsed revision for each ancestor of `revision_ids`.

    :param revision_ids: iterable of bzr revision ids, or None, which is
        passed through to the git model unchanged.
    :return: generator of the objects produced by `_parse_rev`.
    """
    if revision_ids is not None:
        # Use the same id-mapping helper as the rest of this class; the
        # name gitrevid_from_bzr used here before is not defined in this
        # module.
        git_revisions = [ids.convert_revision_id_bzr_to_git(r)
                         for r in revision_ids]
    else:
        git_revisions = None
    for lines in self._git.ancestor_lines(git_revisions):
        yield self._parse_rev(lines)
 | 
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
97  | 
|
98  | 
def is_shared(self):
    """Report whether this repository is shared; always True here."""
    return True
|
100  | 
||
| 
0.200.40
by David Allouche
 GitRepository.supports_rich_root() => False  | 
101  | 
def supports_rich_root(self):
    """Report whether rich roots are supported; always False here."""
    return False
|
103  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
104  | 
def get_revision_graph(self, revision_id=None):
    """Return the revision graph expressed in bzr revision ids.

    :param revision_id: bzr revision id to start from, or None to pass no
        starting point to the git model.
    :return: dict mapping each bzr revision id to the list of its parent
        bzr revision ids.
    """
    to_bzr = ids.convert_revision_id_git_to_bzr
    graph = {}
    heads = None
    if revision_id is not None:
        heads = [ids.convert_revision_id_bzr_to_git(revision_id)]
    for git_node, git_parents in self._git.get_revision_graph(heads).items():
        key = to_bzr(git_node)
        graph[key] = [to_bzr(parent) for parent in git_parents]
    return graph
118  | 
||
| 
0.200.21
by John Arbash Meinel
 Fix Repository.get_revision_graph()  | 
119  | 
def get_revision_graph_with_ghosts(self, revision_ids=None):
    """Return the revision graph as a `deprecated_graph.Graph`.

    :param revision_ids: iterable of bzr revision ids to start from, or
        None to pass no starting point to the git model.
    :return: a graph with one node per revision, added together with the
        list of its parent bzr revision ids.
    """
    to_bzr = ids.convert_revision_id_git_to_bzr
    graph = deprecated_graph.Graph()
    git_heads = revision_ids
    if git_heads is not None:
        git_heads = [ids.convert_revision_id_bzr_to_git(r)
                     for r in git_heads]
    for git_node, git_parents in self._git.get_revision_graph(
            git_heads).items():
        node = to_bzr(git_node)
        graph.add_node(node, [to_bzr(parent) for parent in git_parents])
    return graph
|
133  | 
||
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
134  | 
def get_ancestry(self, revision_id):
    """Return the ancestry of `revision_id` as bzr revision ids.

    :param revision_id: bzr revision id whose ancestry is requested.
    :return: list that starts with None, followed by the converted ids in
        the order the git model returns them.
    """
    git_heads = [ids.convert_revision_id_bzr_to_git(revision_id)]
    ancestry = [None]
    for git_id in self._git.get_ancestry(git_heads):
        ancestry.append(ids.convert_revision_id_git_to_bzr(git_id))
    return ancestry
|
141  | 
||
142  | 
def get_signature_text(self, revision_id):
    """Always fail: no signature text is available from this repository.

    :raises errors.NoSuchRevision: for every `revision_id`.
    """
    raise errors.NoSuchRevision(self, revision_id)
|
144  | 
||
145  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
146  | 
def get_revision(self, revision_id):
    """Return the revision object for `revision_id`, using a memo cache.

    On a cache miss the commit header is read from git with ``rev_list``,
    parsed by `_parse_rev` and remembered in ``_revision_cache``.

    :param revision_id: bzr revision id.
    :return: the object produced by `_parse_rev`.
    """
    try:
        return self._revision_cache[revision_id]
    except KeyError:
        pass
    git_commit_id = ids.convert_revision_id_bzr_to_git(revision_id)
    header = self._git.rev_list([git_commit_id], max_count=1, header=True)
    parsed = self._parse_rev(header)
    self._revision_cache[revision_id] = parsed
    return parsed
|
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
155  | 
|
156  | 
def has_revision(self, revision_id):
    """Tell whether `revision_id` can be loaded with `get_revision`.

    :param revision_id: bzr revision id to look for.
    :return: False when `get_revision` raises `errors.NoSuchRevision`,
        True otherwise.
    """
    try:
        self.get_revision(revision_id)
    # The exception class lives in the errors module; the bare name
    # NoSuchRevision is not imported here and would raise NameError.
    except errors.NoSuchRevision:
        return False
    else:
        return True
|
163  | 
||
164  | 
def get_revisions(self, revisions):
    """Return the revision objects for `revisions`, in the same order.

    :param revisions: iterable of bzr revision ids.
    :return: list with the result of `get_revision` for each id.
    """
    loaded = []
    for revision_id in revisions:
        loaded.append(self.get_revision(revision_id))
    return loaded
|
166  | 
||
| 
0.200.32
by David Allouche
 Rewrite GitRepository._parse_rev, with unit tests.  | 
167  | 
    @classmethod
    def _parse_rev(klass, raw):
        """Parse a single git revision.

        * The first line is the git commit id.
        * Following lines conform to the 'name value' structure, until the
          first blank line.
        * All lines after the first blank line and until the NULL line have 4
          leading spaces and constitute the commit message.

        When both an author and a committer header are present, the committer
        supplies the revision's committer, timestamp and timezone; the author
        is always kept in the revision properties.

        :param raw: sequence of newline-terminated strings, its last item is a
            single NULL character.
        :return: a `bzrlib.revision.Revision` object.
        """
        message_lines = []
        in_message = False
        committer_was_set = False
        revision_id = ids.convert_revision_id_git_to_bzr(raw[0][:-1])
        rev = revision.Revision(revision_id)
        rev.inventory_sha1 = ""
        assert raw[-1] == '\x00', (
            "Last item of raw was not a single NULL character.")
        for line in raw[1:-1]:
            if in_message:
                # Message lines carry a four-space prefix that is stripped.
                assert line[:4] == '    ', (
                    "Unexpected line format in commit message: %r" % line)
                message_lines.append(line[4:])
                continue
            if line == '\n':
                # The first blank line separates headers from the message.
                in_message = True
                continue
            name, value = line[:-1].split(' ', 1)
            if name == 'parent':
                rev.parent_ids.append(
                    ids.convert_revision_id_git_to_bzr(value))
            elif name == 'author':
                author, timestamp, timezone = value.rsplit(' ', 2)
                rev.properties['author'] = author
                rev.properties['git-author-timestamp'] = timestamp
                rev.properties['git-author-timezone'] = timezone
                # Author data is only a fallback for the committer fields.
                if not committer_was_set:
                    rev.committer = author
                    rev.timestamp = float(timestamp)
                    rev.timezone = klass._parse_tz(timezone)
            elif name == 'committer':
                committer_was_set = True
                committer, timestamp, timezone = value.rsplit(' ', 2)
                rev.committer = committer
                rev.timestamp = float(timestamp)
                rev.timezone = klass._parse_tz(timezone)
            elif name == 'tree':
                rev.properties['git-tree-id'] = value

        rev.message = ''.join(message_lines)

        # XXX: That should not be needed, but current revision serializers do
        # not know how to handle text that is illegal in xml. Note: when
        # this is fixed, we will need to rev up the revision namespace when
        # removing the escaping code. -- David Allouche 2007-12-30
        rev.message = escape_for_xml(rev.message)
        rev.committer = escape_for_xml(rev.committer)
        # A header block without an author line leaves no 'author' property.
        if 'author' in rev.properties:
            rev.properties['author'] = escape_for_xml(
                rev.properties['author'])

        return rev
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
236  | 
|
| 
0.200.35
by David Allouche
 GitRepository._parse_rev sets Revision.timezone to a float instead of a string.  | 
237  | 
    @classmethod
 | 
238  | 
def _parse_tz(klass, tz):  | 
|
239  | 
"""Parse a timezone specification in the [+|-]HHMM format.  | 
|
240  | 
||
241  | 
        :return: the timezone offset in seconds.
 | 
|
242  | 
        """
 | 
|
243  | 
assert len(tz) == 5  | 
|
244  | 
sign = {'+': +1, '-': -1}[tz[0]]  | 
|
245  | 
hours = int(tz[1:3])  | 
|
246  | 
minutes = int(tz[3:])  | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
247  | 
return sign * 60 * (60 * hours + minutes)  | 
| 
0.200.35
by David Allouche
 GitRepository._parse_rev sets Revision.timezone to a float instead of a string.  | 
248  | 
|
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
249  | 
def revision_trees(self, revids):
    """Lazily yield the revision tree of each id in `revids`, in order.

    :param revids: iterable of bzr revision ids.
    """
    for revision_id in revids:
        tree = self.revision_tree(revision_id)
        yield tree
|
252  | 
||
253  | 
def revision_tree(self, revision_id):
    """Return a `GitRevisionTree` of this repository at `revision_id`."""
    tree = GitRevisionTree(self, revision_id)
    return tree
|
255  | 
||
| 
0.200.46
by David Allouche
 Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.  | 
256  | 
def _fetch_blob(self, git_id):  | 
257  | 
lines = self._git.cat_file('blob', git_id)  | 
|
| 
0.204.5
by James Westby
 Lose the debuggin prints.  | 
258  | 
        # print "fetched blob:", git_id
 | 
| 
0.200.46
by David Allouche
 Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.  | 
259  | 
if self._building_inventory is not None:  | 
260  | 
self._building_inventory.git_file_data[git_id] = lines  | 
|
261  | 
return lines  | 
|
262  | 
||
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
263  | 
def _get_blob(self, git_id):  | 
264  | 
try:  | 
|
265  | 
return self._blob_cache[git_id]  | 
|
266  | 
except KeyError:  | 
|
| 
0.200.46
by David Allouche
 Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.  | 
267  | 
return self._fetch_blob(git_id)  | 
268  | 
||
269  | 
def _get_blob_caching(self, git_id):  | 
|
270  | 
try:  | 
|
271  | 
return self._blob_cache[git_id]  | 
|
272  | 
except KeyError:  | 
|
273  | 
lines = self._fetch_blob(git_id)  | 
|
274  | 
self._blob_cache[git_id] = lines  | 
|
275  | 
return lines  | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
276  | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
277  | 
def _get_blob_info(self, git_id):  | 
278  | 
try:  | 
|
279  | 
return self._blob_info_cache[git_id]  | 
|
280  | 
except KeyError:  | 
|
281  | 
lines = self._get_blob(git_id)  | 
|
282  | 
size = sum(len(line) for line in lines)  | 
|
283  | 
sha1 = osutils.sha_strings(lines)  | 
|
284  | 
self._blob_info_cache[git_id] = (size, sha1)  | 
|
285  | 
return size, sha1  | 
|
286  | 
||
| 
0.200.18
by John Arbash Meinel
 Start splitting up the Git{Branch,Dir,Repository} into separate modules, etc.  | 
287  | 
def get_inventory(self, revision_id):
    """Build the inventory of `revision_id` from git data.

    :param revision_id: bzr revision id; None is treated as the null
        revision, for which an empty inventory without root is returned.
    :return: an `inventory.Inventory`. For non-null revisions it also
        carries ``git_ids`` (file id -> git id) and ``git_file_data``
        (git id -> lines of the blobs fetched while building it).
    """
    if revision_id is None:
        revision_id = revision.NULL_REVISION
    if revision_id == revision.NULL_REVISION:
        return inventory.Inventory(
            revision_id=revision_id, root_id=None)

    # First pass at building the inventory. We need this one to get the
    # git ids, so we do not have to cache the entire tree text. Ideally,
    # this should be all we need to do.
    git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
    git_inventory = self._git.get_inventory(git_commit)
    inv = self._parse_inventory(revision_id, git_inventory)

    # Second pass at building the inventory. There we retrieve additional
    # data that bzrlib requires: text sizes, sha1s, symlink targets and
    # revisions that introduced inventory entries
    inv.git_file_data = {}
    self._building_inventory = inv
    try:
        # git_ids is not modified during the second pass, so the sorted
        # key list can be shared by both loops.
        file_ids = sorted(inv.git_ids)
        for file_id in file_ids:
            self._set_entry_text_info(
                inv, inv[file_id], inv.git_ids[file_id])
        for file_id in file_ids:
            self._set_entry_revision(
                inv[file_id], revision_id, inv.id2path(file_id),
                inv.git_ids[file_id])

        # At this point the entry_revision table is fully populated for
        # this revision. So record that we have inventory data for this
        # revision.
        self.cachedb.execute(
            "insert or ignore into inventory (revid) values (?)",
            (revision_id,))
        self.cachedb.commit()
    finally:
        # Always leave "building" mode, even on failure; otherwise later
        # blob fetches would keep writing into this inventory.
        self._building_inventory = None
    return inv
| 
0.200.38
by David Allouche
 Reimplement GitRepository.get_inventory, simpler and faster.  | 
325  | 
|
326  | 
    @classmethod
    def _parse_inventory(klass, revid, git_inv):
        """Turn a git tree listing into an inventory.

        :param revid: bzr revision id the inventory belongs to; also set as
            the revision of the inventory root.
        :param git_inv: iterable of ``(perms, git_kind, git_id, path)``
            tuples.
        :return: an `inventory.Inventory` with an extra ``git_ids`` dict that
            maps each file id to its git id.
        :raises AssertionError: for a git kind other than 'blob' or 'tree',
            or a blob whose permission string is neither a regular file nor
            a symlink.
        """
        import stat
        # Same value as the octal mask 0111: any of the three execute bits.
        any_exec_bit = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        # For now, git inventory do not have root ids. It is not clear that we
        # can reliably support root ids. -- David Allouche 2007-12-28
        inv = inventory.Inventory(revision_id=revid)
        inv.git_ids = {}
        for perms, git_kind, git_id, path in git_inv:
            executable = False
            if git_kind == 'blob':
                # The second character of perms tells files from symlinks.
                if perms[1] == '0':
                    kind = 'file'
                    executable = bool(int(perms[-3:], 8) & any_exec_bit)
                elif perms[1] == '2':
                    kind = 'symlink'
                else:
                    raise AssertionError(
                        "Unknown blob kind, perms=%r." % (perms,))
            elif git_kind == 'tree':
                kind = 'directory'
            else:
                raise AssertionError(
                    "Unknown git entry kind: %r" % (git_kind,))
            # XXX: Maybe the file id should be prefixed by file kind, so when
            # the kind of path changes, the id changes too.
            # -- David Allouche 2007-12-28.
            file_id = escape_file_id(path.encode('utf-8'))
            entry = inv.add_path(path, kind, file_id=file_id)
            entry.executable = executable
            inv.git_ids[file_id] = git_id
        inv.root.revision = revid
        return inv
|
358  | 
||
359  | 
def _set_entry_text_info(self, inv, entry, git_id):  | 
|
360  | 
if entry.kind == 'directory':  | 
|
361  | 
            return
 | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
362  | 
size, sha1 = self._get_blob_info(git_id)  | 
363  | 
entry.text_size = size  | 
|
364  | 
entry.text_sha1 = sha1  | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
365  | 
if entry.kind == 'symlink':  | 
| 
0.200.46
by David Allouche
 Only keep symlink blobs in memory. Add some clever inventory-based caching so modified files need not be read twice when pulling.  | 
366  | 
lines = self._get_blob_caching(git_id)  | 
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
367  | 
entry.symlink_target = ''.join(lines)  | 
368  | 
||
369  | 
def _get_file_revision(self, revision_id, path):
    """Ask git for the revision reported for `path` at `revision_id`.

    Runs ``rev_list`` limited to one result and to `path`, and expects
    exactly one output line.

    :param revision_id: bzr revision id to start from.
    :param path: path to restrict the listing to.
    :return: the bzr revision id converted from the git commit on that line.
    """
    git_commit = ids.convert_revision_id_bzr_to_git(revision_id)
    [only_line] = self._git.rev_list(
        [git_commit], max_count=1, topo_order=True, paths=[path])
    # Drop the trailing newline before converting the commit id.
    return ids.convert_revision_id_git_to_bzr(only_line[:-1])
377  | 
||
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
378  | 
def _get_entry_revision_from_db(self, revid, path, git_id, executable):  | 
379  | 
result = self.cachedb.execute(  | 
|
380  | 
            "select revision from entry_revision where"
 | 
|
381  | 
" inventory=? and path=? and gitid=? and executable=?",  | 
|
382  | 
(revid, path, git_id, executable)).fetchone()  | 
|
383  | 
if result is None:  | 
|
384  | 
return None  | 
|
385  | 
[revision] = result  | 
|
386  | 
return revision  | 
|
387  | 
||
388  | 
def _set_entry_revision_in_db(self, revid, path, git_id, executable, revision):  | 
|
389  | 
self.cachedb.execute(  | 
|
390  | 
            "insert into entry_revision"
 | 
|
391  | 
            " (inventory, path, gitid, executable, revision)"
 | 
|
392  | 
" values (?, ?, ?, ?, ?)",  | 
|
393  | 
(revid, path, git_id, executable, revision))  | 
|
394  | 
||
395  | 
def _all_inventories_in_db(self, revids):  | 
|
396  | 
for revid in revids:  | 
|
397  | 
result = self.cachedb.execute(  | 
|
398  | 
"select count(*) from inventory where revid = ?",  | 
|
399  | 
(revid,)).fetchone()  | 
|
400  | 
if result is None:  | 
|
401  | 
return False  | 
|
402  | 
return True  | 
|
403  | 
||
| 
0.200.44
by David Allouche
 Remove some experimental cruft.  | 
404  | 
def _set_entry_revision(self, entry, revid, path, git_id):  | 
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
405  | 
        # If a revision is in the cache, we assume it contains entries for the
 | 
406  | 
        # whole inventory. So if all parent revisions are in the cache, but no
 | 
|
407  | 
        # parent entry is present, then the entry revision is the current
 | 
|
| 
0.200.44
by David Allouche
 Remove some experimental cruft.  | 
408  | 
        # revision. That amortizes the number of _get_file_revision calls for
 | 
409  | 
        # large pulls to a "small number".
 | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
410  | 
entry_rev = self._get_entry_revision_from_db(  | 
411  | 
revid, path, git_id, entry.executable)  | 
|
412  | 
if entry_rev is not None:  | 
|
413  | 
entry.revision = entry_rev  | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
414  | 
            return
 | 
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
415  | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
416  | 
revision = self.get_revision(revid)  | 
417  | 
for parent_id in revision.parent_ids:  | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
418  | 
entry_rev = self._get_entry_revision_from_db(  | 
419  | 
parent_id, path, git_id, entry.executable)  | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
420  | 
if entry_rev is not None:  | 
421  | 
                break
 | 
|
422  | 
else:  | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
423  | 
if self._all_inventories_in_db(revision.parent_ids):  | 
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
424  | 
entry_rev = revid  | 
425  | 
else:  | 
|
426  | 
entry_rev = self._get_file_revision(revid, path)  | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
427  | 
self._set_entry_revision_in_db(  | 
428  | 
revid, path, git_id, entry.executable, entry_rev)  | 
|
429  | 
        #self.cachedb.commit()
 | 
|
| 
0.200.43
by David Allouche
 Ultra-experimental support for "bzr pull". No test. No sanity.  | 
430  | 
entry.revision = entry_rev  | 
431  | 
||
432  | 
||
433  | 
def escape_file_id(file_id):
    """Escape a path for use as a file id.

    Underscores are doubled first, then each space becomes ``_s``.

    :param file_id: string to escape.
    :return: the escaped string.
    """
    doubled = file_id.replace('_', '__')
    return doubled.replace(' ', '_s')
|
| 
0.200.39
by David Allouche
 Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.  | 
435  | 
|
| 
0.200.45
by David Allouche
 More performance hacking, introduce sqlite cache, escape characters in commits that break serializers.  | 
436  | 
|
437  | 
def escape_for_xml(message):
    """Replace xml-incompatible control characters.

    :param message: text to escape, or None.
    :return: None for None; otherwise `message` with every run of
        characters outside the XML character range replaced by its
        ``unicode_escape`` encoding.
    """
    # Copied from _escape_commit_message from bzr-svn.
    # -- David Allouche 2007-12-29.
    if message is None:
        return None
    import re
    # FIXME: RBC 20060419 this should be done by the revision
    # serialiser not by commit. Then we can also add an unescaper
    # in the deserializer and start roundtripping revision messages
    # precisely. See repository_implementations/test_repository.py

    def _escape_run(match):
        return match.group(0).encode('unicode_escape')

    # Python strings can include characters that can't be
    # represented in well-formed XML; escape characters that
    # aren't listed in the XML specification
    # (http://www.w3.org/TR/REC-xml/#NT-Char).
    return re.sub(
        u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
        _escape_run,
        message)
|
458  | 
||
459  | 
||
| 
0.200.39
by David Allouche
 Black-box text for "bzr log" in a git tree. Further simplification of GitRevisionTree.  | 
460  | 
class GitRevisionTree(revisiontree.RevisionTree):
    """Revision tree whose inventory and file texts come from a repository
    object providing ``get_inventory`` and ``_get_blob``.
    """

    def __init__(self, repository, revision_id):
        """Load the inventory of `revision_id` from `repository`.

        :param repository: repository to read the inventory and blobs from.
        :param revision_id: bzr revision id; None means the null revision.
        """
        if revision_id is None:
            revision_id = revision.NULL_REVISION
        self._inventory = repository.get_inventory(revision_id)
        self._repository = repository
        self._revision_id = revision_id

    def get_file_lines(self, file_id):
        """Return the content lines of the entry identified by `file_id`.

        :return: an empty list for directories; otherwise the lines kept in
            the inventory's ``git_file_data`` when present, else the lines
            read through the repository.
        """
        inv = self._inventory
        if inv[file_id].kind == 'directory':
            return []
        git_id = inv.git_ids[file_id]
        kept = inv.git_file_data
        if git_id in kept:
            return kept[git_id]
        return self._repository._get_blob(git_id)
|
| 
0.203.1
by Aaron Bentley
 Make checkouts work  | 
476  | 
|
477  | 
||
478  | 
class GitFormat(object):
    """Format object that `GitRepository.__init__` stores as ``_format``."""

    # This format does not support tree references.
    supports_tree_reference = False