/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.228 by Jelmer Vernooij
Split out map.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
19
from dulwich.objects import (
20
    Blob,
0.200.586 by Jelmer Vernooij
Fix issues pointed out by pyflakes.
21
    sha_to_hex,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
22
    )
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
23
from dulwich.object_store import (
0.200.457 by Jelmer Vernooij
Use BaseObjectStore.
24
    BaseObjectStore,
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
25
    )
0.200.249 by Jelmer Vernooij
Implement Tree.
26
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
27
from bzrlib import (
0.200.440 by Jelmer Vernooij
Remove silly mapping of timezones; dulwich uses offsets now as well.
28
    debug,
0.231.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
29
    errors,
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
30
    trace,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
31
    ui,
32
    )
0.200.541 by Jelmer Vernooij
Cope with NULL_REVISION.
33
from bzrlib.revision import (
34
    NULL_REVISION,
35
    )
0.200.228 by Jelmer Vernooij
Split out map.
36
0.200.229 by Jelmer Vernooij
More work on converter.
37
from bzrlib.plugins.git.mapping import (
0.200.463 by Jelmer Vernooij
Support remote dpush (except for references).
38
    default_mapping,
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
39
    directory_to_tree,
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
40
    extract_unusual_modes,
0.231.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
41
    mapping_registry,
0.200.229 by Jelmer Vernooij
More work on converter.
42
    )
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
43
from bzrlib.plugins.git.shamap import (
44
    SqliteGitShaMap,
0.200.476 by Jelmer Vernooij
Fix Tdb backend, use tdb if possible by default.
45
    TdbGitShaMap,
0.200.231 by Jelmer Vernooij
Partially fix pull.
46
    )
47
0.200.228 by Jelmer Vernooij
Split out map.
48
0.200.452 by Jelmer Vernooij
Rename converter -> object_store, provide utility function for getting ObjectStore's.
49
def get_object_store(repo, mapping=None):
    """Return a Git-style object store for a repository.

    :param repo: Repository to get an object store for. If it has a
        ``_git`` attribute that is not None, the ``object_store`` of that
        attribute is returned directly.
    :param mapping: Mapping passed on to BazaarObjectStore when a new
        store has to be created; not used otherwise.
    :return: ``repo._git.object_store`` or a new BazaarObjectStore.
    """
    backing_git = getattr(repo, "_git", None)
    if backing_git is None:
        # Nothing Git-backed to reuse; wrap the repository instead.
        return BazaarObjectStore(repo, mapping)
    return backing_git.object_store
54
55
0.200.457 by Jelmer Vernooij
Use BaseObjectStore.
56
class BazaarObjectStore(BaseObjectStore):
0.200.320 by Jelmer Vernooij
Handle lightweight checkouts.
57
    """A Git-style object store backed onto a Bazaar repository."""
0.200.228 by Jelmer Vernooij
Split out map.
58
59
    def __init__(self, repository, mapping=None):
        """Create an object store on top of a repository.

        :param repository: Repository the store is backed onto.
        :param mapping: Mapping to use; default_mapping is used if None.
        """
        self.repository = repository
        if mapping is not None:
            self.mapping = mapping
        else:
            self.mapping = default_mapping
        # Use the sqlite-backed map if creating the tdb-backed one
        # raises ImportError.
        try:
            idmap = TdbGitShaMap.from_repository(repository)
        except ImportError:
            idmap = SqliteGitShaMap.from_repository(repository)
        self._idmap = idmap
0.200.228 by Jelmer Vernooij
Split out map.
69
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
70
    def _update_sha_map(self, stop_revision=None):
        """Add revisions that the sha map reports as missing to the map.

        :param stop_revision: Revision to start walking back from. If
            None, the graph heads of all revision ids in the repository
            are used as starting points.
        """
        graph = self.repository.get_graph()
        if stop_revision is not None:
            frontier = set([stop_revision])
        else:
            frontier = graph.heads(self.repository.all_revision_ids())
        missing_revids = self._idmap.missing_revisions(frontier)
        # Walk back through the parents. Each round continues from the
        # parents that are not yet collected and that the map reports as
        # missing; the walk ends once a round finds none.
        while frontier:
            candidates = set()
            for parent_ids in graph.get_parent_map(frontier).values():
                candidates.update(
                    revid for revid in parent_ids
                    if revid not in missing_revids)
            frontier = self._idmap.missing_revisions(candidates)
            missing_revids.update(frontier)
        if NULL_REVISION in missing_revids:
            missing_revids.remove(NULL_REVISION)
        progress = ui.ui_factory.nested_progress_bar()
        try:
            ordered_revids = graph.iter_topo_order(missing_revids)
            for position, revid in enumerate(ordered_revids):
                progress.update("updating git map", position,
                                len(missing_revids))
                self._update_sha_map_revision(revid)
        finally:
            progress.finished()
        self._idmap.commit_write_group()
0.200.229 by Jelmer Vernooij
More work on converter.
94
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
95
    def __iter__(self):
96
        self._update_sha_map()
97
        return iter(self._idmap.sha1s())
98
0.238.7 by Jelmer Vernooij
Cope with ghosts a bit better.
99
    def _revision_to_commit(self, rev, tree_sha):
        """Export a revision as a commit through the mapping.

        :param rev: Revision to export.
        :param tree_sha: sha of the tree belonging to the revision.
        :return: The result of ``self.mapping.export_commit()``.
        """
        def lookup_parent_sha(parent_revid):
            # A parent whose lookup raises NoSuchRevision is treated as
            # a ghost: warn about it and report no sha for it.
            try:
                parent_sha = self._lookup_revision_sha1(parent_revid)
            except errors.NoSuchRevision:
                trace.warning("Ignoring ghost parent %s", parent_revid)
                parent_sha = None
            return parent_sha
        return self.mapping.export_commit(rev, tree_sha, lookup_parent_sha)
0.238.7 by Jelmer Vernooij
Cope with ghosts a bit better.
107
0.200.229 by Jelmer Vernooij
More work on converter.
108
    def _update_sha_map_revision(self, revid):
        """Convert a single revision and record its commit in the sha map.

        If the revision id can be parsed by the mapping registry, the
        recreated commit must have the id encoded in the revision id,
        unless the "fix-shamap" debug flag is set.

        :param revid: Id of the revision to convert.
        :raises AssertionError: if the recreated commit id differs from
            the one encoded in the revision id and "fix-shamap" is not
            in debug.debug_flags.
        """
        inv = self.repository.get_inventory(revid)
        rev = self.repository.get_revision(revid)
        unusual_modes = extract_unusual_modes(rev)
        tree_sha = self._get_ie_sha1(inv.root, inv, unusual_modes)
        commit_obj = self._revision_to_commit(rev, tree_sha)
        try:
            # Only the foreign revision id is needed for the check.
            foreign_revid, _unused_mapping = (
                mapping_registry.parse_revision_id(revid))
        except errors.InvalidRevisionId:
            # The revision id could not be parsed, so there is no
            # expected sha to compare against.
            pass
        else:
            if (foreign_revid != commit_obj.id and
                    "fix-shamap" not in debug.debug_flags):
                raise AssertionError("recreated git commit had different sha1: expected %s, got %s" % (foreign_revid, commit_obj.id))
        self._idmap.add_entry(commit_obj.id, "commit", (revid, tree_sha))
0.200.229 by Jelmer Vernooij
More work on converter.
123
0.200.353 by Jelmer Vernooij
fileids/revids are plain strings, not unicode
124
    def _check_expected_sha(self, expected_sha, object):
125
        if expected_sha is None:
126
            return
0.200.565 by Jelmer Vernooij
Cope with 'binary' shas.
127
        if len(expected_sha) == 40:
128
            if expected_sha != object.sha().hexdigest():
129
                raise AssertionError("Invalid sha for %r: %s" % (object, expected_sha))
130
        elif len(expected_sha) == 20:
131
            if expected_sha != object.sha().digest():
132
                raise AssertionError("Invalid sha for %r: %s" % (object, sha_to_hex(expected_sha)))
133
        else:
134
            raise AssertionError("Unknown length %d for %r" % (len(expected_sha), expected_sha))
0.200.353 by Jelmer Vernooij
fileids/revids are plain strings, not unicode
135
0.200.664 by Jelmer Vernooij
Support submodules during fetch.
136
    def _get_ie_object(self, entry, inv, unusual_modes):
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
137
        if entry.kind == "directory":
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
138
            return self._get_tree(entry.file_id, inv.revision_id, inv, unusual_modes)
0.200.671 by Jelmer Vernooij
Fix symlink use.
139
        elif entry.kind in ("file", "symlink"):
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
140
            return self._get_blob(entry.file_id, entry.revision)
0.200.664 by Jelmer Vernooij
Support submodules during fetch.
141
        else:
142
            raise AssertionError("unknown entry kind '%s'" % entry.kind)
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
143
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
144
    def _get_ie_object_or_sha1(self, entry, inv, unusual_modes):
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
145
        if entry.kind == "directory":
146
            try:
147
                return self._idmap.lookup_tree(entry.file_id, inv.revision_id), None
148
            except KeyError:
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
149
                ret = self._get_ie_object(entry, inv, unusual_modes)
0.200.593 by Jelmer Vernooij
Avoid writing empty trees.
150
                if ret is None:
151
                    hexsha = None
152
                else:
153
                    hexsha = ret.id
154
                self._idmap.add_entry(hexsha, "tree", (entry.file_id, inv.revision_id))
155
                return hexsha, ret
0.200.670 by Jelmer Vernooij
Fix symlinks.
156
        elif entry.kind in ("file", "symlink"):
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
157
            try:
158
                return self._idmap.lookup_blob(entry.file_id, entry.revision), None
159
            except KeyError:
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
160
                ret = self._get_ie_object(entry, inv, unusual_modes)
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
161
                self._idmap.add_entry(ret.id, "blob", (entry.file_id, entry.revision))
162
                return ret.id, ret
0.200.664 by Jelmer Vernooij
Support submodules during fetch.
163
        else:
164
            raise AssertionError("unknown entry kind '%s'" % entry.kind)
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
165
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
166
    def _get_ie_sha1(self, entry, inv, unusual_modes):
167
        return self._get_ie_object_or_sha1(entry, inv, unusual_modes)[0]
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
168
0.200.353 by Jelmer Vernooij
fileids/revids are plain strings, not unicode
169
    def _get_blob(self, fileid, revision, expected_sha=None):
        """Create a Git Blob from a text stored in the bzr repository.

        :param fileid: File id of the text
        :param revision: Revision of the text
        :param expected_sha: Optional sha the blob must have; checked
            with _check_expected_sha.
        :return: A Blob whose text is the joined chunks of the file.
        """
        stream = self.repository.iter_files_bytes([(fileid, revision, None)])
        # A single text was requested, so only the first result is read;
        # its second element holds the chunks of the text.
        first_result = stream.next()
        blob = Blob()
        blob._text = "".join(first_result[1])
        self._check_expected_sha(expected_sha, blob)
        return blob
180
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
181
    def _get_tree(self, fileid, revid, inv, unusual_modes, expected_sha=None):
        """Create a Git Tree object for a directory stored in bzr.

        :param fileid: File id of the directory in the inventory.
        :param revid: Revision of the tree (not read by this method).
        :param inv: Inventory in which fileid is looked up.
        :param unusual_modes: Passed on to directory_to_tree and to the
            sha lookup of the directory's entries.
        :param expected_sha: Optional sha the tree must have; checked
            with _check_expected_sha.
        :return: The result of directory_to_tree().
        """
        def entry_sha(ie):
            # Callback handed to directory_to_tree to obtain the sha of
            # an inventory entry.
            return self._get_ie_sha1(ie, inv, unusual_modes)
        directory = inv[fileid]
        tree = directory_to_tree(directory, entry_sha, unusual_modes)
        self._check_expected_sha(expected_sha, tree)
        return tree
0.200.229 by Jelmer Vernooij
More work on converter.
192
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
193
    def _get_commit(self, rev, tree_sha, expected_sha=None):
0.238.7 by Jelmer Vernooij
Cope with ghosts a bit better.
194
        commit = self._revision_to_commit(rev, tree_sha)
0.200.353 by Jelmer Vernooij
fileids/revids are plain strings, not unicode
195
        self._check_expected_sha(expected_sha, commit)
196
        return commit
0.200.228 by Jelmer Vernooij
Split out map.
197
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
198
    def get_parents(self, sha):
0.200.454 by Jelmer Vernooij
Use ObjectStore.find_missing_objects in server.
199
        """Retrieve the parents of a Git commit by SHA1.
200
201
        :param sha: SHA1 of the commit
202
        :raises: KeyError, NotCommitError
203
        """
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
204
        return self[sha].parents
205
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
206
    def _lookup_revision_sha1(self, revid):
        """Return the SHA1 matching a Bazaar revision.

        :param revid: Revision id to look up.
        :return: Forty zeros for NULL_REVISION; otherwise the sha from
            the sha map, or the one parsed out of the revision id.
        """
        if revid == NULL_REVISION:
            return "0" * 40
        try:
            return self._idmap.lookup_commit(revid)
        except KeyError:
            pass
        # Not in the map: try to read the sha from the revision id before
        # resorting to an update of the map.
        try:
            return mapping_registry.parse_revision_id(revid)[0]
        except errors.InvalidRevisionId:
            pass
        self._update_sha_map(revid)
        return self._idmap.lookup_commit(revid)
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
218
0.200.310 by Jelmer Vernooij
Fix pull from remote branches.
219
    def get_raw(self, sha):
0.200.454 by Jelmer Vernooij
Use ObjectStore.find_missing_objects in server.
220
        """Get the raw representation of a Git object by SHA1.
221
222
        :param sha: SHA1 of the git object
223
        """
0.200.566 by Jelmer Vernooij
Fix ObjectStore.get_raw() .
224
        obj = self[sha]
225
        return (obj.type, obj.as_raw_string())
0.200.310 by Jelmer Vernooij
Fix pull from remote branches.
226
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
227
    def __contains__(self, sha):
228
        # See if sha is in map
229
        try:
0.200.568 by Jelmer Vernooij
Properly check that matching bzr objects exist.
230
            (type, type_data) = self._lookup_git_sha(sha)
231
            if type == "commit":
232
                return self.repository.has_revision(type_data[0])
233
            elif type == "blob":
234
                return self.repository.texts.has_version(type_data)
235
            elif type == "tree":
236
                return self.repository.has_revision(type_data[1])
237
            else:
238
                raise AssertionError("Unknown object type '%s'" % type)
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
239
        except KeyError:
240
            return False
241
        else:
242
            return True
243
244
    def _lookup_git_sha(self, sha):
245
        # See if sha is in map
246
        try:
247
            return self._idmap.lookup_git_sha(sha)
0.200.228 by Jelmer Vernooij
Split out map.
248
        except KeyError:
0.200.670 by Jelmer Vernooij
Fix symlinks.
249
            # if not, see if there are any unconverted revisions and add them
0.200.228 by Jelmer Vernooij
Split out map.
250
            # to the map, search for sha in map again
251
            self._update_sha_map()
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
252
            return self._idmap.lookup_git_sha(sha)
253
254
    def __getitem__(self, sha):
255
        (type, type_data) = self._lookup_git_sha(sha)
0.200.228 by Jelmer Vernooij
Split out map.
256
        # convert object to git object
0.200.229 by Jelmer Vernooij
More work on converter.
257
        if type == "commit":
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
258
            try:
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
259
                rev = self.repository.get_revision(type_data[0])
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
260
            except errors.NoSuchRevision:
261
                trace.mutter('entry for %s %s in shamap: %r, but not found in repository', type, sha, type_data)
262
                raise KeyError(sha)
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
263
            return self._get_commit(rev, type_data[1], expected_sha=sha)
0.200.229 by Jelmer Vernooij
More work on converter.
264
        elif type == "blob":
0.200.353 by Jelmer Vernooij
fileids/revids are plain strings, not unicode
265
            return self._get_blob(type_data[0], type_data[1], expected_sha=sha)
0.200.229 by Jelmer Vernooij
More work on converter.
266
        elif type == "tree":
0.200.561 by Jelmer Vernooij
Cope with revisions pointed to by trees in the shamap disappearing.
267
            try:
268
                inv = self.repository.get_inventory(type_data[1])
269
                rev = self.repository.get_revision(type_data[1])
270
            except errors.NoSuchRevision:
271
                trace.mutter('entry for %s %s in shamap: %r, but not found in repository', type, sha, type_data)
272
                raise KeyError(sha)
0.200.556 by Jelmer Vernooij
Fix syntax error.
273
            unusual_modes = extract_unusual_modes(rev)
0.200.491 by Jelmer Vernooij
Cope with map for Tree objects becoming invalid.
274
            try:
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
275
                return self._get_tree(type_data[0], type_data[1], inv, unusual_modes,
0.200.491 by Jelmer Vernooij
Cope with map for Tree objects becoming invalid.
276
                                      expected_sha=sha)
277
            except errors.NoSuchRevision:
278
                raise KeyError(sha)
0.200.228 by Jelmer Vernooij
Split out map.
279
        else:
280
            raise AssertionError("Unknown object type '%s'" % type)