/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.228 by Jelmer Vernooij
Split out map.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
19
from dulwich.objects import (
20
    Blob,
0.200.864 by Jelmer Vernooij
Cope with the first commit being pointless.
21
    Tree,
0.200.586 by Jelmer Vernooij
Fix issues pointed out by pyflakes.
22
    sha_to_hex,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
23
    )
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
24
from dulwich.object_store import (
0.200.457 by Jelmer Vernooij
Use BaseObjectStore.
25
    BaseObjectStore,
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
26
    )
0.200.249 by Jelmer Vernooij
Implement Tree.
27
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
28
from bzrlib import (
0.231.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
29
    errors,
0.200.789 by Jelmer Vernooij
Cope with ghosts, cache inventories.
30
    lru_cache,
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
31
    trace,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
32
    ui,
0.200.773 by Jelmer Vernooij
Implement inventory_to_objects
33
    urlutils,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
34
    )
0.200.541 by Jelmer Vernooij
Cope with NULL_REVISION.
35
from bzrlib.revision import (
36
    NULL_REVISION,
37
    )
0.200.228 by Jelmer Vernooij
Split out map.
38
0.200.229 by Jelmer Vernooij
More work on converter.
39
from bzrlib.plugins.git.mapping import (
0.200.463 by Jelmer Vernooij
Support remote dpush (except for references).
40
    default_mapping,
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
41
    directory_to_tree,
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
42
    extract_unusual_modes,
0.231.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
43
    mapping_registry,
0.200.795 by Jelmer Vernooij
simplify sha extraction for blobs, process multiple blobs at once.
44
    symlink_to_blob,
0.200.229 by Jelmer Vernooij
More work on converter.
45
    )
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
46
from bzrlib.plugins.git.shamap import (
0.200.842 by Jelmer Vernooij
Allow content cache to be provided.
47
    from_repository as cache_from_repository,
0.200.231 by Jelmer Vernooij
Partially fix pull.
48
    )
49
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
50
import posixpath
0.252.23 by Jelmer Vernooij
More work on roundtripping support.
51
import stat
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
52
0.200.228 by Jelmer Vernooij
Split out map.
53
0.200.452 by Jelmer Vernooij
Rename converter -> object_store, provide utility function for getting ObjectStore's.
54
def get_object_store(repo, mapping=None):
    """Return a Git-style object store for a repository.

    :param repo: Repository to get an object store for
    :param mapping: Mapping passed on to BazaarObjectStore when one has
        to be created; not used when ``repo`` has a non-None ``_git``
    :return: ``repo._git.object_store`` if ``repo._git`` is set,
        otherwise a new BazaarObjectStore wrapping ``repo``
    """
    try:
        git = repo._git
    except AttributeError:
        git = None
    if git is None:
        return BazaarObjectStore(repo, mapping)
    return git.object_store
59
60
0.200.852 by Jelmer Vernooij
Cache trees rather than inventories.
61
# Size limit handed to the LRUSizeCache in LRUTreeCache.  Tree sizes there
# are estimated at 1024 per inventory entry, so this allows roughly
# 51200 cached inventory entries in total.
MAX_TREE_CACHE_SIZE = 50 * 1024 * 1024
62
63
64
class LRUTreeCache(object):
    """Size-bounded LRU cache of revision trees, keyed by revision id.

    Trees that are not cached are fetched from the wrapped repository and
    added to the cache on the way out.
    """

    def __init__(self, repository):
        """Create a cache on top of a repository.

        :param repository: Repository to fetch uncached revision trees from
        """
        def approx_tree_size(tree):
            # Very rough estimate, 1k per inventory entry
            return len(tree.inventory) * 1024
        self.repository = repository
        self._cache = lru_cache.LRUSizeCache(max_size=MAX_TREE_CACHE_SIZE,
            after_cleanup_size=None, compute_size=approx_tree_size)

    def revision_tree(self, revid):
        """Return the tree for a single revision, caching it if needed.

        :param revid: Revision id
        :return: The cached tree, or the one freshly fetched from the
            repository
        """
        try:
            return self._cache[revid]
        except KeyError:
            tree = self.repository.revision_tree(revid)
            self.add(tree)
            return tree

    def iter_revision_trees(self, revids):
        """Iterate over the trees for a number of revisions.

        All revisions that are not cached are requested from the
        repository in a single revision_trees() call.

        :param revids: Iterable of revision ids; it may be a one-shot
            iterator, it is only consumed once
        :return: Generator over the trees, in the order of ``revids``
        """
        # revids is walked twice (once to consult the cache, once to produce
        # the result), so pin it down first; otherwise a generator argument
        # would already be exhausted when the result is built.
        revids = list(revids)
        trees = dict([(k, self._cache.get(k)) for k in revids])
        # items() rather than iteritems(): the latter only exists on
        # Python 2 dictionaries.
        uncached = [r for r, v in trees.items() if v is None]
        for tree in self.repository.revision_trees(uncached):
            trees[tree.get_revision_id()] = tree
            self.add(tree)
        return (trees[r] for r in revids)

    def revision_trees(self, revids):
        """Return the trees for a number of revisions as a list.

        :param revids: Iterable of revision ids
        :return: List of trees, in the order of ``revids``
        """
        return list(self.iter_revision_trees(revids))

    def add(self, tree):
        """Add a tree to the cache under its own revision id.

        :param tree: Revision tree to cache
        """
        self._cache.add(tree.get_revision_id(), tree)
0.200.789 by Jelmer Vernooij
Cope with ghosts, cache inventories.
95
96
0.252.5 by Jelmer Vernooij
enable 'bzr push'.
97
def _find_missing_bzr_revids(get_parent_map, want, have):
    """Find the revisions that have to be pushed.

    Walks the ancestry of ``want`` and stops descending at revisions that
    are in ``have``. Revisions for which ``get_parent_map`` returns no
    entry are not included in the result.

    :param get_parent_map: Function that returns the parents for a sequence
        of revisions, as a dictionary mapping revision id to parent ids.
    :param want: Set of revisions the target wants
    :param have: Set of revisions the target already has
    :return: Set of revisions to fetch; never contains NULL_REVISION
    """
    pending = want - have
    processed = set()
    todo = set()
    while pending:
        processed.update(pending)
        next_map = get_parent_map(pending)
        next_pending = set()
        # items() rather than iteritems(): the latter only exists on
        # Python 2 dictionaries.
        for revid, parent_ids in next_map.items():
            if revid in have:
                # Target already has it (and thus its ancestry)
                continue
            todo.add(revid)
            next_pending.update(p for p in parent_ids if p not in processed)
        pending = next_pending
    todo.discard(NULL_REVISION)
    return todo
122
123
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
124
def _check_expected_sha(expected_sha, object):
    """Verify that an object has the SHA that is expected of it.

    :param expected_sha: None (nothing is checked), a 40 character hex
        digest or a 20 byte binary digest
    :param object: Object to verify; its ``sha()`` result is compared
    :raises AssertionError: If the SHA does not match, or if expected_sha
        has a length other than 20 or 40
    """
    if expected_sha is None:
        return
    sha_length = len(expected_sha)
    if sha_length not in (20, 40):
        raise AssertionError("Unknown length %d for %r" % (sha_length,
            expected_sha))
    actual = object.sha()
    if sha_length == 40:
        # Hex form
        if expected_sha != actual.hexdigest():
            raise AssertionError("Invalid sha for %r: %s" % (object,
                expected_sha))
        return
    # Binary form; converted to hex only for the error message
    if expected_sha != actual.digest():
        raise AssertionError("Invalid sha for %r: %s" % (object,
            sha_to_hex(expected_sha)))
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
143
144
0.252.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
145
def _tree_to_objects(tree, parent_trees, idmap, unusual_modes, dummy_file_name=None):
    """Iterate over the objects that were introduced in a revision.

    Blobs are yielded first (symlink blobs while walking the changes, file
    blobs afterwards), followed by the tree objects of the affected
    directories in reverse sorted path order.

    :param tree: Revision tree to generate objects for
    :param parent_trees: Trees of the parent revisions. The first one is
        the base the changes are computed against, the others are only
        consulted to find entries that are unchanged relative to them.
        If empty, the tree of NULL_REVISION is used as base.
    :param idmap: id map
    :param unusual_modes: Unusual file modes
    :param dummy_file_name: File name to use for dummy files
        in empty directories. None to skip empty directories
    :return: Yields (path, object, ie) entries
    """
    # Directory path -> file id, for directories whose tree object has to be
    # regenerated
    new_trees = {}
    # (path, inventory entry) of files for which a blob has to be created
    new_blobs = []
    # file id -> git sha of the objects determined so far
    shamap = {}
    try:
        base_tree = parent_trees[0]
        other_parent_trees = parent_trees[1:]
    except IndexError:
        base_tree = tree._repository.revision_tree(NULL_REVISION)
        other_parent_trees = []
    def find_unchanged_parent_ie(ie, parent_trees):
        # Return the entry from the first of parent_trees in which the file
        # id has the same kind, text sha1 and symlink target as ie.
        # Raises KeyError if there is none.
        assert ie.kind in ("symlink", "file")
        for ptree in parent_trees:
            try:
                pie = ptree.inventory[ie.file_id]
            except errors.NoSuchId:
                pass
            else:
                if (pie.text_sha1 == ie.text_sha1 and
                    pie.kind == ie.kind and
                    pie.symlink_target == ie.symlink_target):
                    return pie
        raise KeyError
    for (file_id, path, changed_content, versioned, parent, name, kind,
         executable) in tree.iter_changes(base_tree):
        if kind[1] == "file":
            ie = tree.inventory[file_id]
            if changed_content:
                try:
                    pie = find_unchanged_parent_ie(ie, other_parent_trees)
                except KeyError:
                    pass
                else:
                    # Same content as in one of the other parents; reuse
                    # that blob's sha rather than creating a new blob
                    try:
                        shamap[ie.file_id] = idmap.lookup_blob_id(
                            pie.file_id, pie.revision)
                    except KeyError:
                        # no-change merge ?
                        blob = Blob()
                        blob.data = tree.get_file_text(ie.file_id)
                        shamap[ie.file_id] = blob.id
            if not file_id in shamap:
                new_blobs.append((path[1], ie))
            new_trees[posixpath.dirname(path[1])] = parent[1]
        elif kind[1] == "symlink":
            ie = tree.inventory[file_id]
            if changed_content:
                blob = symlink_to_blob(ie)
                shamap[file_id] = blob.id
                # Only yield the blob if none of the other parents has the
                # same symlink already
                try:
                    find_unchanged_parent_ie(ie, other_parent_trees)
                except KeyError:
                    yield path[1], blob, ie
            new_trees[posixpath.dirname(path[1])] = parent[1]
        elif kind[1] not in (None, "directory"):
            raise AssertionError(kind[1])
        if path[0] is not None:
            # The directory that held the entry in the base tree has to be
            # regenerated as well
            new_trees[posixpath.dirname(path[0])] = parent[0]

    # Fetch the texts of all new file blobs in one go
    for (path, ie), chunks in tree.iter_files_bytes(
        [(ie.file_id, (path, ie)) for (path, ie) in new_blobs]):
        obj = Blob()
        obj.chunked = chunks
        yield path, obj, ie
        shamap[ie.file_id] = obj.id

    for path in unusual_modes:
        parent_path = posixpath.dirname(path)
        new_trees[parent_path] = tree.path2id(parent_path)

    # Propagate upwards: every ancestor directory of an affected directory
    # has to be regenerated too. Each pass handles one level of parents.
    trees = {}
    while new_trees:
        items = new_trees.items()
        new_trees = {}
        for path, file_id in items:
            try:
                parent_id = tree.inventory[file_id].parent_id
            except errors.NoSuchId:
                # Directory was removed recursively perhaps ?
                continue
            if parent_id is not None:
                parent_path = urlutils.dirname(path)
                new_trees[parent_path] = parent_id
            trees[path] = file_id

    def ie_to_hexsha(ie):
        # Return the git sha for an inventory entry: from shamap if it was
        # determined during this call, otherwise looked up or recalculated.
        try:
            return shamap[ie.file_id]
        except KeyError:
            # FIXME: Should be the same as in parent
            if ie.kind in ("file", "symlink"):
                try:
                    return idmap.lookup_blob_id(ie.file_id, ie.revision)
                except KeyError:
                    # no-change merge ?
                    blob = Blob()
                    blob.data = tree.get_file_text(ie.file_id)
                    return blob.id
            elif ie.kind == "directory":
                # Not all cache backends store the tree information,
                # calculate again from scratch
                ret = directory_to_tree(ie, ie_to_hexsha, unusual_modes,
                    dummy_file_name)
                if ret is None:
                    return ret
                return ret.id
            else:
                raise AssertionError

    # Reverse sorted order, so that a path is handled before any path it is
    # a prefix of (e.g. "a/b" before "a", and "" last)
    for path in sorted(trees.keys(), reverse=True):
        ie = tree.inventory[trees[path]]
        assert ie.kind == "directory"
        obj = directory_to_tree(ie, ie_to_hexsha, unusual_modes,
            dummy_file_name)
        if obj is not None:
            yield path, obj, ie
            shamap[ie.file_id] = obj.id
270
271
0.200.457 by Jelmer Vernooij
Use BaseObjectStore.
272
class BazaarObjectStore(BaseObjectStore):
0.200.320 by Jelmer Vernooij
Handle lightweight checkouts.
273
    """A Git-style object store backed onto a Bazaar repository."""
0.200.228 by Jelmer Vernooij
Split out map.
274
275
    def __init__(self, repository, mapping=None):
        """Create an object store on top of a Bazaar repository.

        :param repository: Bazaar repository that backs the store
        :param mapping: Mapping to use. None to use default_mapping.
        """
        self.repository = repository
        if mapping is None:
            self.mapping = default_mapping
        else:
            self.mapping = mapping
        self._cache = cache_from_repository(repository)
        # One-element tuple. Without the trailing comma this would be the
        # plain string "tree", on which ``in`` does substring matching.
        self._content_cache_types = ("tree",)
        # Write groups are those of the cache's id map
        self.start_write_group = self._cache.idmap.start_write_group
        self.abort_write_group = self._cache.idmap.abort_write_group
        self.commit_write_group = self._cache.idmap.commit_write_group
        self.tree_cache = LRUTreeCache(self.repository)
0.200.228 by Jelmer Vernooij
Split out map.
287
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
288
    def _update_sha_map(self, stop_revision=None):
        """Add the revisions that the id map is missing to the cache.

        :param stop_revision: Revision to start looking for missing
            revisions from. None to start from the heads of all revisions
            in the repository.
        """
        graph = self.repository.get_graph()
        if stop_revision is not None:
            heads = set([stop_revision])
        else:
            heads = graph.heads(self.repository.all_revision_ids())
        idmap = self._cache.idmap
        missing_revids = idmap.missing_revisions(heads)
        # Keep stepping to the parents for as long as the id map reports
        # some of them as missing
        while heads:
            parent_map = graph.get_parent_map(heads)
            candidates = set()
            for parent_ids in parent_map.values():
                candidates.update(
                    [p for p in parent_ids if p not in missing_revids])
            heads = idmap.missing_revisions(candidates)
            missing_revids.update(heads)
        if NULL_REVISION in missing_revids:
            missing_revids.remove(NULL_REVISION)
        # Continue with what the repository returns from has_revisions()
        missing_revids = self.repository.has_revisions(missing_revids)
        if not missing_revids:
            return
        total = len(missing_revids)
        self.start_write_group()
        try:
            pb = ui.ui_factory.nested_progress_bar()
            try:
                ordered = graph.iter_topo_order(missing_revids)
                for index, revid in enumerate(ordered):
                    trace.mutter('processing %r', revid)
                    pb.update("updating git map", index, total)
                    self._update_sha_map_revision(revid)
            finally:
                pb.finished()
        except:
            # Whatever went wrong, don't leave the write group open;
            # the error itself is passed on.
            self.abort_write_group()
            raise
        else:
            self.commit_write_group()
0.200.229 by Jelmer Vernooij
More work on converter.
322
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
323
    def __iter__(self):
        """Iterate over the sha1s in the id map, after updating the map."""
        self._update_sha_map()
        sha1s = self._cache.idmap.sha1s()
        return iter(sha1s)
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
326
0.252.22 by Jelmer Vernooij
Fix file id map (de)serialization.
327
    def _reconstruct_commit(self, rev, tree_sha, roundtrip):
        """Export a Bazaar revision as a commit through the mapping.

        :param rev: Bazaar revision object
        :param tree_sha: SHA of the root tree for the commit
        :param roundtrip: Passed on to the mapping's export_commit()
        :return: Whatever the mapping's export_commit() returns
        """
        def parent_lookup(revid):
            # Parents that are not available are reported as None
            try:
                sha = self._lookup_revision_sha1(revid)
            except errors.NoSuchRevision:
                sha = None
            return sha
        export_commit = self.mapping.export_commit
        return export_commit(rev, tree_sha, parent_lookup, roundtrip)
0.238.7 by Jelmer Vernooij
Cope with ghosts a bit better.
335
0.252.4 by Jelmer Vernooij
More work on roundtripping.
336
    def _revision_to_objects(self, rev, tree, roundtrip):
        """Convert a revision to a set of git objects.

        The objects below the root are yielded first, then (when
        roundtripping and there is one) the file id map, then the root
        tree and finally the commit.

        :param rev: Bazaar revision object
        :param tree: Bazaar revision tree
        :param roundtrip: Whether to roundtrip all Bazaar revision data
        :return: Yields (path, object, ie) entries. The root tree has
            path "", the file id map has ie None, and the commit has
            both path and ie None.
        """
        unusual_modes = extract_unusual_modes(rev)
        present_parents = self.repository.has_revisions(rev.parent_ids)
        parent_trees = self.tree_cache.revision_trees(
            [p for p in rev.parent_ids if p in present_parents])
        root_tree = None
        for path, obj, ie in _tree_to_objects(tree, parent_trees,
                self._cache.idmap, unusual_modes, self.mapping.BZR_DUMMY_FILE):
            if path == "":
                root_tree = obj
                root_ie = ie
                # Don't yield just yet
            else:
                yield path, obj, ie
        if root_tree is None:
            # Pointless commit - get the tree sha elsewhere
            if not rev.parent_ids:
                root_tree = Tree()
            else:
                base_sha1 = self._lookup_revision_sha1(rev.parent_ids[0])
                root_tree = self[self[base_sha1].tree]
            root_ie = tree.inventory.root
        if roundtrip:
            # FIXME: This can probably be a lot more efficient,
            # not all files necessarily have to be processed.
            file_ids = {}
            for (path, ie) in tree.inventory.iter_entries():
                if self.mapping.generate_file_id(path) != ie.file_id:
                    file_ids[path] = ie.file_id
            b = self.mapping.export_fileid_map(file_ids)
            if b is not None:
                # Regular file with mode rw-r--r-- (octal 644). Spelled with
                # stat constants since the octal literal syntax differs
                # between Python 2 and Python 3.
                file_mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                    stat.S_IRGRP | stat.S_IROTH)
                root_tree[self.mapping.BZR_FILE_IDS_FILE] = (file_mode, b.id)
                yield self.mapping.BZR_FILE_IDS_FILE, b, None
        yield "", root_tree, root_ie
        commit_obj = self._reconstruct_commit(rev, root_tree.id,
            roundtrip=roundtrip)
        try:
            foreign_revid, mapping = mapping_registry.parse_revision_id(
                rev.revision_id)
        except errors.InvalidRevisionId:
            # Revision id can't be parsed by the mapping registry, so there
            # is no sha to check against
            pass
        else:
            _check_expected_sha(foreign_revid, commit_obj)
        yield None, commit_obj, None
0.200.783 by Jelmer Vernooij
Move object generation into a separate function.
386
0.200.838 by Jelmer Vernooij
Add convenience object for updating the object store.
387
    def _get_updater(self, rev):
        """Return the cache's updater for a revision.

        :param rev: Bazaar revision object
        """
        cache = self._cache
        return cache.get_updater(rev)
0.200.838 by Jelmer Vernooij
Add convenience object for updating the object store.
389
0.200.783 by Jelmer Vernooij
Move object generation into a separate function.
390
    def _update_sha_map_revision(self, revid):
        """Generate the git objects for a revision and add them to the cache.

        :param revid: Id of the revision to process
        :return: Id of the commit object returned by the cache updater
        """
        rev = self.repository.get_revision(revid)
        tree = self.tree_cache.revision_tree(rev.revision_id)
        updater = self._get_updater(rev)
        objects = self._revision_to_objects(rev, tree, roundtrip=True)
        for _path, obj, ie in objects:
            updater.add_object(obj, ie)
        return updater.finish().id
0.200.229 by Jelmer Vernooij
More work on converter.
399
0.200.855 by Jelmer Vernooij
_get_ -> _reconstruct_.
400
    def _reconstruct_blobs(self, keys):
        """Yield Git Blob objects for texts stored in bzr.

        :param keys: Iterable of (fileid, revision, expected_sha) tuples.
            expected_sha may be None, in which case the sha of the blob
            is not checked.
        :return: Yields one Blob per entry that the repository's
            iter_files_bytes() returns
        """
        wanted = ((key[0], key[1], key) for key in keys)
        for key, chunks in self.repository.iter_files_bytes(wanted):
            fileid, revision, expected_sha = key
            blob = Blob()
            blob.chunked = chunks
            if blob.id != expected_sha and blob.data == "":
                # Empty text with a different sha: could be a symlink,
                # check the entry in the revision tree
                revtree = self.tree_cache.revision_tree(revision)
                entry = revtree.inventory[fileid]
                if entry.kind == 'symlink':
                    blob = symlink_to_blob(entry)
            _check_expected_sha(expected_sha, blob)
            yield blob
0.200.229 by Jelmer Vernooij
More work on converter.
419
0.200.855 by Jelmer Vernooij
_get_ -> _reconstruct_.
420
    def _reconstruct_tree(self, fileid, revid, inv, unusual_modes,
        expected_sha=None):
        """Return a Git Tree object for a directory stored in bzr.

        :param fileid: File id of the directory in the inventory.
        :param revid: Revision id used to look up the shas of subtrees.
        :param inv: Inventory that contains fileid.
        :param unusual_modes: Unusual file modes, passed on to
            directory_to_tree().
        :param expected_sha: SHA that the resulting tree should have.
            None to skip the check.
        :return: Result of directory_to_tree() for the directory
        """
        def get_ie_sha1(entry):
            # Find the sha for an entry: from the id map where possible,
            # by reconstructing the object otherwise.
            kind = entry.kind
            if kind == "directory":
                try:
                    return self._cache.idmap.lookup_tree_id(entry.file_id,
                        revid)
                except (NotImplementedError, KeyError):
                    subtree = self._reconstruct_tree(entry.file_id, revid,
                        inv, unusual_modes)
                    if subtree is None:
                        return None
                    return subtree.id
            if kind in ("file", "symlink"):
                try:
                    return self._cache.idmap.lookup_blob_id(entry.file_id,
                        entry.revision)
                except KeyError:
                    # no-change merge?
                    blobs = self._reconstruct_blobs(
                        [(entry.file_id, entry.revision, None)])
                    return blobs.next().id
            raise AssertionError("unknown entry kind '%s'" % entry.kind)
        directory = inv[fileid]
        tree = directory_to_tree(directory, get_ie_sha1, unusual_modes,
            self.mapping.BZR_DUMMY_FILE)
        _check_expected_sha(expected_sha, tree)
        return tree
0.200.229 by Jelmer Vernooij
More work on converter.
453
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
454
    def get_parents(self, sha):
        """Look up a Git commit by SHA1 and return its parents.

        :param sha: SHA1 of the commit
        :raises: KeyError, NotCommitError
        :return: the ``parents`` attribute of the object found for ``sha``
        """
        commit = self[sha]
        return commit.parents
461
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
462
    def _lookup_revision_sha1(self, revid):
        """Return the Git SHA1 that corresponds to a Bazaar revision id.

        NULL_REVISION maps to ZERO_SHA. Otherwise the id map is consulted
        first, then the revision id itself is parsed through the mapping
        registry, and only if both fail is the sha map updated for
        ``revid`` and the id map queried once more.

        :param revid: Bazaar revision id
        """
        from dulwich.protocol import ZERO_SHA
        if revid == NULL_REVISION:
            return ZERO_SHA
        try:
            return self._cache.idmap.lookup_commit(revid)
        except KeyError:
            # Not in the id map yet; try the cheaper alternatives below.
            pass
        try:
            return mapping_registry.parse_revision_id(revid)[0]
        except errors.InvalidRevisionId:
            # The revision id could not be parsed by the mapping registry.
            pass
        self._update_sha_map(revid)
        return self._cache.idmap.lookup_commit(revid)
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
475
0.200.310 by Jelmer Vernooij
Fix pull from remote branches.
476
    def get_raw(self, sha):
        """Return the raw form of the Git object with the given SHA1.

        :param sha: SHA1 of the git object
        :return: tuple of the object's ``type`` attribute and the result of
            its ``as_raw_string()`` method
        """
        git_obj = self[sha]
        obj_type = git_obj.type
        raw_text = git_obj.as_raw_string()
        return (obj_type, raw_text)
0.200.310 by Jelmer Vernooij
Fix pull from remote branches.
483
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
484
    def __contains__(self, sha):
485
        # See if sha is in map
486
        try:
0.200.897 by Jelmer Vernooij
Make lookup_git_sha public.
487
            (type, type_data) = self.lookup_git_sha(sha)
0.200.568 by Jelmer Vernooij
Properly check that matching bzr objects exist.
488
            if type == "commit":
489
                return self.repository.has_revision(type_data[0])
490
            elif type == "blob":
491
                return self.repository.texts.has_version(type_data)
492
            elif type == "tree":
493
                return self.repository.has_revision(type_data[1])
494
            else:
495
                raise AssertionError("Unknown object type '%s'" % type)
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
496
        except KeyError:
497
            return False
498
0.200.898 by Jelmer Vernooij
Optimize finding of git shas.
499
    def lookup_git_shas(self, shas, update_map=True):
        """Look up several Git SHA1s in the id map at once.

        :param shas: iterable of SHA1s to look up
        :param update_map: if true, the sha map is updated (at most once per
            call) on the first miss and that sha is looked up again
        :return: dictionary mapping each sha that was found to the value
            returned by the id map; shas that were not found are left out
        """
        found = {}
        may_refresh = update_map
        for sha in shas:
            try:
                found[sha] = self._cache.idmap.lookup_git_sha(sha)
                continue
            except KeyError:
                if not may_refresh:
                    continue
            # if not, see if there are any unconverted revisions and add
            # them to the map, search for sha in map again
            self._update_sha_map()
            may_refresh = False
            try:
                found[sha] = self._cache.idmap.lookup_git_sha(sha)
            except KeyError:
                pass
        return found
515
516
    def lookup_git_sha(self, sha, update_map=True):
        """Look up a single Git SHA1 via lookup_git_shas.

        :param sha: SHA1 to look up
        :param update_map: forwarded to lookup_git_shas
        :raises KeyError: if lookup_git_shas returned no entry for ``sha``
        """
        found = self.lookup_git_shas([sha], update_map=update_map)
        return found[sha]
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
518
519
    def __getitem__(self, sha):
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
520
        if self._cache.content_cache is not None:
0.200.840 by Jelmer Vernooij
Support using content cache.
521
            try:
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
522
                return self._cache.content_cache[sha]
0.200.840 by Jelmer Vernooij
Support using content cache.
523
            except KeyError:
524
                pass
0.200.897 by Jelmer Vernooij
Make lookup_git_sha public.
525
        (type, type_data) = self.lookup_git_sha(sha)
0.200.228 by Jelmer Vernooij
Split out map.
526
        # convert object to git object
0.200.229 by Jelmer Vernooij
More work on converter.
527
        if type == "commit":
0.200.787 by Jelmer Vernooij
Implement custom ObjectWalker.generate_pack_contents.
528
            (revid, tree_sha) = type_data
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
529
            try:
0.200.787 by Jelmer Vernooij
Implement custom ObjectWalker.generate_pack_contents.
530
                rev = self.repository.get_revision(revid)
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
531
            except errors.NoSuchRevision:
0.200.836 by Jelmer Vernooij
Allow content cache.
532
                trace.mutter('entry for %s %s in shamap: %r, but not found in '
533
                             'repository', type, sha, type_data)
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
534
                raise KeyError(sha)
0.252.22 by Jelmer Vernooij
Fix file id map (de)serialization.
535
            commit = self._reconstruct_commit(rev, tree_sha, roundtrip=True)
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
536
            _check_expected_sha(sha, commit)
0.200.785 by Jelmer Vernooij
Eliminate _get_commit.
537
            return commit
0.200.229 by Jelmer Vernooij
More work on converter.
538
        elif type == "blob":
0.200.787 by Jelmer Vernooij
Implement custom ObjectWalker.generate_pack_contents.
539
            (fileid, revision) = type_data
0.200.855 by Jelmer Vernooij
_get_ -> _reconstruct_.
540
            return self._reconstruct_blobs([(fileid, revision, sha)]).next()
0.200.229 by Jelmer Vernooij
More work on converter.
541
        elif type == "tree":
0.200.787 by Jelmer Vernooij
Implement custom ObjectWalker.generate_pack_contents.
542
            (fileid, revid) = type_data
0.200.561 by Jelmer Vernooij
Cope with revisions pointed to by trees in the shamap disappearing.
543
            try:
0.200.852 by Jelmer Vernooij
Cache trees rather than inventories.
544
                tree = self.tree_cache.revision_tree(revid)
0.200.787 by Jelmer Vernooij
Implement custom ObjectWalker.generate_pack_contents.
545
                rev = self.repository.get_revision(revid)
0.200.561 by Jelmer Vernooij
Cope with revisions pointed to by trees in the shamap disappearing.
546
            except errors.NoSuchRevision:
547
                trace.mutter('entry for %s %s in shamap: %r, but not found in repository', type, sha, type_data)
548
                raise KeyError(sha)
0.200.556 by Jelmer Vernooij
Fix syntax error.
549
            unusual_modes = extract_unusual_modes(rev)
0.200.491 by Jelmer Vernooij
Cope with map for Tree objects becoming invalid.
550
            try:
0.200.855 by Jelmer Vernooij
_get_ -> _reconstruct_.
551
                return self._reconstruct_tree(fileid, revid, tree.inventory,
0.200.852 by Jelmer Vernooij
Cache trees rather than inventories.
552
                    unusual_modes, expected_sha=sha)
0.200.491 by Jelmer Vernooij
Cope with map for Tree objects becoming invalid.
553
            except errors.NoSuchRevision:
554
                raise KeyError(sha)
0.200.228 by Jelmer Vernooij
Split out map.
555
        else:
556
            raise AssertionError("Unknown object type '%s'" % type)
0.200.782 by Jelmer Vernooij
Add custom generate_pack_contents implementation.
557
0.252.37 by Jelmer Vernooij
Factor out some common code for finding refs to send.
558
    def generate_lossy_pack_contents(self, have, want, progress=None,
            get_tagged=None):
        """Call generate_pack_contents with ``lossy=True``.

        :param have: forwarded to generate_pack_contents
        :param want: forwarded to generate_pack_contents
        :param progress: forwarded to generate_pack_contents
        :param get_tagged: forwarded to generate_pack_contents
        :return: whatever generate_pack_contents returns
        """
        contents = self.generate_pack_contents(
            have, want, progress, get_tagged, lossy=True)
        return contents
562
0.200.899 by Jelmer Vernooij
Add tests for find_missing_bzr_revids.
563
    def generate_pack_contents(self, have, want, progress=None,
            get_tagged=None, lossy=False):
        """Generate the objects that make up a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Not used by this implementation.
        :param get_tagged: Not used by this implementation.
        :param lossy: If true, objects are generated with roundtrip=False.
        :return: List of (object, path) tuples for every revision that has
            to be sent.
        :raises AssertionError: if a sha in ``have`` or ``want`` is known to
            the sha map but is not a commit
        """
        sha_map = self.lookup_git_shas(have + want)
        # Build the set once; testing membership against the ``have`` list
        # for every wanted sha would be quadratic.
        have_set = set(have)
        processed = set()
        for commit_sha in have:
            try:
                (kind, (revid, tree_sha)) = sha_map[commit_sha]
            except KeyError:
                # Unknown to the sha map; nothing to exclude for it.
                continue
            # Raised explicitly rather than with ``assert`` so that the
            # check also runs under ``python -O``.
            if kind != "commit":
                raise AssertionError(
                    "Expected commit for %s, got '%s'" % (commit_sha, kind))
            processed.add(revid)
        pending = set()
        for commit_sha in want:
            if commit_sha in have_set:
                continue
            try:
                (kind, (revid, tree_sha)) = sha_map[commit_sha]
            except KeyError:
                # Unknown to the sha map; it cannot be generated from bzr.
                continue
            if kind != "commit":
                raise AssertionError(
                    "Expected commit for %s, got '%s'" % (commit_sha, kind))
            pending.add(revid)

        todo = _find_missing_bzr_revids(self.repository.get_parent_map,
                                        pending, processed)
        trace.mutter('sending revisions %r', todo)
        objects = []
        total = len(todo)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for i, revid in enumerate(todo):
                pb.update("generating git objects", i, total)
                rev = self.repository.get_revision(revid)
                tree = self.tree_cache.revision_tree(revid)
                for path, obj, ie in self._revision_to_objects(rev, tree,
                    roundtrip=not lossy):
                    objects.append((obj, path))
        finally:
            pb.finished()
        return objects
0.251.1 by Jelmer Vernooij
Implement ObjectStore.add_{thin_,}pack.
608
609
    def add_thin_pack(self):
        """Create a temporary pack file whose objects can later be imported.

        :return: Tuple with a writable binary file object for a new
            temporary ``.pack`` file and a ``commit`` callable. Calling
            ``commit()`` closes the file and, unless it is empty, indexes
            the pack and imports its objects into ``self.repository``
            inside a write group.
        """
        import tempfile
        import os
        fd, path = tempfile.mkstemp(suffix=".pack")
        f = os.fdopen(fd, 'wb')
        def commit():
            from dulwich.pack import PackData, Pack
            from bzrlib.plugins.git.fetch import import_git_objects
            # os.fsync() only covers data already handed to the operating
            # system, so push the file object's own buffer out first.
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) == 0:
                # Nothing was written, so there is nothing to import.
                return
            # Strip the ".pack" suffix to get the pack's base name.
            basename = path[:-5]
            pd = PackData(path)
            # NOTE(review): self.object_store is not assigned anywhere in
            # the code visible here - confirm the attribute exists.
            pd.create_index_v2(basename+".idx", self.object_store.get_raw)

            p = Pack(basename)
            self.repository.lock_write()
            try:
                self.repository.start_write_group()
                try:
                    import_git_objects(self.repository, self.mapping,
                        p.iterobjects(get_raw=self.get_raw),
                        self.object_store)
                except:
                    # Deliberately catches everything: the write group has
                    # to be aborted on any failure before re-raising.
                    self.repository.abort_write_group()
                    raise
                else:
                    self.repository.commit_write_group()
            finally:
                self.repository.unlock()
            # NOTE(review): neither the temporary .pack file nor its .idx
            # file is removed here - confirm whether cleanup is expected.
        return f, commit
640
641
    # The pack isn't kept around anyway, so there is no point
642
    # in treating full packs differently from thin packs
643
    add_pack = add_thin_pack