/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.200.252 by Jelmer Vernooij
Clarify history, copyright.
1
# Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
0.200.228 by Jelmer Vernooij
Split out map.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Map from Git sha's to Bazaar objects."""
18
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
19
from dulwich.objects import (
20
    Blob,
0.200.864 by Jelmer Vernooij
Cope with the first commit being pointless.
21
    Tree,
0.200.586 by Jelmer Vernooij
Fix issues pointed out by pyflakes.
22
    sha_to_hex,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
23
    )
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
24
from dulwich.object_store import (
0.200.457 by Jelmer Vernooij
Use BaseObjectStore.
25
    BaseObjectStore,
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
26
    )
0.200.249 by Jelmer Vernooij
Implement Tree.
27
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
28
from bzrlib import (
0.231.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
29
    errors,
0.200.789 by Jelmer Vernooij
Cope with ghosts, cache inventories.
30
    lru_cache,
0.200.478 by Jelmer Vernooij
Cope with disappeared revisions.
31
    trace,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
32
    ui,
0.200.773 by Jelmer Vernooij
Implement inventory_to_objects
33
    urlutils,
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
34
    )
0.200.541 by Jelmer Vernooij
Cope with NULL_REVISION.
35
from bzrlib.revision import (
36
    NULL_REVISION,
37
    )
0.200.228 by Jelmer Vernooij
Split out map.
38
0.200.229 by Jelmer Vernooij
More work on converter.
39
from bzrlib.plugins.git.mapping import (
0.200.463 by Jelmer Vernooij
Support remote dpush (except for references).
40
    default_mapping,
0.200.359 by Jelmer Vernooij
Simplify file mode handling, avoid inventory_to_tree_and_blobs as it is expensive if trees/blobs have already been converted.
41
    directory_to_tree,
0.200.548 by Jelmer Vernooij
Extract unusual file modes from revision when reconstructing Trees.
42
    extract_unusual_modes,
0.231.1 by Jelmer Vernooij
Check that regenerated objects have the expected sha1.
43
    mapping_registry,
0.200.795 by Jelmer Vernooij
simplify sha extraction for blobs, process multiple blobs at once.
44
    symlink_to_blob,
0.200.229 by Jelmer Vernooij
More work on converter.
45
    )
0.200.260 by Jelmer Vernooij
Add DictGitShaMap, useful for testing.
46
from bzrlib.plugins.git.shamap import (
0.200.842 by Jelmer Vernooij
Allow content cache to be provided.
47
    from_repository as cache_from_repository,
0.200.231 by Jelmer Vernooij
Partially fix pull.
48
    )
49
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
50
import posixpath
0.252.23 by Jelmer Vernooij
More work on roundtripping support.
51
import stat
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
52
0.200.228 by Jelmer Vernooij
Split out map.
53
0.200.452 by Jelmer Vernooij
Rename converter -> object_store, provide utility function for getting ObjectStore's.
54
def get_object_store(repo, mapping=None):
    """Return a Git-style object store for a repository.

    :param repo: Repository to return an object store for
    :param mapping: Mapping handed to BazaarObjectStore when one has to
        be created; not used otherwise
    :return: ``repo._git.object_store`` if ``repo`` has a ``_git``
        attribute that is not None, otherwise a new BazaarObjectStore
    """
    native = getattr(repo, "_git", None)
    if native is None:
        # No usable ``_git`` attribute: back the store onto the Bazaar
        # repository itself.
        return BazaarObjectStore(repo, mapping)
    return native.object_store
59
60
0.200.852 by Jelmer Vernooij
Cache trees rather than inventories.
61
# Passed as max_size to the LRUSizeCache created by LRUTreeCache, where tree
# sizes are estimated at 1024 per inventory entry.
MAX_TREE_CACHE_SIZE = 50 * 1024 * 1024
62
63
64
class LRUTreeCache(object):
    """Size-bounded LRU cache of revision trees in front of a repository."""

    def __init__(self, repository):
        """Create an empty cache.

        :param repository: Repository trees are loaded from on a cache miss
        """
        def approx_tree_size(tree):
            # Very rough estimate, 1k per inventory entry
            return len(tree.inventory) * 1024
        self.repository = repository
        self._cache = lru_cache.LRUSizeCache(
            max_size=MAX_TREE_CACHE_SIZE, after_cleanup_size=None,
            compute_size=approx_tree_size)

    def revision_tree(self, revid):
        """Return the tree for ``revid``, loading and caching it on a miss.

        :param revid: Revision id of the tree to return
        """
        try:
            tree = self._cache[revid]
        except KeyError:
            tree = self.repository.revision_tree(revid)
            self.add(tree)
        return tree

    def iter_revision_trees(self, revids):
        """Return an iterator over the trees for ``revids``, in that order.

        Trees that are not cached are fetched from the repository with a
        single ``revision_trees`` call and added to the cache.

        :param revids: Revision ids; iterated twice, so pass a sequence
        """
        found = {}
        for revid in revids:
            found[revid] = self._cache.get(revid)
        uncached = [revid for revid in found if found[revid] is None]
        for tree in self.repository.revision_trees(uncached):
            found[tree.get_revision_id()] = tree
            self.add(tree)
        return (found[revid] for revid in revids)

    def revision_trees(self, revids):
        """Like iter_revision_trees, but return a list."""
        return list(self.iter_revision_trees(revids))

    def add(self, tree):
        """Store ``tree`` in the cache under its own revision id."""
        self._cache.add(tree.get_revision_id(), tree)
0.200.789 by Jelmer Vernooij
Cope with ghosts, cache inventories.
95
96
0.252.5 by Jelmer Vernooij
enable 'bzr push'.
97
def _find_missing_bzr_revids(get_parent_map, want, have):
    """Find the revisions that have to be pushed.

    Walks the ancestry of ``want`` through ``get_parent_map`` and stops
    descending at revisions that are in ``have``. Only revisions that
    ``get_parent_map`` returns an entry for are reported.

    :param get_parent_map: Function that returns the parents for a sequence
        of revisions, as a dictionary
    :param want: Set of revisions the target wants
    :param have: Set of revisions the target already has
    :return: Set of revisions to fetch, never containing NULL_REVISION
    """
    missing = set()
    seen = set()
    frontier = want - have
    while frontier:
        seen.update(frontier)
        parent_map = get_parent_map(frontier)
        frontier = set()
        for revid, parent_ids in parent_map.items():
            if revid in have:
                # The target already has this one; do not walk past it.
                continue
            missing.add(revid)
            frontier.update(p for p in parent_ids if p not in seen)
    missing.discard(NULL_REVISION)
    return missing
122
123
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
124
def _check_expected_sha(expected_sha, object):
0.200.797 by Jelmer Vernooij
Add docstring, fix formatting.
125
    """Check whether an object matches an expected SHA.
126
127
    :param expected_sha: None or expected SHA as either binary or as hex digest
128
    :param object: Object to verify
129
    """
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
130
    if expected_sha is None:
131
        return
132
    if len(expected_sha) == 40:
133
        if expected_sha != object.sha().hexdigest():
0.200.797 by Jelmer Vernooij
Add docstring, fix formatting.
134
            raise AssertionError("Invalid sha for %r: %s" % (object,
135
                expected_sha))
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
136
    elif len(expected_sha) == 20:
137
        if expected_sha != object.sha().digest():
0.200.797 by Jelmer Vernooij
Add docstring, fix formatting.
138
            raise AssertionError("Invalid sha for %r: %s" % (object,
139
                sha_to_hex(expected_sha)))
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
140
    else:
0.200.797 by Jelmer Vernooij
Add docstring, fix formatting.
141
        raise AssertionError("Unknown length %d for %r" % (len(expected_sha),
142
            expected_sha))
0.200.793 by Jelmer Vernooij
Make _check_expected_sha a global fn.
143
144
0.252.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
145
def _tree_to_objects(tree, parent_trees, idmap, unusual_modes, dummy_file_name=None):
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
146
    """Iterate over the objects that were introduced in a revision.
147
0.200.841 by Jelmer Vernooij
Eliminate InventorySHAMap.
148
    :param idmap: id map
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
149
    :param unusual_modes: Unusual file modes
0.252.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
150
    :param dummy_file_name: File name to use for dummy files
151
        in empty directories. None to skip empty directories
0.200.837 by Jelmer Vernooij
Return inventory entries when creating git objects for a revision.
152
    :return: Yields (path, object, ie) entries
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
153
    """
154
    new_trees = {}
155
    new_blobs = []
156
    shamap = {}
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
157
    try:
158
        base_tree = parent_trees[0]
159
        other_parent_trees = parent_trees[1:]
160
    except IndexError:
161
        base_tree = tree._repository.revision_tree(NULL_REVISION)
162
        other_parent_trees = []
0.200.868 by Jelmer Vernooij
Cope with no-change merges.
163
    def find_unchanged_parent_ie(ie, parent_trees):
164
        assert ie.kind in ("symlink", "file")
165
        for ptree in parent_trees:
166
            try:
167
                pie = ptree.inventory[ie.file_id]
168
            except errors.NoSuchId:
169
                pass
170
            else:
171
                if (pie.text_sha1 == ie.text_sha1 and 
172
                    pie.kind == ie.kind and
173
                    pie.symlink_target == ie.symlink_target):
174
                    return pie
175
        raise KeyError
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
176
    for (file_id, path, changed_content, versioned, parent, name, kind,
177
         executable) in tree.iter_changes(base_tree):
178
        if kind[1] == "file":
179
            ie = tree.inventory[file_id]
180
            if changed_content:
0.200.868 by Jelmer Vernooij
Cope with no-change merges.
181
                try:
182
                    pie = find_unchanged_parent_ie(ie, other_parent_trees)
183
                except KeyError:
184
                    pass
185
                else:
0.252.40 by Jelmer Vernooij
Checks for roundtripping.
186
                    try:
187
                        shamap[ie.file_id] = idmap.lookup_blob_id(
188
                            pie.file_id, pie.revision)
189
                    except KeyError:
190
                        # no-change merge ?
191
                        blob = Blob()
192
                        blob.data = tree.get_file_text(ie.file_id)
193
                        shamap[ie.file_id] = blob.id
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
194
            if not file_id in shamap:
195
                new_blobs.append((path[1], ie))
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
196
            new_trees[posixpath.dirname(path[1])] = parent[1]
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
197
        elif kind[1] == "symlink":
198
            ie = tree.inventory[file_id]
199
            if changed_content:
200
                blob = symlink_to_blob(ie)
0.200.868 by Jelmer Vernooij
Cope with no-change merges.
201
                shamap[file_id] = blob.id
202
                try:
203
                    find_unchanged_parent_ie(ie, other_parent_trees)
204
                except KeyError:
205
                    yield path[1], blob, ie
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
206
            new_trees[posixpath.dirname(path[1])] = parent[1]
0.250.3 by Jelmer Vernooij
Simplify..
207
        elif kind[1] not in (None, "directory"):
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
208
            raise AssertionError(kind[1])
0.250.2 by Jelmer Vernooij
Make it work for evolution.
209
        if path[0] is not None:
0.200.878 by Jelmer Vernooij
Fix determining of unusual file modes.
210
            new_trees[posixpath.dirname(path[0])] = parent[0]
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
211
    
0.250.2 by Jelmer Vernooij
Make it work for evolution.
212
    for (path, ie), chunks in tree.iter_files_bytes(
213
        [(ie.file_id, (path, ie)) for (path, ie) in new_blobs]):
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
214
        obj = Blob()
0.200.851 by Jelmer Vernooij
Use blob.chunked.
215
        obj.chunked = chunks
0.200.837 by Jelmer Vernooij
Return inventory entries when creating git objects for a revision.
216
        yield path, obj, ie
217
        shamap[ie.file_id] = obj.id
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
218
0.200.879 by Jelmer Vernooij
Fix unusual modes.
219
    for path in unusual_modes:
220
        parent_path = posixpath.dirname(path)
221
        new_trees[parent_path] = tree.path2id(parent_path)
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
222
    
223
    trees = {}
224
    while new_trees:
225
        items = new_trees.items()
226
        new_trees = {}
227
        for path, file_id in items:
0.250.2 by Jelmer Vernooij
Make it work for evolution.
228
            try:
229
                parent_id = tree.inventory[file_id].parent_id
230
            except errors.NoSuchId:
231
                # Directory was removed recursively perhaps ?
232
                continue
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
233
            if parent_id is not None:
234
                parent_path = urlutils.dirname(path)
235
                new_trees[parent_path] = parent_id
236
            trees[path] = file_id
237
0.200.808 by Jelmer Vernooij
Avoid recalculating tree shas we already have.
238
    def ie_to_hexsha(ie):
239
        try:
240
            return shamap[ie.file_id]
241
        except KeyError:
0.200.884 by Jelmer Vernooij
Cope with -0000 as timezone in Git commits.
242
            # FIXME: Should be the same as in parent
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
243
            if ie.kind in ("file", "symlink"):
0.200.868 by Jelmer Vernooij
Cope with no-change merges.
244
                try:
245
                    return idmap.lookup_blob_id(ie.file_id, ie.revision)
246
                except KeyError:
247
                    # no-change merge ?
248
                    blob = Blob()
249
                    blob.data = tree.get_file_text(ie.file_id)
250
                    return blob.id
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
251
            elif ie.kind == "directory":
252
                # Not all cache backends store the tree information, 
253
                # calculate again from scratch
0.252.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
254
                ret = directory_to_tree(ie, ie_to_hexsha, unusual_modes,
255
                    dummy_file_name)
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
256
                if ret is None:
257
                    return ret
258
                return ret.id
259
            else:
260
                raise AssertionError
0.200.808 by Jelmer Vernooij
Avoid recalculating tree shas we already have.
261
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
262
    for path in sorted(trees.keys(), reverse=True):
0.250.1 by Jelmer Vernooij
Use iter_changes() rather than iterating over all contents of an inventory.
263
        ie = tree.inventory[trees[path]]
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
264
        assert ie.kind == "directory"
0.252.30 by Jelmer Vernooij
Support creating dummy files for empty directories.
265
        obj = directory_to_tree(ie, ie_to_hexsha, unusual_modes,
266
            dummy_file_name)
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
267
        if obj is not None:
0.200.837 by Jelmer Vernooij
Return inventory entries when creating git objects for a revision.
268
            yield path, obj, ie
0.200.798 by Jelmer Vernooij
Split out _inventory_to_objects into a function.
269
            shamap[ie.file_id] = obj.id
270
271
0.200.457 by Jelmer Vernooij
Use BaseObjectStore.
272
class BazaarObjectStore(BaseObjectStore):
0.200.320 by Jelmer Vernooij
Handle lightweight checkouts.
273
    """A Git-style object store backed onto a Bazaar repository."""
0.200.228 by Jelmer Vernooij
Split out map.
274
275
    def __init__(self, repository, mapping=None):
        """Create an object store on top of a Bazaar repository.

        :param repository: Bazaar repository to back the store onto
        :param mapping: Mapping to use; default_mapping if None
        """
        self.repository = repository
        if mapping is None:
            self.mapping = default_mapping
        else:
            self.mapping = mapping
        self._cache = cache_from_repository(repository)
        # A one-element tuple needs the trailing comma; ("tree") is just
        # the string "tree", which turns membership tests into substring
        # tests.
        self._content_cache_types = ("tree",)
        # Write groups are delegated to the id map of the cache.
        self.start_write_group = self._cache.idmap.start_write_group
        self.abort_write_group = self._cache.idmap.abort_write_group
        self.commit_write_group = self._cache.idmap.commit_write_group
        self.tree_cache = LRUTreeCache(self.repository)
0.200.228 by Jelmer Vernooij
Split out map.
287
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
288
    def _update_sha_map(self, stop_revision=None):
        """Add the revisions that the id map is missing to the cache.

        Starting from the heads, parents are followed for as long as the
        id map reports them missing. The missing revisions that the
        repository has are then processed in topological order inside a
        write group, which is aborted if anything goes wrong.

        :param stop_revision: Revision to start from; None to start from
            the heads of all revisions in the repository
        """
        graph = self.repository.get_graph()
        if stop_revision is not None:
            heads = set([stop_revision])
        else:
            heads = graph.heads(self.repository.all_revision_ids())
        missing_revids = self._cache.idmap.missing_revisions(heads)
        while heads:
            parent_map = graph.get_parent_map(heads)
            candidates = set()
            for parent_ids in parent_map.values():
                candidates.update(
                    [p for p in parent_ids if p not in missing_revids])
            heads = self._cache.idmap.missing_revisions(candidates)
            missing_revids.update(heads)
        if NULL_REVISION in missing_revids:
            missing_revids.remove(NULL_REVISION)
        missing_revids = self.repository.has_revisions(missing_revids)
        if not missing_revids:
            return
        self.start_write_group()
        try:
            pb = ui.ui_factory.nested_progress_bar()
            try:
                topo_order = graph.iter_topo_order(missing_revids)
                for i, revid in enumerate(topo_order):
                    trace.mutter('processing %r', revid)
                    pb.update("updating git map", i, len(missing_revids))
                    self._update_sha_map_revision(revid)
            finally:
                pb.finished()
        except:
            # Whatever went wrong, do not leave the write group open.
            self.abort_write_group()
            raise
        self.commit_write_group()
0.200.229 by Jelmer Vernooij
More work on converter.
322
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
323
    def __iter__(self):
324
        self._update_sha_map()
0.200.847 by Jelmer Vernooij
Add BzrGitCache object.
325
        return iter(self._cache.idmap.sha1s())
0.200.422 by Jelmer Vernooij
'bzr git-object' without arguments now prints the available git objects.
326
0.252.22 by Jelmer Vernooij
Fix file id map (de)serialization.
327
    def _reconstruct_commit(self, rev, tree_sha, roundtrip):
0.238.7 by Jelmer Vernooij
Cope with ghosts a bit better.
328
        def parent_lookup(revid):
329
            try:
330
                return self._lookup_revision_sha1(revid)
331
            except errors.NoSuchRevision:
332
                return None
0.252.4 by Jelmer Vernooij
More work on roundtripping.
333
        return self.mapping.export_commit(rev, tree_sha, parent_lookup,
0.252.22 by Jelmer Vernooij
Fix file id map (de)serialization.
334
            roundtrip)
0.238.7 by Jelmer Vernooij
Cope with ghosts a bit better.
335
0.252.49 by Jelmer Vernooij
Avoid trying to set HEAD for remote branches.
336
    def _create_fileid_map_blob(self, inv):
337
        # FIXME: This can probably be a lot more efficient, 
338
        # not all files necessarily have to be processed.
339
        file_ids = {}
340
        for (path, ie) in inv.iter_entries():
341
            if self.mapping.generate_file_id(path) != ie.file_id:
342
                file_ids[path] = ie.file_id
343
        return self.mapping.export_fileid_map(file_ids)
344
0.252.4 by Jelmer Vernooij
More work on roundtripping.
345
    def _revision_to_objects(self, rev, tree, roundtrip):
        """Convert a revision to a set of git objects.

        :param rev: Bazaar revision object
        :param tree: Bazaar revision tree
        :param roundtrip: Whether to roundtrip all Bazaar revision data
        :return: Yields (path, object, ie) entries. The root tree is held
            back and yielded as ("", tree, root ie) after all other trees
            and blobs; the commit comes last, as (None, commit, None)
        :raise AssertionError: If the revision id maps to a foreign
            revision id that the regenerated commit does not match
        """
        unusual_modes = extract_unusual_modes(rev)
        present_parents = self.repository.has_revisions(rev.parent_ids)
        parent_trees = self.tree_cache.revision_trees(
            [p for p in rev.parent_ids if p in present_parents])
        root_tree = None
        for path, obj, ie in _tree_to_objects(tree, parent_trees,
                self._cache.idmap, unusual_modes, self.mapping.BZR_DUMMY_FILE):
            if path == "":
                root_tree = obj
                root_ie = ie
                # Don't yield just yet
            else:
                yield path, obj, ie
        if root_tree is None:
            # Pointless commit - get the tree sha elsewhere
            if not rev.parent_ids:
                root_tree = Tree()
            else:
                base_sha1 = self._lookup_revision_sha1(rev.parent_ids[0])
                root_tree = self[self[base_sha1].tree]
            root_ie = tree.inventory.root
        if roundtrip and self.mapping.BZR_FILE_IDS_FILE is not None:
            b = self._create_fileid_map_blob(tree.inventory)
            if b is not None:
                # 0o644 is the octal spelling that both Python 2.6+ and
                # Python 3 accept; the bare 0644 form is Python 2 only.
                root_tree[self.mapping.BZR_FILE_IDS_FILE] = (
                    (stat.S_IFREG | 0o644), b.id)
                yield self.mapping.BZR_FILE_IDS_FILE, b, None
        yield "", root_tree, root_ie
        commit_obj = self._reconstruct_commit(rev, root_tree.id,
            roundtrip=roundtrip)
        try:
            foreign_revid, mapping = mapping_registry.parse_revision_id(
                rev.revision_id)
        except errors.InvalidRevisionId:
            # Not a revision id any mapping recognises: nothing to check
            pass
        else:
            _check_expected_sha(foreign_revid, commit_obj)
        yield None, commit_obj, None
0.200.783 by Jelmer Vernooij
Move object generation into a separate function.
389
0.200.838 by Jelmer Vernooij
Add convenience object for updating the object store.
390
    def _get_updater(self, rev):
0.200.849 by Jelmer Vernooij
Allow cache backends to decide when to add entries rather than adding once per commit.
391
        return self._cache.get_updater(rev)
0.200.838 by Jelmer Vernooij
Add convenience object for updating the object store.
392
0.200.783 by Jelmer Vernooij
Move object generation into a separate function.
393
    def _update_sha_map_revision(self, revid):
394
        rev = self.repository.get_revision(revid)
0.200.852 by Jelmer Vernooij
Cache trees rather than inventories.
395
        tree = self.tree_cache.revision_tree(rev.revision_id)
0.200.838 by Jelmer Vernooij
Add convenience object for updating the object store.
396
        updater = self._get_updater(rev)
0.252.4 by Jelmer Vernooij
More work on roundtripping.
397
        for path, obj, ie in self._revision_to_objects(rev, tree,
398
            roundtrip=True):
0.200.844 by Jelmer Vernooij
Add infrastructure for multiple cache formats.
399
            updater.add_object(obj, ie)
0.200.838 by Jelmer Vernooij
Add convenience object for updating the object store.
400
        commit_obj = updater.finish()
0.200.781 by Jelmer Vernooij
Return commit id after converting a revision.
401
        return commit_obj.id
0.200.229 by Jelmer Vernooij
More work on converter.
402
0.200.855 by Jelmer Vernooij
_get_ -> _reconstruct_.
403
    def _reconstruct_blobs(self, keys):
        """Regenerate Git Blob objects from texts stored in bzr.

        :param keys: Iterable of (fileid, revision, expected_sha) tuples;
            an expected_sha of None is not verified
        :return: Yields a Blob for each item of the repository's
            iter_files_bytes stream
        :raise AssertionError: If a blob does not match its expected sha
        """
        wanted = ((key[0], key[1], key) for key in keys)
        for key, chunks in self.repository.iter_files_bytes(wanted):
            (fileid, revision, expected_sha) = key
            blob = Blob()
            blob.chunked = chunks
            if blob.id != expected_sha and blob.data == "":
                # Perhaps it's a symlink ?
                entry = self.tree_cache.revision_tree(
                    revision).inventory[fileid]
                if entry.kind == 'symlink':
                    blob = symlink_to_blob(entry)
            _check_expected_sha(expected_sha, blob)
            yield blob
0.200.229 by Jelmer Vernooij
More work on converter.
422
0.200.855 by Jelmer Vernooij
_get_ -> _reconstruct_.
423
    def _reconstruct_tree(self, fileid, revid, inv, unusual_modes,
        expected_sha=None):
        """Build the Git Tree object for a directory stored in bzr.

        :param fileid: File id of the directory to convert.
        :param revid: Revision id used when looking up (or rebuilding) the
            trees of subdirectories.
        :param inv: Inventory in which ``fileid`` and the root are looked up.
        :param unusual_modes: Passed through to ``directory_to_tree`` and to
            the recursive calls for subdirectories.
        :param expected_sha: Optional SHA1; handed to ``_check_expected_sha``
            together with the finished tree.
        :return: The tree produced by ``directory_to_tree``.
        """
        def get_ie_sha1(entry):
            # Callback for directory_to_tree: map an inventory entry to the
            # id of the Git object that represents it.
            if entry.kind == "directory":
                try:
                    return self._cache.idmap.lookup_tree_id(entry.file_id,
                        revid)
                except (NotImplementedError, KeyError):
                    # The cache does not know this tree; rebuild it.
                    subtree = self._reconstruct_tree(entry.file_id, revid,
                        inv, unusual_modes)
                    if subtree is None:
                        return None
                    return subtree.id
            if entry.kind in ("file", "symlink"):
                try:
                    return self._cache.idmap.lookup_blob_id(entry.file_id,
                        entry.revision)
                except KeyError:
                    # no-change merge?
                    blobs = self._reconstruct_blobs(
                        [(entry.file_id, entry.revision, None)])
                    return blobs.next().id
            raise AssertionError("unknown entry kind '%s'" % entry.kind)

        tree = directory_to_tree(inv[fileid], get_ie_sha1, unusual_modes,
            self.mapping.BZR_DUMMY_FILE)
        is_root = (inv.root.file_id == fileid)
        if is_root and self.mapping.BZR_FILE_IDS_FILE is not None:
            # If this is the root tree, add the file ids
            fileid_blob = self._create_fileid_map_blob(inv)
            # Regular file with permission bits rw-r--r-- (octal 644).
            mode = (stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR |
                    stat.S_IRGRP | stat.S_IROTH)
            tree[self.mapping.BZR_FILE_IDS_FILE] = (mode, fileid_blob.id)
        _check_expected_sha(expected_sha, tree)
        return tree
0.200.229 by Jelmer Vernooij
More work on converter.
461
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
462
    def get_parents(self, sha):
        """Return the ``parents`` attribute of the object stored under a SHA1.

        :param sha: SHA1 of the commit
        :raises KeyError: propagated from the ``self[sha]`` lookup
        """
        # NOTE(review): the previous docstring also listed NotCommitError;
        # nothing in this method raises it directly - confirm.
        commit = self[sha]
        return commit.parents
469
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
470
    def _lookup_revision_sha1(self, revid):
        """Find the Git SHA1 that corresponds to a Bazaar revision id.

        The lookup falls back in three steps: the id map cache, then the
        SHA1 parsed out of the revision id itself, and finally the cache
        again after updating the SHA map for ``revid`` under a read lock.

        :param revid: Bazaar revision id
        :return: SHA1 of the matching commit (``ZERO_SHA`` for
            ``NULL_REVISION``)
        """
        from dulwich.protocol import ZERO_SHA
        if revid == NULL_REVISION:
            return ZERO_SHA

        # 1. Already known to the cache?
        try:
            return self._cache.idmap.lookup_commit(revid)
        except KeyError:
            pass

        # 2. Does the revision id itself encode the SHA1?
        try:
            return mapping_registry.parse_revision_id(revid)[0]
        except errors.InvalidRevisionId:
            pass

        # 3. Bring the map up to date for this revision and look again.
        self.repository.lock_read()
        try:
            self._update_sha_map(revid)
        finally:
            self.repository.unlock()
        return self._cache.idmap.lookup_commit(revid)
0.200.364 by Jelmer Vernooij
Reimplement dpush, but more efficient and only writing a single pack file rather than one per revision.
487
0.200.310 by Jelmer Vernooij
Fix pull from remote branches.
488
    def get_raw(self, sha):
        """Return the type and raw string form of a Git object.

        :param sha: SHA1 of the git object
        :return: Tuple ``(obj.type, obj.as_raw_string())`` for the object
            found under ``sha``
        """
        git_object = self[sha]
        object_type = git_object.type
        return (object_type, git_object.as_raw_string())
0.200.310 by Jelmer Vernooij
Fix pull from remote branches.
495
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
496
    def __contains__(self, sha):
497
        # See if sha is in map
498
        try:
0.200.897 by Jelmer Vernooij
Make lookup_git_sha public.
499
            (type, type_data) = self.lookup_git_sha(sha)
0.200.568 by Jelmer Vernooij
Properly check that matching bzr objects exist.
500
            if type == "commit":
501
                return self.repository.has_revision(type_data[0])
502
            elif type == "blob":
503
                return self.repository.texts.has_version(type_data)
504
            elif type == "tree":
505
                return self.repository.has_revision(type_data[1])
506
            else:
507
                raise AssertionError("Unknown object type '%s'" % type)
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
508
        except KeyError:
509
            return False
510
0.200.898 by Jelmer Vernooij
Optimize finding of git shas.
511
    def lookup_git_shas(self, shas, update_map=True):
        """Resolve several Git SHA1s through the id map in one go.

        :param shas: Iterable of SHA1s to look up
        :param update_map: If true, the SHA map is updated (at most once
            per call) the first time a SHA1 is not found, and that SHA1 is
            looked up again afterwards
        :return: Dictionary mapping each SHA1 that could be resolved to its
            id map entry; SHA1s that stay unknown are left out
        """
        found = {}
        may_refresh = update_map
        for sha in shas:
            try:
                found[sha] = self._cache.idmap.lookup_git_sha(sha)
            except KeyError:
                if not may_refresh:
                    continue
                # if not, see if there are any unconverted revisions and add
                # them to the map, search for sha in map again
                self._update_sha_map()
                may_refresh = False
                try:
                    found[sha] = self._cache.idmap.lookup_git_sha(sha)
                except KeyError:
                    continue
        return found
527
528
    def lookup_git_sha(self, sha, update_map=True):
        """Resolve a single Git SHA1 through ``lookup_git_shas``.

        :param sha: SHA1 to look up
        :param update_map: Passed on to ``lookup_git_shas``
        :return: The entry found for ``sha``
        :raises KeyError: if ``sha`` is not in the result
        """
        found = self.lookup_git_shas([sha], update_map=update_map)
        return found[sha]
0.200.437 by Jelmer Vernooij
Implement BazaarObjectStore.__contains__, BazaarObjectStore.iter_shas, BazaarObjectStore.get_parents.
530
531
    def __getitem__(self, sha):
        """Return the Git object for a SHA1, rebuilding it from bzr data.

        The content cache (when there is one) is consulted first.  Otherwise
        the SHA1 is resolved through ``lookup_git_sha`` and the commit, blob
        or tree it refers to is reconstructed.

        :param sha: SHA1 of the git object
        :raises KeyError: if the SHA1 is unknown, or if the revision its map
            entry points at is no longer in the repository
        """
        content_cache = self._cache.content_cache
        if content_cache is not None:
            try:
                return content_cache[sha]
            except KeyError:
                pass

        (kind, data) = self.lookup_git_sha(sha)
        missing_fmt = ('entry for %s %s in shamap: %r, but not found in '
                       'repository')

        # convert object to git object
        if kind == "commit":
            (revid, tree_sha) = data
            try:
                rev = self.repository.get_revision(revid)
            except errors.NoSuchRevision:
                trace.mutter(missing_fmt, kind, sha, data)
                raise KeyError(sha)
            commit = self._reconstruct_commit(rev, tree_sha, roundtrip=True)
            _check_expected_sha(sha, commit)
            return commit

        if kind == "blob":
            (fileid, revision) = data
            blobs = self._reconstruct_blobs([(fileid, revision, sha)])
            return blobs.next()

        if kind == "tree":
            (fileid, revid) = data
            try:
                tree = self.tree_cache.revision_tree(revid)
                rev = self.repository.get_revision(revid)
            except errors.NoSuchRevision:
                trace.mutter(missing_fmt, kind, sha, data)
                raise KeyError(sha)
            unusual_modes = extract_unusual_modes(rev)
            try:
                return self._reconstruct_tree(fileid, revid, tree.inventory,
                    unusual_modes, expected_sha=sha)
            except errors.NoSuchRevision:
                raise KeyError(sha)

        raise AssertionError("Unknown object type '%s'" % kind)
0.200.782 by Jelmer Vernooij
Add custom generate_pack_contents implementation.
569
0.252.37 by Jelmer Vernooij
Factor out some common code for finding refs to send.
570
    def generate_lossy_pack_contents(self, have, want, progress=None,
            get_tagged=None):
        """Call ``generate_pack_contents`` with ``lossy=True``.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Passed on unchanged
        :param get_tagged: Passed on unchanged
        :return: Whatever ``generate_pack_contents`` returns
        """
        contents = self.generate_pack_contents(have, want, progress,
            get_tagged, lossy=True)
        return contents
574
0.200.899 by Jelmer Vernooij
Add tests for find_missing_bzr_revids.
575
    def generate_pack_contents(self, have, want, progress=None,
            get_tagged=None, lossy=False):
        """Generate the Git objects needed to go from ``have`` to ``want``.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Accepted for interface compatibility; not used by
            this implementation
        :param get_tagged: Accepted for interface compatibility; not used by
            this implementation
        :param lossy: If true, objects are generated with
            ``roundtrip=False``
        :return: List of ``(object, path)`` tuples
        """
        sha_info = self.lookup_git_shas(have + want)
        # Build the membership set once instead of scanning the ``have``
        # list for every wanted SHA1.
        have_shas = set(have)

        # Revisions the other side already has.
        processed = set()
        for commit_sha in have:
            try:
                (obj_type, (revid, tree_sha)) = sha_info[commit_sha]
            except KeyError:
                # Unknown SHA1s are simply ignored.
                pass
            else:
                assert obj_type == "commit"
                processed.add(revid)

        # Revisions that were asked for and are not already present.
        pending = set()
        for commit_sha in want:
            if commit_sha in have_shas:
                continue
            try:
                (obj_type, (revid, tree_sha)) = sha_info[commit_sha]
            except KeyError:
                pass
            else:
                assert obj_type == "commit"
                pending.add(revid)

        todo = _find_missing_bzr_revids(self.repository.get_parent_map,
                                        pending, processed)
        trace.mutter('sending revisions %r', todo)
        contents = []
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for i, revid in enumerate(todo):
                pb.update("generating git objects", i, len(todo))
                rev = self.repository.get_revision(revid)
                tree = self.tree_cache.revision_tree(revid)
                for path, obj, ie in self._revision_to_objects(rev, tree,
                    roundtrip=not lossy):
                    contents.append((obj, path))
        finally:
            pb.finished()
        return contents
0.251.1 by Jelmer Vernooij
Implement ObjectStore.add_{thin_,}pack.
620
621
    def add_thin_pack(self):
        """Open a temporary pack file that can be written and then imported.

        :return: Tuple ``(f, commit)``.  ``f`` is a binary file object, open
            for writing, on a fresh temporary ``.pack`` file.  ``commit`` is
            a callable taking no arguments that closes ``f`` and, unless the
            file is empty, indexes the pack and imports its objects into
            ``self.repository`` inside a write group.
        """
        import tempfile
        import os
        fd, path = tempfile.mkstemp(suffix=".pack")
        f = os.fdopen(fd, 'wb')
        def commit():
            # fsync only covers data that has reached the descriptor, so
            # the file object's own buffer has to be flushed first.
            f.flush()
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) == 0:
                # Nothing was written: drop the unused temporary file
                # instead of leaving it behind.
                os.remove(path)
                return
            # Imported only once there is actually a pack to process.
            from dulwich.pack import PackData, Pack
            from bzrlib.plugins.git.fetch import import_git_objects
            pd = PackData(path)
            # NOTE(review): this uses self.object_store.get_raw while the
            # import below uses self.get_raw - confirm that an
            # ``object_store`` attribute exists on this class.
            pd.create_index_v2(path[:-5]+".idx", self.object_store.get_raw)

            p = Pack(path[:-5])
            self.repository.lock_write()
            try:
                self.repository.start_write_group()
                try:
                    import_git_objects(self.repository, self.mapping,
                        p.iterobjects(get_raw=self.get_raw),
                        self.object_store)
                except:
                    # Deliberately catches everything: the write group must
                    # be aborted before the error is re-raised.
                    self.repository.abort_write_group()
                    raise
                else:
                    self.repository.commit_write_group()
            finally:
                self.repository.unlock()
            # NOTE(review): the non-empty pack and its index are left on
            # disk after the import - confirm whether they should be
            # removed as well.
        return f, commit
652
653
    # The pack isn't kept around anyway, so there is no point
    # in treating full packs differently from thin packs
655
    add_pack = add_thin_pack