/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to object_store.py

Cache trees rather than inventories.

Show diffs side-by-side

added

removed

Lines of Context:
55
55
    return BazaarObjectStore(repo, mapping)
56
56
 
57
57
 
58
 
MAX_INV_CACHE_SIZE = 50 * 1024 * 1024
59
 
 
60
 
 
61
 
class LRUInventoryCache(object):
 
58
MAX_TREE_CACHE_SIZE = 50 * 1024 * 1024
 
59
 
 
60
 
 
61
class LRUTreeCache(object):
62
62
 
63
63
    def __init__(self, repository):
64
 
        def approx_inv_size(inv):
 
64
        def approx_tree_size(tree):
65
65
            # Very rough estimate, 1k per inventory entry
66
 
            return len(inv) * 1024
 
66
            return len(tree.inventory) * 1024
67
67
        self.repository = repository
68
 
        self._cache = lru_cache.LRUSizeCache(max_size=MAX_INV_CACHE_SIZE,
69
 
            after_cleanup_size=None, compute_size=approx_inv_size)
 
68
        self._cache = lru_cache.LRUSizeCache(max_size=MAX_TREE_CACHE_SIZE,
 
69
            after_cleanup_size=None, compute_size=approx_tree_size)
70
70
 
71
 
    def get_inventory(self, revid):            
 
71
    def revision_tree(self, revid):            
72
72
        try:
73
73
            return self._cache[revid] 
74
74
        except KeyError:
75
 
            inv = self.repository.get_inventory(revid)
76
 
            self._cache.add(revid, inv)
77
 
            return inv
78
 
 
79
 
    def iter_inventories(self, revids):
80
 
        invs = dict([(k, self._cache.get(k)) for k in revids]) 
81
 
        for inv in self.repository.iter_inventories(
82
 
                [r for r, v in invs.iteritems() if v is None]):
83
 
            invs[inv.revision_id] = inv
84
 
            self._cache.add(inv.revision_id, inv)
85
 
        return (invs[r] for r in revids)
86
 
 
87
 
    def get_inventories(self, revids):
88
 
        return list(self.iter_inventories(revids))
89
 
 
90
 
    def add(self, revid, inv):
91
 
        self._cache.add(revid, inv)
 
75
            tree = self.repository.revision_tree(revid)
 
76
            self.add(tree)
 
77
            return tree
 
78
 
 
79
    def iter_revision_trees(self, revids):
 
80
        trees = dict([(k, self._cache.get(k)) for k in revids]) 
 
81
        for tree in self.repository.revision_trees(
 
82
                [r for r, v in trees.iteritems() if v is None]):
 
83
            trees[tree.get_revision_id()] = tree
 
84
            self.add(tree)
 
85
        return (trees[r] for r in revids)
 
86
 
 
87
    def revision_trees(self, revids):
 
88
        return list(self.iter_revision_trees(revids))
 
89
 
 
90
    def add(self, tree):
 
91
        self._cache.add(tree.get_revision_id(), tree)
92
92
 
93
93
 
94
94
def _check_expected_sha(expected_sha, object):
112
112
            expected_sha))
113
113
 
114
114
 
115
 
def _inventory_to_objects(inv, parent_invs, idmap,
 
115
def _tree_to_objects(tree, parent_trees, idmap,
116
116
        unusual_modes, iter_files_bytes, has_ghost_parents):
117
117
    """Iterate over the objects that were introduced in a revision.
118
118
 
119
 
    :param inv: Inventory to process
120
 
    :param parent_invs: parent inventory SHA maps
121
119
    :param idmap: id map
122
120
    :param unusual_modes: Unusual file modes
123
121
    :param iter_files_bytes: Repository.iter_files_bytes-like callback
124
122
    :return: Yields (path, object, ie) entries
125
123
    """
 
124
    inv = tree.inventory
 
125
    parent_invs = [t.inventory for t in parent_trees]
126
126
    new_trees = {}
127
127
    new_blobs = []
128
128
    shamap = {}
235
235
        self.start_write_group = self._cache.idmap.start_write_group
236
236
        self.abort_write_group = self._cache.idmap.abort_write_group
237
237
        self.commit_write_group = self._cache.idmap.commit_write_group
238
 
        self.parent_invs_cache = LRUInventoryCache(self.repository)
 
238
        self.tree_cache = LRUTreeCache(self.repository)
239
239
 
240
240
    def _update_sha_map(self, stop_revision=None):
241
241
        graph = self.repository.get_graph()
285
285
                return None
286
286
        return self.mapping.export_commit(rev, tree_sha, parent_lookup)
287
287
 
288
 
    def _revision_to_objects(self, rev, inv):
 
288
    def _revision_to_objects(self, rev, tree):
289
289
        unusual_modes = extract_unusual_modes(rev)
290
290
        present_parents = self.repository.has_revisions(rev.parent_ids)
291
291
        has_ghost_parents = (len(rev.parent_ids) < len(present_parents))
292
 
        parent_invs = self.parent_invs_cache.get_inventories(
 
292
        parent_trees = self.tree_cache.revision_trees(
293
293
            [p for p in rev.parent_ids if p in present_parents])
294
294
        tree_sha = None
295
 
        for path, obj, ie in _inventory_to_objects(inv, parent_invs,
 
295
        for path, obj, ie in _tree_to_objects(tree, parent_trees,
296
296
                self._cache.idmap, unusual_modes,
297
297
                self.repository.iter_files_bytes, has_ghost_parents):
298
298
            yield path, obj, ie
318
318
 
319
319
    def _update_sha_map_revision(self, revid):
320
320
        rev = self.repository.get_revision(revid)
321
 
        inv = self.parent_invs_cache.get_inventory(rev.revision_id)
 
321
        tree = self.tree_cache.revision_tree(rev.revision_id)
322
322
        updater = self._get_updater(rev)
323
 
        for path, obj, ie in self._revision_to_objects(rev, inv):
 
323
        for path, obj, ie in self._revision_to_objects(rev, tree):
324
324
            updater.add_object(obj, ie)
325
325
        commit_obj = updater.finish()
326
326
        return commit_obj.id
336
336
        blob.chunked = chunks
337
337
        if blob.id != expected_sha:
338
338
            # Perhaps it's a symlink ?
339
 
            inv = self.parent_invs_cache.get_inventory(revision)
340
 
            entry = inv[fileid]
 
339
            tree = self.tree_cache.revision_tree(revision)
 
340
            entry = tree.inventory[fileid]
341
341
            assert entry.kind == 'symlink'
342
342
            blob = symlink_to_blob(entry)
343
343
        _check_expected_sha(expected_sha, blob)
447
447
        elif type == "tree":
448
448
            (fileid, revid) = type_data
449
449
            try:
450
 
                inv = self.parent_invs_cache.get_inventory(revid)
 
450
                tree = self.tree_cache.revision_tree(revid)
451
451
                rev = self.repository.get_revision(revid)
452
452
            except errors.NoSuchRevision:
453
453
                trace.mutter('entry for %s %s in shamap: %r, but not found in repository', type, sha, type_data)
454
454
                raise KeyError(sha)
455
455
            unusual_modes = extract_unusual_modes(rev)
456
456
            try:
457
 
                return self._get_tree(fileid, revid, inv, unusual_modes,
458
 
                    expected_sha=sha)
 
457
                return self._get_tree(fileid, revid, tree.inventory,
 
458
                    unusual_modes, expected_sha=sha)
459
459
            except errors.NoSuchRevision:
460
460
                raise KeyError(sha)
461
461
        else:
501
501
            for i, revid in enumerate(todo):
502
502
                pb.update("generating git objects", i, len(todo))
503
503
                rev = self.repository.get_revision(revid)
504
 
                inv = self.parent_invs_cache.get_inventory(revid)
505
 
                for path, obj, ie in self._revision_to_objects(rev, inv):
 
504
                tree = self.tree_cache.revision_tree(revid)
 
505
                for path, obj, ie in self._revision_to_objects(rev, tree):
506
506
                    ret.append((obj, path))
507
507
        finally:
508
508
            pb.finished()