55
55
return BazaarObjectStore(repo, mapping)
58
MAX_INV_CACHE_SIZE = 50 * 1024 * 1024
61
class LRUInventoryCache(object):
58
MAX_TREE_CACHE_SIZE = 50 * 1024 * 1024
61
class LRUTreeCache(object):
63
63
def __init__(self, repository):
64
def approx_inv_size(inv):
64
def approx_tree_size(tree):
65
65
# Very rough estimate, 1k per inventory entry
66
return len(inv) * 1024
66
return len(tree.inventory) * 1024
67
67
self.repository = repository
68
self._cache = lru_cache.LRUSizeCache(max_size=MAX_INV_CACHE_SIZE,
69
after_cleanup_size=None, compute_size=approx_inv_size)
68
self._cache = lru_cache.LRUSizeCache(max_size=MAX_TREE_CACHE_SIZE,
69
after_cleanup_size=None, compute_size=approx_tree_size)
71
def get_inventory(self, revid):
71
def revision_tree(self, revid):
73
73
return self._cache[revid]
75
inv = self.repository.get_inventory(revid)
76
self._cache.add(revid, inv)
79
def iter_inventories(self, revids):
80
invs = dict([(k, self._cache.get(k)) for k in revids])
81
for inv in self.repository.iter_inventories(
82
[r for r, v in invs.iteritems() if v is None]):
83
invs[inv.revision_id] = inv
84
self._cache.add(inv.revision_id, inv)
85
return (invs[r] for r in revids)
87
def get_inventories(self, revids):
88
return list(self.iter_inventories(revids))
90
def add(self, revid, inv):
91
self._cache.add(revid, inv)
75
tree = self.repository.revision_tree(revid)
79
def iter_revision_trees(self, revids):
80
trees = dict([(k, self._cache.get(k)) for k in revids])
81
for tree in self.repository.revision_trees(
82
[r for r, v in trees.iteritems() if v is None]):
83
trees[tree.get_revision_id()] = tree
85
return (trees[r] for r in revids)
87
def revision_trees(self, revids):
88
return list(self.iter_revision_trees(revids))
91
self._cache.add(tree.get_revision_id(), tree)
94
94
def _check_expected_sha(expected_sha, object):
115
def _inventory_to_objects(inv, parent_invs, idmap,
115
def _tree_to_objects(tree, parent_trees, idmap,
116
116
unusual_modes, iter_files_bytes, has_ghost_parents):
117
117
"""Iterate over the objects that were introduced in a revision.
119
:param inv: Inventory to process
120
:param parent_invs: parent inventory SHA maps
121
119
:param idmap: id map
122
120
:param unusual_modes: Unusual file modes
123
121
:param iter_files_bytes: Repository.iter_files_bytes-like callback
124
122
:return: Yields (path, object, ie) entries
125
parent_invs = [t.inventory for t in parent_trees]
235
235
self.start_write_group = self._cache.idmap.start_write_group
236
236
self.abort_write_group = self._cache.idmap.abort_write_group
237
237
self.commit_write_group = self._cache.idmap.commit_write_group
238
self.parent_invs_cache = LRUInventoryCache(self.repository)
238
self.tree_cache = LRUTreeCache(self.repository)
240
240
def _update_sha_map(self, stop_revision=None):
241
241
graph = self.repository.get_graph()
286
286
return self.mapping.export_commit(rev, tree_sha, parent_lookup)
288
def _revision_to_objects(self, rev, inv):
288
def _revision_to_objects(self, rev, tree):
289
289
unusual_modes = extract_unusual_modes(rev)
290
290
present_parents = self.repository.has_revisions(rev.parent_ids)
291
291
has_ghost_parents = (len(rev.parent_ids) < len(present_parents))
292
parent_invs = self.parent_invs_cache.get_inventories(
292
parent_trees = self.tree_cache.revision_trees(
293
293
[p for p in rev.parent_ids if p in present_parents])
295
for path, obj, ie in _inventory_to_objects(inv, parent_invs,
295
for path, obj, ie in _tree_to_objects(tree, parent_trees,
296
296
self._cache.idmap, unusual_modes,
297
297
self.repository.iter_files_bytes, has_ghost_parents):
298
298
yield path, obj, ie
319
319
def _update_sha_map_revision(self, revid):
320
320
rev = self.repository.get_revision(revid)
321
inv = self.parent_invs_cache.get_inventory(rev.revision_id)
321
tree = self.tree_cache.revision_tree(rev.revision_id)
322
322
updater = self._get_updater(rev)
323
for path, obj, ie in self._revision_to_objects(rev, inv):
323
for path, obj, ie in self._revision_to_objects(rev, tree):
324
324
updater.add_object(obj, ie)
325
325
commit_obj = updater.finish()
326
326
return commit_obj.id
336
336
blob.chunked = chunks
337
337
if blob.id != expected_sha:
338
338
# Perhaps it's a symlink ?
339
inv = self.parent_invs_cache.get_inventory(revision)
339
tree = self.tree_cache.revision_tree(revision)
340
entry = tree.inventory[fileid]
341
341
assert entry.kind == 'symlink'
342
342
blob = symlink_to_blob(entry)
343
343
_check_expected_sha(expected_sha, blob)
447
447
elif type == "tree":
448
448
(fileid, revid) = type_data
450
inv = self.parent_invs_cache.get_inventory(revid)
450
tree = self.tree_cache.revision_tree(revid)
451
451
rev = self.repository.get_revision(revid)
452
452
except errors.NoSuchRevision:
453
453
trace.mutter('entry for %s %s in shamap: %r, but not found in repository', type, sha, type_data)
454
454
raise KeyError(sha)
455
455
unusual_modes = extract_unusual_modes(rev)
457
return self._get_tree(fileid, revid, inv, unusual_modes,
457
return self._get_tree(fileid, revid, tree.inventory,
458
unusual_modes, expected_sha=sha)
459
459
except errors.NoSuchRevision:
460
460
raise KeyError(sha)
501
501
for i, revid in enumerate(todo):
502
502
pb.update("generating git objects", i, len(todo))
503
503
rev = self.repository.get_revision(revid)
504
inv = self.parent_invs_cache.get_inventory(revid)
505
for path, obj, ie in self._revision_to_objects(rev, inv):
504
tree = self.tree_cache.revision_tree(revid)
505
for path, obj, ie in self._revision_to_objects(rev, tree):
506
506
ret.append((obj, path))