1051
1054
@needs_write_lock
def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
    """Sign plaintext and record the result as the signature of revision_id.

    :param gpg_strategy: Object whose sign() method is called on plaintext.
    :param plaintext: The text handed to gpg_strategy.sign().
    :param revision_id: The revision the resulting signature is stored for.
    """
    signed_text = gpg_strategy.sign(plaintext)
    # Storage is delegated to add_signature_text on this same object.
    self.add_signature_text(revision_id, signed_text)
1060
def add_signature_text(self, revision_id, signature):
1054
1061
self._revision_store.add_revision_signature_text(revision_id,
1056
1063
self.get_transaction())
1058
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
1060
"""Helper routine for fileids_altered_by_revision_ids.
1065
def find_text_key_references(self):
1066
"""Find the text key references within the repository.
1068
:return: a dictionary mapping (file_id, revision_id) tuples to altered file-ids to an iterable of
1069
revision_ids. Each altered file-ids has the exact revision_ids that
1070
altered it listed explicitly.
1071
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1072
to whether they were referred to by the inventory of the
1073
revision_id that they contain. The inventory texts from all present
1074
revision ids are assessed to generate this report.
1076
revision_ids = self.all_revision_ids()
1077
w = self.get_inventory_weave()
1078
pb = ui.ui_factory.nested_progress_bar()
1080
return self._find_text_key_references_from_xml_inventory_lines(
1081
w.iter_lines_added_or_present_in_versions(revision_ids, pb=pb))
1085
def _find_text_key_references_from_xml_inventory_lines(self,
1087
"""Core routine for extracting references to texts from inventories.
1062
1089
This performs the translation of xml lines to revision ids.
1064
1091
:param line_iterator: An iterator of lines, origin_version_id
1065
:param revision_ids: The revision ids to filter for. This should be a
1066
set or other type which supports efficient __contains__ lookups, as
1067
the revision id from each parsed line will be looked up in the
1068
revision_ids filter.
1069
:return: a dictionary mapping altered file-ids to an iterable of
1070
revision_ids. Each altered file-ids has the exact revision_ids that
1071
altered it listed explicitly.
1092
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1093
to whether they were referred to by the inventory of the
1094
revision_id that they contain. Note that if that revision_id was
1095
not part of the line_iterator's output then False will be given -
1096
even though it may actually refer to that key.
1098
if not self._serializer.support_altered_by_hack:
1099
raise AssertionError(
1100
"_find_text_key_references_from_xml_inventory_lines only "
1101
"supported for branches which store inventory as unnested xml"
1102
", not on %r" % self)
1075
1105
# this code needs to read every new line in every inventory for the
1115
1145
unescape_revid_cache[revision_id] = unescaped
1116
1146
revision_id = unescaped
1148
# Note that unconditionally unescaping means that we deserialise
1149
# every fileid, which for general 'pull' is not great, but we don't
1150
# really want to have so many fulltexts that this matters anyway.
1153
file_id = unescape_fileid_cache[file_id]
1155
unescaped = unescape(file_id)
1156
unescape_fileid_cache[file_id] = unescaped
1159
key = (file_id, revision_id)
1160
setdefault(key, False)
1161
if revision_id == version_id:
1165
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
1167
"""Helper routine for fileids_altered_by_revision_ids.
1169
This performs the translation of xml lines to revision ids.
1171
:param line_iterator: An iterator of lines, origin_version_id
1172
:param revision_ids: The revision ids to filter for. This should be a
1173
set or other type which supports efficient __contains__ lookups, as
1174
the revision id from each parsed line will be looked up in the
1175
revision_ids filter.
1176
:return: a dictionary mapping altered file-ids to an iterable of
1177
revision_ids. Each altered file-ids has the exact revision_ids that
1178
altered it listed explicitly.
1181
setdefault = result.setdefault
1182
for file_id, revision_id in \
1183
self._find_text_key_references_from_xml_inventory_lines(
1184
line_iterator).iterkeys():
1118
1185
# once data is all ensured-consistent; then this is
1119
1186
# if revision_id == version_id
1120
1187
if revision_id in revision_ids:
1122
file_id = unescape_fileid_cache[file_id]
1124
unescaped = unescape(file_id)
1125
unescape_fileid_cache[file_id] = unescaped
1127
1188
setdefault(file_id, set()).add(revision_id)
1176
1234
raise errors.NoSuchIdInRepository(self, file_id)
1177
1235
yield callable_data, weave.get_lines(revision_id)
1237
def _generate_text_key_index(self):
1238
"""Generate a new text key index for the repository.
1240
This is an expensive function that will take considerable time to run.
1242
:return: A dict mapping text keys ((file_id, revision_id) tuples) to a
1243
list of parents, also text keys. When a given key has no parents,
1244
the parents list will be [NULL_REVISION].
1246
# All revisions, to find inventory parents.
1247
revision_graph = self.get_revision_graph_with_ghosts()
1248
ancestors = revision_graph.get_ancestors()
1249
text_key_references = self.find_text_key_references()
1250
pb = ui.ui_factory.nested_progress_bar()
1252
return self._do_generate_text_key_index(ancestors,
1253
text_key_references, pb)
1257
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
1258
"""Helper for _generate_text_key_index to avoid deep nesting."""
1259
revision_order = tsort.topo_sort(ancestors)
1260
invalid_keys = set()
1262
for revision_id in revision_order:
1263
revision_keys[revision_id] = set()
1264
text_count = len(text_key_references)
1265
# a cache of the text keys to allow reuse; costs a dict of all the
1266
# keys, but saves a 2-tuple for every child of a given key.
1268
for text_key, valid in text_key_references.iteritems():
1270
invalid_keys.add(text_key)
1272
revision_keys[text_key[1]].add(text_key)
1273
text_key_cache[text_key] = text_key
1274
del text_key_references
1276
text_graph = graph.Graph(graph.DictParentsProvider(text_index))
1277
NULL_REVISION = _mod_revision.NULL_REVISION
1278
# Set a cache with a size of 10 - this suffices for bzr.dev but may be
1279
# too small for large or very branchy trees. However, for 55K path
1280
# trees, it would be easy to use too much memory trivially. Ideally we
1281
# could gauge this by looking at available real memory etc, but this is
1282
# always a tricky proposition.
1283
inventory_cache = lru_cache.LRUCache(10)
1284
batch_size = 10 # should be ~150MB on a 55K path tree
1285
batch_count = len(revision_order) / batch_size + 1
1287
pb.update("Calculating text parents.", processed_texts, text_count)
1288
for offset in xrange(batch_count):
1289
to_query = revision_order[offset * batch_size:(offset + 1) *
1293
for rev_tree in self.revision_trees(to_query):
1294
revision_id = rev_tree.get_revision_id()
1295
parent_ids = ancestors[revision_id]
1296
for text_key in revision_keys[revision_id]:
1297
pb.update("Calculating text parents.", processed_texts)
1298
processed_texts += 1
1299
candidate_parents = []
1300
for parent_id in parent_ids:
1301
parent_text_key = (text_key[0], parent_id)
1303
check_parent = parent_text_key not in \
1304
revision_keys[parent_id]
1306
# the parent parent_id is a ghost:
1307
check_parent = False
1308
# truncate the derived graph against this ghost.
1309
parent_text_key = None
1311
# look at the parent commit details inventories to
1312
# determine possible candidates in the per file graph.
1315
inv = inventory_cache[parent_id]
1317
inv = self.revision_tree(parent_id).inventory
1318
inventory_cache[parent_id] = inv
1319
parent_entry = inv._byid.get(text_key[0], None)
1320
if parent_entry is not None:
1322
text_key[0], parent_entry.revision)
1324
parent_text_key = None
1325
if parent_text_key is not None:
1326
candidate_parents.append(
1327
text_key_cache[parent_text_key])
1328
parent_heads = text_graph.heads(candidate_parents)
1329
new_parents = list(parent_heads)
1330
new_parents.sort(key=lambda x:candidate_parents.index(x))
1331
if new_parents == []:
1332
new_parents = [NULL_REVISION]
1333
text_index[text_key] = new_parents
1335
for text_key in invalid_keys:
1336
text_index[text_key] = [NULL_REVISION]
1179
1339
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
1180
1340
"""Get an iterable listing the keys of all the data introduced by a set
1181
1341
of revision IDs.
1629
1790
def install_revision(repository, rev, revision_tree):
    """Install all data for a single revision into a repository.

    This is a convenience wrapper: it hands install_revisions a one-element
    batch whose signature slot is None.
    """
    single_entry = (rev, revision_tree, None)
    install_revisions(repository, [single_entry])
1795
def install_revisions(repository, iterable):
1796
"""Install all revision data into a repository.
1798
Accepts an iterable of revision, tree, signature tuples. The signature
1631
1801
repository.start_write_group()
1633
_install_revision(repository, rev, revision_tree)
1803
for revision, revision_tree, signature in iterable:
1804
_install_revision(repository, revision, revision_tree, signature)
1635
1806
repository.abort_write_group()
2495
2685
return f.count_copied, f.failed_revisions
2688
class InterDifferingSerializer(InterKnitRepo):
2691
def _get_repo_format_to_test(self):
2695
def is_compatible(source, target):
2696
"""Be compatible with Knit2 source and Knit3 target"""
2697
if source.supports_rich_root() != target.supports_rich_root():
2699
# Ideally, we'd support fetching if the source had no tree references
2700
# even if it supported them...
2701
if (getattr(source, '_format.supports_tree_reference', False) and
2702
not getattr(target, '_format.supports_tree_reference', False)):
2707
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2708
"""See InterRepository.fetch()."""
2709
revision_ids = self.target.missing_revision_ids(self.source,
2711
def revisions_iterator():
2712
for current_revision_id in revision_ids:
2713
revision = self.source.get_revision(current_revision_id)
2714
tree = self.source.revision_tree(current_revision_id)
2716
signature = self.source.get_signature_text(
2717
current_revision_id)
2718
except errors.NoSuchRevision:
2720
yield revision, tree, signature
2721
install_revisions(self.target, revisions_iterator())
2722
return len(revision_ids), 0
2498
2725
class InterRemoteToOther(InterRepository):
2500
2727
def __init__(self, source, target):
2656
2884
return _unescape_re.sub(_unescaper, data)
2659
class _RevisionTextVersionCache(object):
2660
"""A cache of the versionedfile versions for revision and file-id."""
2887
class VersionedFileChecker(object):
2662
2889
def __init__(self, repository):
2663
2890
self.repository = repository
2664
self.revision_versions = {}
2665
self.revision_parents = {}
2666
self.repo_graph = self.repository.get_graph()
2667
# XXX: RBC: I haven't tracked down what uses this, but it would be
2668
# better to use the headscache directly I think.
2669
self.heads = graph.HeadsCache(self.repo_graph).heads
2671
def add_revision_text_versions(self, tree):
2672
"""Cache text version data from the supplied revision tree"""
2674
for path, entry in tree.iter_entries_by_dir():
2675
inv_revisions[entry.file_id] = entry.revision
2676
self.revision_versions[tree.get_revision_id()] = inv_revisions
2677
return inv_revisions
2679
def get_text_version(self, file_id, revision_id):
2680
"""Determine the text version for a given file-id and revision-id"""
2682
inv_revisions = self.revision_versions[revision_id]
2685
tree = self.repository.revision_tree(revision_id)
2686
except errors.RevisionNotPresent:
2687
self.revision_versions[revision_id] = inv_revisions = {}
2689
inv_revisions = self.add_revision_text_versions(tree)
2690
return inv_revisions.get(file_id)
2692
def prepopulate_revs(self, revision_ids):
2693
# Filter out versions that we don't have an inventory for, so that the
2694
# revision_trees() call won't fail.
2695
inv_weave = self.repository.get_inventory_weave()
2696
revs = [r for r in revision_ids if inv_weave.has_version(r)]
2697
# XXX: this loop is very similar to
2698
# bzrlib.fetch.Inter1and2Helper.iter_rev_trees.
2700
mutter('%d revisions left to prepopulate', len(revs))
2701
for tree in self.repository.revision_trees(revs[:100]):
2702
if tree.inventory.revision_id is None:
2703
tree.inventory.revision_id = tree.get_revision_id()
2704
self.add_revision_text_versions(tree)
2707
def get_parents(self, revision_id):
2709
return self.revision_parents[revision_id]
2711
parents = self.repository.get_parents([revision_id])[0]
2712
self.revision_parents[revision_id] = parents
2715
def used_file_versions(self):
2716
"""Return a set of (revision_id, file_id) pairs for each file version
2717
referenced by any inventory cached by this _RevisionTextVersionCache.
2719
If the entire repository has been cached, this can be used to find all
2720
file versions that are actually referenced by inventories. Thus any
2721
other file version is completely unused and can be removed safely.
2724
for inventory_summary in self.revision_versions.itervalues():
2725
result.update(inventory_summary.items())
2729
class VersionedFileChecker(object):
2731
def __init__(self, planned_revisions, revision_versions, repository):
2732
self.planned_revisions = planned_revisions
2733
self.revision_versions = revision_versions
2734
self.repository = repository
2891
self.text_index = self.repository._generate_text_key_index()
2736
2893
def calculate_file_version_parents(self, revision_id, file_id):
2737
2894
"""Calculate the correct parents for a file version according to
2738
2895
the inventories.
2740
text_revision = self.revision_versions.get_text_version(
2741
file_id, revision_id)
2742
if text_revision is None:
2744
parents_of_text_revision = self.revision_versions.get_parents(
2746
parents_from_inventories = []
2747
for parent in parents_of_text_revision:
2748
if parent == _mod_revision.NULL_REVISION:
2750
introduced_in = self.revision_versions.get_text_version(file_id,
2752
if introduced_in is not None:
2753
parents_from_inventories.append(introduced_in)
2754
heads = set(self.revision_versions.heads(parents_from_inventories))
2756
for parent in parents_from_inventories:
2757
if parent in heads and parent not in new_parents:
2758
new_parents.append(parent)
2759
return tuple(new_parents)
2897
parent_keys = self.text_index[(file_id, revision_id)]
2898
if parent_keys == [_mod_revision.NULL_REVISION]:
2900
# strip the file_id, for the weave api
2901
return tuple([revision_id for file_id, revision_id in parent_keys])
2761
def check_file_version_parents(self, weave, file_id):
2903
def check_file_version_parents(self, weave, file_id, planned_revisions):
2762
2904
"""Check the parents stored in a versioned file are correct.
2764
2906
It also detects file versions that are not referenced by their
2772
2914
file, but not used by the corresponding inventory.
2774
2916
wrong_parents = {}
2775
dangling_file_versions = set()
2776
for num, revision_id in enumerate(self.planned_revisions):
2777
correct_parents = self.calculate_file_version_parents(
2778
revision_id, file_id)
2779
if correct_parents is None:
2781
text_revision = self.revision_versions.get_text_version(
2782
file_id, revision_id)
2917
unused_versions = set()
2918
for num, revision_id in enumerate(planned_revisions):
2784
knit_parents = tuple(weave.get_parents(revision_id))
2785
except errors.RevisionNotPresent:
2787
if text_revision != revision_id:
2788
# This file version is not referenced by its corresponding
2790
dangling_file_versions.add((file_id, revision_id))
2791
if correct_parents != knit_parents:
2792
wrong_parents[revision_id] = (knit_parents, correct_parents)
2793
return wrong_parents, dangling_file_versions
2920
correct_parents = self.calculate_file_version_parents(
2921
revision_id, file_id)
2923
# we were asked to investigate a non-existent version.
2924
unused_versions.add(revision_id)
2927
knit_parents = tuple(weave.get_parents(revision_id))
2928
except errors.RevisionNotPresent:
2930
if correct_parents != knit_parents:
2931
wrong_parents[revision_id] = (knit_parents, correct_parents)
2932
return wrong_parents, unused_versions