73
60
raise NotImplementedError(self.report_results)
76
class VersionedFileCheck(Check):
77
"""Check a versioned file repository"""
79
# The Check object interacts with InventoryEntry.check, etc.
81
def __init__(self, repository, check_repo=True):
82
self.repository = repository
83
self.checked_rev_cnt = 0
85
self.missing_parent_links = {}
86
self.missing_inventory_sha_cnt = 0
87
self.missing_revision_cnt = 0
88
self.checked_weaves = set()
89
self.unreferenced_versions = set()
90
self.inconsistent_parents = []
91
self.rich_roots = repository.supports_rich_root()
92
self.text_key_references = {}
93
self.check_repo = check_repo
94
self.other_results = []
95
# Plain text lines to include in the report
96
self._report_items = []
97
# Keys we are looking for; may be large and need spilling to disk.
98
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
99
self.pending_keys = {}
100
# Ancestors map for all of the revisions being checked; while large, helper
101
# functions we call would create it anyway, so better to have once and
105
def check(self, callback_refs=None, check_repo=True):
106
if callback_refs is None:
108
with self.repository.lock_read(), ui.ui_factory.nested_progress_bar() as self.progress:
109
self.progress.update(gettext('check'), 0, 4)
111
self.progress.update(gettext('checking revisions'), 0)
112
self.check_revisions()
113
self.progress.update(gettext('checking commit contents'), 1)
114
self.repository._check_inventories(self)
115
self.progress.update(gettext('checking file graphs'), 2)
116
# check_weaves is done after the revision scan so that
117
# revision index is known to be valid.
119
self.progress.update(gettext('checking branches and trees'), 3)
121
repo = self.repository
122
# calculate all refs, and callback the objects requesting them.
124
wanting_items = set()
125
# Current crude version calculates everything and calls
126
# everything at once. Doing a queue and popping as things are
127
# satisfied would be cheaper on memory [but few people have
128
# huge numbers of working trees today. TODO: fix before
132
for ref, wantlist in viewitems(callback_refs):
133
wanting_items.update(wantlist)
136
refs[ref] = repo.revision_tree(value)
137
elif kind == 'lefthand-distance':
139
elif kind == 'revision-existence':
140
existences.add(value)
142
raise AssertionError(
143
'unknown ref kind for ref %s' % ref)
144
node_distances = repo.get_graph().find_lefthand_distances(distances)
145
for key, distance in viewitems(node_distances):
146
refs[('lefthand-distance', key)] = distance
147
if key in existences and distance > 0:
148
refs[('revision-existence', key)] = True
149
existences.remove(key)
150
parent_map = repo.get_graph().get_parent_map(existences)
151
for key in parent_map:
152
refs[('revision-existence', key)] = True
153
existences.remove(key)
154
for key in existences:
155
refs[('revision-existence', key)] = False
156
for item in wanting_items:
157
if isinstance(item, WorkingTree):
159
if isinstance(item, Branch):
160
self.other_results.append(item.check(refs))
162
def _check_revisions(self, revisions_iterator):
163
"""Check revision objects by decorating a generator.
165
:param revisions_iterator: An iterator of(revid, Revision-or-None).
166
:return: A generator of the contents of revisions_iterator.
168
self.planned_revisions = set()
169
for revid, revision in revisions_iterator:
170
yield revid, revision
171
self._check_one_rev(revid, revision)
172
# Flatten the revisions we found to guarantee consistent later
174
self.planned_revisions = list(self.planned_revisions)
175
# TODO: extract digital signatures as items to callback on too.
177
def check_revisions(self):
178
"""Scan revisions, checking data directly available as we go."""
179
revision_iterator = self.repository.iter_revisions(
180
self.repository.all_revision_ids())
181
revision_iterator = self._check_revisions(revision_iterator)
182
# We read all the revisions here:
183
# - doing this allows later code to depend on the revision index.
184
# - we can fill out existence flags at this point
185
# - we can read the revision inventory sha at this point
186
# - we can check properties and serialisers etc.
187
if not self.repository._format.revision_graph_can_have_wrong_parents:
188
# The check against the index isn't needed.
189
self.revs_with_bad_parents_in_index = None
190
for thing in revision_iterator:
193
bad_revisions = self.repository._find_inconsistent_revision_parents(
195
self.revs_with_bad_parents_in_index = list(bad_revisions)
197
def report_results(self, verbose):
199
self._report_repo_results(verbose)
200
for result in self.other_results:
201
result.report_results(verbose)
203
def _report_repo_results(self, verbose):
204
note(gettext('checked repository {0} format {1}').format(
205
self.repository.user_url,
206
self.repository._format))
207
note(gettext('%6d revisions'), self.checked_rev_cnt)
208
note(gettext('%6d file-ids'), len(self.checked_weaves))
210
note(gettext('%6d unreferenced text versions'),
211
len(self.unreferenced_versions))
212
if verbose and len(self.unreferenced_versions):
213
for file_id, revision_id in self.unreferenced_versions:
214
note(gettext('unreferenced version: {{{0}}} in {1}').format(revision_id,
216
if self.missing_inventory_sha_cnt:
217
note(gettext('%6d revisions are missing inventory_sha1'),
218
self.missing_inventory_sha_cnt)
219
if self.missing_revision_cnt:
220
note(gettext('%6d revisions are mentioned but not present'),
221
self.missing_revision_cnt)
223
note(gettext('%6d ghost revisions'), len(self.ghosts))
225
for ghost in self.ghosts:
227
if len(self.missing_parent_links):
228
note(gettext('%6d revisions missing parents in ancestry'),
229
len(self.missing_parent_links))
231
for link, linkers in viewitems(self.missing_parent_links):
232
note(gettext(' %s should be in the ancestry for:'), link)
233
for linker in linkers:
234
note(' * %s', linker)
235
if len(self.inconsistent_parents):
236
note(gettext('%6d inconsistent parents'), len(self.inconsistent_parents))
238
for info in self.inconsistent_parents:
239
revision_id, file_id, found_parents, correct_parents = info
240
note(gettext(' * {0} version {1} has parents {2!r} '
241
'but should have {3!r}').format(
242
file_id, revision_id, found_parents,
244
if self.revs_with_bad_parents_in_index:
246
'%6d revisions have incorrect parents in the revision index'),
247
len(self.revs_with_bad_parents_in_index))
249
for item in self.revs_with_bad_parents_in_index:
250
revision_id, index_parents, actual_parents = item
252
' {0} has wrong parents in index: '
253
'{1!r} should be {2!r}').format(
254
revision_id, index_parents, actual_parents))
255
for item in self._report_items:
258
def _check_one_rev(self, rev_id, rev):
259
"""Cross-check one revision.
261
:param rev_id: A revision id to check.
262
:param rev: A revision or None to indicate a missing revision.
264
if rev.revision_id != rev_id:
265
self._report_items.append(gettext(
266
'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format(
267
rev.revision_id, rev_id))
268
rev_id = rev.revision_id
269
# Check this revision tree etc, and count as seen when we encounter a
271
self.planned_revisions.add(rev_id)
273
self.ghosts.discard(rev_id)
274
# Count all parents as ghosts if we haven't seen them yet.
275
for parent in rev.parent_ids:
276
if not parent in self.planned_revisions:
277
self.ghosts.add(parent)
279
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
280
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
282
self.checked_rev_cnt += 1
284
def add_pending_item(self, referer, key, kind, sha1):
285
"""Add a reference to a sha1 to be cross checked against a key.
287
:param referer: The referer that expects key to have sha1.
288
:param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
289
:param kind: revision/inventory/text/map/signature
290
:param sha1: A hex sha1 or None if no sha1 is known.
292
existing = self.pending_keys.get(key)
294
if sha1 != existing[1]:
295
self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}'
296
' expects {{{2}}}, {{{3}}} expects {{{4}}}').format(
297
key, referer, sha1, existing[1], existing[0]))
299
self.pending_keys[key] = (kind, sha1, referer)
301
def check_weaves(self):
302
"""Check all the weaves we can get our hands on.
305
with ui.ui_factory.nested_progress_bar() as storebar:
306
self._check_weaves(storebar)
308
def _check_weaves(self, storebar):
309
storebar.update('text-index', 0, 2)
310
if self.repository._format.fast_deltas:
311
# We haven't considered every fileid instance so far.
312
weave_checker = self.repository._get_versioned_file_checker(
313
ancestors=self.ancestors)
315
weave_checker = self.repository._get_versioned_file_checker(
316
text_key_references=self.text_key_references,
317
ancestors=self.ancestors)
318
storebar.update('file-graph', 1)
319
wrongs, unused_versions = weave_checker.check_file_version_parents(
320
self.repository.texts)
321
self.checked_weaves = weave_checker.file_ids
322
for text_key, (stored_parents, correct_parents) in viewitems(wrongs):
323
# XXX not ready for id join/split operations.
324
weave_id = text_key[0]
325
revision_id = text_key[-1]
326
weave_parents = tuple([parent[-1] for parent in stored_parents])
327
correct_parents = tuple([parent[-1] for parent in correct_parents])
328
self.inconsistent_parents.append(
329
(revision_id, weave_id, weave_parents, correct_parents))
330
self.unreferenced_versions.update(unused_versions)
332
def _add_entry_to_text_key_references(self, inv, entry):
333
if not self.rich_roots and entry.name == '':
335
key = (entry.file_id, entry.revision)
336
self.text_key_references.setdefault(key, False)
337
if entry.revision == inv.revision_id:
338
self.text_key_references[key] = True
341
63
def scan_branch(branch, needed_refs, to_unlock):
342
64
"""Scan a branch for refs.