54
73
raise NotImplementedError(self.report_results)
57
def scan_branch(branch, needed_refs, exit_stack):
76
class VersionedFileCheck(Check):
77
"""Check a versioned file repository"""
79
# The Check object interacts with InventoryEntry.check, etc.
81
def __init__(self, repository, check_repo=True):
82
self.repository = repository
83
self.checked_rev_cnt = 0
85
self.missing_parent_links = {}
86
self.missing_inventory_sha_cnt = 0
87
self.missing_revision_cnt = 0
88
self.checked_weaves = set()
89
self.unreferenced_versions = set()
90
self.inconsistent_parents = []
91
self.rich_roots = repository.supports_rich_root()
92
self.text_key_references = {}
93
self.check_repo = check_repo
94
self.other_results = []
95
# Plain text lines to include in the report
96
self._report_items = []
97
# Keys we are looking for; may be large and need spilling to disk.
98
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
99
self.pending_keys = {}
100
# Ancestors map for all of revisions being checked; while large helper
101
# functions we call would create it anyway, so better to have once and
105
def check(self, callback_refs=None, check_repo=True):
106
if callback_refs is None:
108
self.repository.lock_read()
109
self.progress = ui.ui_factory.nested_progress_bar()
111
self.progress.update(gettext('check'), 0, 4)
113
self.progress.update(gettext('checking revisions'), 0)
114
self.check_revisions()
115
self.progress.update(gettext('checking commit contents'), 1)
116
self.repository._check_inventories(self)
117
self.progress.update(gettext('checking file graphs'), 2)
118
# check_weaves is done after the revision scan so that
119
# revision index is known to be valid.
121
self.progress.update(gettext('checking branches and trees'), 3)
123
repo = self.repository
124
# calculate all refs, and callback the objects requesting them.
126
wanting_items = set()
127
# Current crude version calculates everything and calls
128
# everything at once. Doing a queue and popping as things are
129
# satisfied would be cheaper on memory [but few people have
130
# huge numbers of working trees today. TODO: fix before
134
for ref, wantlist in viewitems(callback_refs):
135
wanting_items.update(wantlist)
138
refs[ref] = repo.revision_tree(value)
139
elif kind == 'lefthand-distance':
141
elif kind == 'revision-existence':
142
existences.add(value)
144
raise AssertionError(
145
'unknown ref kind for ref %s' % ref)
146
node_distances = repo.get_graph().find_lefthand_distances(distances)
147
for key, distance in viewitems(node_distances):
148
refs[('lefthand-distance', key)] = distance
149
if key in existences and distance > 0:
150
refs[('revision-existence', key)] = True
151
existences.remove(key)
152
parent_map = repo.get_graph().get_parent_map(existences)
153
for key in parent_map:
154
refs[('revision-existence', key)] = True
155
existences.remove(key)
156
for key in existences:
157
refs[('revision-existence', key)] = False
158
for item in wanting_items:
159
if isinstance(item, WorkingTree):
161
if isinstance(item, Branch):
162
self.other_results.append(item.check(refs))
164
self.progress.finished()
165
self.repository.unlock()
167
def _check_revisions(self, revisions_iterator):
168
"""Check revision objects by decorating a generator.
170
:param revisions_iterator: An iterator of(revid, Revision-or-None).
171
:return: A generator of the contents of revisions_iterator.
173
self.planned_revisions = set()
174
for revid, revision in revisions_iterator:
175
yield revid, revision
176
self._check_one_rev(revid, revision)
177
# Flatten the revisions we found to guarantee consistent later
179
self.planned_revisions = list(self.planned_revisions)
180
# TODO: extract digital signatures as items to callback on too.
182
def check_revisions(self):
183
"""Scan revisions, checking data directly available as we go."""
184
revision_iterator = self.repository.iter_revisions(
185
self.repository.all_revision_ids())
186
revision_iterator = self._check_revisions(revision_iterator)
187
# We read the all revisions here:
188
# - doing this allows later code to depend on the revision index.
189
# - we can fill out existence flags at this point
190
# - we can read the revision inventory sha at this point
191
# - we can check properties and serialisers etc.
192
if not self.repository._format.revision_graph_can_have_wrong_parents:
193
# The check against the index isn't needed.
194
self.revs_with_bad_parents_in_index = None
195
for thing in revision_iterator:
198
bad_revisions = self.repository._find_inconsistent_revision_parents(
200
self.revs_with_bad_parents_in_index = list(bad_revisions)
202
def report_results(self, verbose):
204
self._report_repo_results(verbose)
205
for result in self.other_results:
206
result.report_results(verbose)
208
def _report_repo_results(self, verbose):
209
note(gettext('checked repository {0} format {1}').format(
210
self.repository.user_url,
211
self.repository._format))
212
note(gettext('%6d revisions'), self.checked_rev_cnt)
213
note(gettext('%6d file-ids'), len(self.checked_weaves))
215
note(gettext('%6d unreferenced text versions'),
216
len(self.unreferenced_versions))
217
if verbose and len(self.unreferenced_versions):
218
for file_id, revision_id in self.unreferenced_versions:
219
note(gettext('unreferenced version: {{{0}}} in {1}').format(revision_id,
221
if self.missing_inventory_sha_cnt:
222
note(gettext('%6d revisions are missing inventory_sha1'),
223
self.missing_inventory_sha_cnt)
224
if self.missing_revision_cnt:
225
note(gettext('%6d revisions are mentioned but not present'),
226
self.missing_revision_cnt)
228
note(gettext('%6d ghost revisions'), len(self.ghosts))
230
for ghost in self.ghosts:
232
if len(self.missing_parent_links):
233
note(gettext('%6d revisions missing parents in ancestry'),
234
len(self.missing_parent_links))
236
for link, linkers in viewitems(self.missing_parent_links):
237
note(gettext(' %s should be in the ancestry for:'), link)
238
for linker in linkers:
239
note(' * %s', linker)
240
if len(self.inconsistent_parents):
241
note(gettext('%6d inconsistent parents'), len(self.inconsistent_parents))
243
for info in self.inconsistent_parents:
244
revision_id, file_id, found_parents, correct_parents = info
245
note(gettext(' * {0} version {1} has parents {2!r} '
246
'but should have {3!r}').format(
247
file_id, revision_id, found_parents,
249
if self.revs_with_bad_parents_in_index:
251
'%6d revisions have incorrect parents in the revision index'),
252
len(self.revs_with_bad_parents_in_index))
254
for item in self.revs_with_bad_parents_in_index:
255
revision_id, index_parents, actual_parents = item
257
' {0} has wrong parents in index: '
258
'{1!r} should be {2!r}').format(
259
revision_id, index_parents, actual_parents))
260
for item in self._report_items:
263
def _check_one_rev(self, rev_id, rev):
264
"""Cross-check one revision.
266
:param rev_id: A revision id to check.
267
:param rev: A revision or None to indicate a missing revision.
269
if rev.revision_id != rev_id:
270
self._report_items.append(gettext(
271
'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format(
272
rev.revision_id, rev_id))
273
rev_id = rev.revision_id
274
# Check this revision tree etc, and count as seen when we encounter a
276
self.planned_revisions.add(rev_id)
278
self.ghosts.discard(rev_id)
279
# Count all parents as ghosts if we haven't seen them yet.
280
for parent in rev.parent_ids:
281
if not parent in self.planned_revisions:
282
self.ghosts.add(parent)
284
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
285
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
287
self.checked_rev_cnt += 1
289
def add_pending_item(self, referer, key, kind, sha1):
290
"""Add a reference to a sha1 to be cross checked against a key.
292
:param referer: The referer that expects key to have sha1.
293
:param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
294
:param kind: revision/inventory/text/map/signature
295
:param sha1: A hex sha1 or None if no sha1 is known.
297
existing = self.pending_keys.get(key)
299
if sha1 != existing[1]:
300
self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}'
301
' expects {{{2}}}, {{{3}}} expects {{{4}}}').format(
302
key, referer, sha1, existing[1], existing[0]))
304
self.pending_keys[key] = (kind, sha1, referer)
306
def check_weaves(self):
307
"""Check all the weaves we can get our hands on.
310
storebar = ui.ui_factory.nested_progress_bar()
312
self._check_weaves(storebar)
316
def _check_weaves(self, storebar):
317
storebar.update('text-index', 0, 2)
318
if self.repository._format.fast_deltas:
319
# We haven't considered every fileid instance so far.
320
weave_checker = self.repository._get_versioned_file_checker(
321
ancestors=self.ancestors)
323
weave_checker = self.repository._get_versioned_file_checker(
324
text_key_references=self.text_key_references,
325
ancestors=self.ancestors)
326
storebar.update('file-graph', 1)
327
wrongs, unused_versions = weave_checker.check_file_version_parents(
328
self.repository.texts)
329
self.checked_weaves = weave_checker.file_ids
330
for text_key, (stored_parents, correct_parents) in viewitems(wrongs):
331
# XXX not ready for id join/split operations.
332
weave_id = text_key[0]
333
revision_id = text_key[-1]
334
weave_parents = tuple([parent[-1] for parent in stored_parents])
335
correct_parents = tuple([parent[-1] for parent in correct_parents])
336
self.inconsistent_parents.append(
337
(revision_id, weave_id, weave_parents, correct_parents))
338
self.unreferenced_versions.update(unused_versions)
340
def _add_entry_to_text_key_references(self, inv, entry):
341
if not self.rich_roots and entry.name == '':
343
key = (entry.file_id, entry.revision)
344
self.text_key_references.setdefault(key, False)
345
if entry.revision == inv.revision_id:
346
self.text_key_references[key] = True
349
def scan_branch(branch, needed_refs, to_unlock):
58
350
"""Scan a branch for refs.
60
352
:param branch: The branch to schedule for checking.
61
353
:param needed_refs: Refs we are accumulating.
62
:param exit_stack: The exit stack accumulating.
354
:param to_unlock: The unlock list accumulating.
64
356
note(gettext("Checking branch at '%s'.") % (branch.base,))
65
exit_stack.enter_context(branch.lock_read())
358
to_unlock.append(branch)
66
359
branch_refs = branch._get_check_refs()
67
360
for ref in branch_refs:
68
361
reflist = needed_refs.setdefault(ref, [])
69
362
reflist.append(branch)
72
def scan_tree(base_tree, tree, needed_refs, exit_stack):
365
def scan_tree(base_tree, tree, needed_refs, to_unlock):
73
366
"""Scan a tree for refs.
75
368
:param base_tree: The original tree check opened, used to detect duplicate
77
370
:param tree: The tree to schedule for checking.
78
371
:param needed_refs: Refs we are accumulating.
79
:param exit_stack: The exit stack accumulating.
372
:param to_unlock: The unlock list accumulating.
81
374
if base_tree is not None and tree.basedir == base_tree.basedir:
83
376
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
84
exit_stack.enter_context(tree.lock_read())
378
to_unlock.append(tree)
85
379
tree_refs = tree._get_check_refs()
86
380
for ref in tree_refs:
87
381
reflist = needed_refs.setdefault(ref, [])