54
73
raise NotImplementedError(self.report_results)
57
def scan_branch(branch, needed_refs, exit_stack):
76
class VersionedFileCheck(Check):
77
"""Check a versioned file repository"""
79
# The Check object interacts with InventoryEntry.check, etc.
81
def __init__(self, repository, check_repo=True):
82
self.repository = repository
83
self.checked_rev_cnt = 0
85
self.missing_parent_links = {}
86
self.missing_inventory_sha_cnt = 0
87
self.missing_revision_cnt = 0
88
self.checked_weaves = set()
89
self.unreferenced_versions = set()
90
self.inconsistent_parents = []
91
self.rich_roots = repository.supports_rich_root()
92
self.text_key_references = {}
93
self.check_repo = check_repo
94
self.other_results = []
95
# Plain text lines to include in the report
96
self._report_items = []
97
# Keys we are looking for; may be large and need spilling to disk.
98
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
99
self.pending_keys = {}
100
# Ancestors map for all of revisions being checked; while large helper
101
# functions we call would create it anyway, so better to have once and
105
def check(self, callback_refs=None, check_repo=True):
106
if callback_refs is None:
108
self.repository.lock_read()
109
self.progress = ui.ui_factory.nested_progress_bar()
111
self.progress.update(gettext('check'), 0, 4)
113
self.progress.update(gettext('checking revisions'), 0)
114
self.check_revisions()
115
self.progress.update(gettext('checking commit contents'), 1)
116
self.repository._check_inventories(self)
117
self.progress.update(gettext('checking file graphs'), 2)
118
# check_weaves is done after the revision scan so that
119
# revision index is known to be valid.
121
self.progress.update(gettext('checking branches and trees'), 3)
123
repo = self.repository
124
# calculate all refs, and callback the objects requesting them.
126
wanting_items = set()
127
# Current crude version calculates everything and calls
128
# everything at once. Doing a queue and popping as things are
129
# satisfied would be cheaper on memory [but few people have
130
# huge numbers of working trees today. TODO: fix before
134
for ref, wantlist in viewitems(callback_refs):
135
wanting_items.update(wantlist)
138
refs[ref] = repo.revision_tree(value)
139
elif kind == 'lefthand-distance':
141
elif kind == 'revision-existence':
142
existences.add(value)
144
raise AssertionError(
145
'unknown ref kind for ref %s' % ref)
146
node_distances = repo.get_graph().find_lefthand_distances(distances)
147
for key, distance in viewitems(node_distances):
148
refs[('lefthand-distance', key)] = distance
149
if key in existences and distance > 0:
150
refs[('revision-existence', key)] = True
151
existences.remove(key)
152
parent_map = repo.get_graph().get_parent_map(existences)
153
for key in parent_map:
154
refs[('revision-existence', key)] = True
155
existences.remove(key)
156
for key in existences:
157
refs[('revision-existence', key)] = False
158
for item in wanting_items:
159
if isinstance(item, WorkingTree):
161
if isinstance(item, Branch):
162
self.other_results.append(item.check(refs))
164
self.progress.finished()
165
self.repository.unlock()
167
def _check_revisions(self, revisions_iterator):
168
"""Check revision objects by decorating a generator.
170
:param revisions_iterator: An iterator of(revid, Revision-or-None).
171
:return: A generator of the contents of revisions_iterator.
173
self.planned_revisions = set()
174
for revid, revision in revisions_iterator:
175
yield revid, revision
176
self._check_one_rev(revid, revision)
177
# Flatten the revisions we found to guarantee consistent later
179
self.planned_revisions = list(self.planned_revisions)
180
# TODO: extract digital signatures as items to callback on too.
182
def check_revisions(self):
183
"""Scan revisions, checking data directly available as we go."""
184
revision_iterator = self.repository._iter_revisions(None)
185
revision_iterator = self._check_revisions(revision_iterator)
186
# We read the all revisions here:
187
# - doing this allows later code to depend on the revision index.
188
# - we can fill out existence flags at this point
189
# - we can read the revision inventory sha at this point
190
# - we can check properties and serialisers etc.
191
if not self.repository._format.revision_graph_can_have_wrong_parents:
192
# The check against the index isn't needed.
193
self.revs_with_bad_parents_in_index = None
194
for thing in revision_iterator:
197
bad_revisions = self.repository._find_inconsistent_revision_parents(
199
self.revs_with_bad_parents_in_index = list(bad_revisions)
201
def report_results(self, verbose):
203
self._report_repo_results(verbose)
204
for result in self.other_results:
205
result.report_results(verbose)
207
def _report_repo_results(self, verbose):
208
note(gettext('checked repository {0} format {1}').format(
209
self.repository.user_url,
210
self.repository._format))
211
note(gettext('%6d revisions'), self.checked_rev_cnt)
212
note(gettext('%6d file-ids'), len(self.checked_weaves))
214
note(gettext('%6d unreferenced text versions'),
215
len(self.unreferenced_versions))
216
if verbose and len(self.unreferenced_versions):
217
for file_id, revision_id in self.unreferenced_versions:
218
note(gettext('unreferenced version: {{{0}}} in {1}').format(revision_id,
220
if self.missing_inventory_sha_cnt:
221
note(gettext('%6d revisions are missing inventory_sha1'),
222
self.missing_inventory_sha_cnt)
223
if self.missing_revision_cnt:
224
note(gettext('%6d revisions are mentioned but not present'),
225
self.missing_revision_cnt)
227
note(gettext('%6d ghost revisions'), len(self.ghosts))
229
for ghost in self.ghosts:
231
if len(self.missing_parent_links):
232
note(gettext('%6d revisions missing parents in ancestry'),
233
len(self.missing_parent_links))
235
for link, linkers in viewitems(self.missing_parent_links):
236
note(gettext(' %s should be in the ancestry for:'), link)
237
for linker in linkers:
238
note(' * %s', linker)
239
if len(self.inconsistent_parents):
240
note(gettext('%6d inconsistent parents'), len(self.inconsistent_parents))
242
for info in self.inconsistent_parents:
243
revision_id, file_id, found_parents, correct_parents = info
244
note(gettext(' * {0} version {1} has parents {2!r} '
245
'but should have {3!r}').format(
246
file_id, revision_id, found_parents,
248
if self.revs_with_bad_parents_in_index:
250
'%6d revisions have incorrect parents in the revision index'),
251
len(self.revs_with_bad_parents_in_index))
253
for item in self.revs_with_bad_parents_in_index:
254
revision_id, index_parents, actual_parents = item
256
' {0} has wrong parents in index: '
257
'{1!r} should be {2!r}').format(
258
revision_id, index_parents, actual_parents))
259
for item in self._report_items:
262
def _check_one_rev(self, rev_id, rev):
263
"""Cross-check one revision.
265
:param rev_id: A revision id to check.
266
:param rev: A revision or None to indicate a missing revision.
268
if rev.revision_id != rev_id:
269
self._report_items.append(gettext(
270
'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format(
271
rev.revision_id, rev_id))
272
rev_id = rev.revision_id
273
# Check this revision tree etc, and count as seen when we encounter a
275
self.planned_revisions.add(rev_id)
277
self.ghosts.discard(rev_id)
278
# Count all parents as ghosts if we haven't seen them yet.
279
for parent in rev.parent_ids:
280
if not parent in self.planned_revisions:
281
self.ghosts.add(parent)
283
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
284
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
286
self.checked_rev_cnt += 1
288
def add_pending_item(self, referer, key, kind, sha1):
289
"""Add a reference to a sha1 to be cross checked against a key.
291
:param referer: The referer that expects key to have sha1.
292
:param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
293
:param kind: revision/inventory/text/map/signature
294
:param sha1: A hex sha1 or None if no sha1 is known.
296
existing = self.pending_keys.get(key)
298
if sha1 != existing[1]:
299
self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}'
300
' expects {{{2}}}, {{{3}}} expects {{{4}}}').format(
301
key, referer, sha1, existing[1], existing[0]))
303
self.pending_keys[key] = (kind, sha1, referer)
305
def check_weaves(self):
306
"""Check all the weaves we can get our hands on.
309
storebar = ui.ui_factory.nested_progress_bar()
311
self._check_weaves(storebar)
315
def _check_weaves(self, storebar):
316
storebar.update('text-index', 0, 2)
317
if self.repository._format.fast_deltas:
318
# We haven't considered every fileid instance so far.
319
weave_checker = self.repository._get_versioned_file_checker(
320
ancestors=self.ancestors)
322
weave_checker = self.repository._get_versioned_file_checker(
323
text_key_references=self.text_key_references,
324
ancestors=self.ancestors)
325
storebar.update('file-graph', 1)
326
wrongs, unused_versions = weave_checker.check_file_version_parents(
327
self.repository.texts)
328
self.checked_weaves = weave_checker.file_ids
329
for text_key, (stored_parents, correct_parents) in viewitems(wrongs):
330
# XXX not ready for id join/split operations.
331
weave_id = text_key[0]
332
revision_id = text_key[-1]
333
weave_parents = tuple([parent[-1] for parent in stored_parents])
334
correct_parents = tuple([parent[-1] for parent in correct_parents])
335
self.inconsistent_parents.append(
336
(revision_id, weave_id, weave_parents, correct_parents))
337
self.unreferenced_versions.update(unused_versions)
339
def _add_entry_to_text_key_references(self, inv, entry):
340
if not self.rich_roots and entry.name == '':
342
key = (entry.file_id, entry.revision)
343
self.text_key_references.setdefault(key, False)
344
if entry.revision == inv.revision_id:
345
self.text_key_references[key] = True
348
def scan_branch(branch, needed_refs, to_unlock):
58
349
"""Scan a branch for refs.
60
351
:param branch: The branch to schedule for checking.
61
352
:param needed_refs: Refs we are accumulating.
62
:param exit_stack: The exit stack accumulating.
353
:param to_unlock: The unlock list accumulating.
64
355
note(gettext("Checking branch at '%s'.") % (branch.base,))
65
exit_stack.enter_context(branch.lock_read())
357
to_unlock.append(branch)
66
358
branch_refs = branch._get_check_refs()
67
359
for ref in branch_refs:
68
360
reflist = needed_refs.setdefault(ref, [])
69
361
reflist.append(branch)
72
def scan_tree(base_tree, tree, needed_refs, exit_stack):
364
def scan_tree(base_tree, tree, needed_refs, to_unlock):
73
365
"""Scan a tree for refs.
75
367
:param base_tree: The original tree check opened, used to detect duplicate
77
369
:param tree: The tree to schedule for checking.
78
370
:param needed_refs: Refs we are accumulating.
79
:param exit_stack: The exit stack accumulating.
371
:param to_unlock: The unlock list accumulating.
81
373
if base_tree is not None and tree.basedir == base_tree.basedir:
83
375
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
84
exit_stack.enter_context(tree.lock_read())
377
to_unlock.append(tree)
85
378
tree_refs = tree._get_check_refs()
86
379
for ref in tree_refs:
87
380
reflist = needed_refs.setdefault(ref, [])