70
54
raise NotImplementedError(self.report_results)
73
class VersionedFileCheck(Check):
74
"""Check a versioned file repository"""
76
# The Check object interacts with InventoryEntry.check, etc.
78
def __init__(self, repository, check_repo=True):
79
self.repository = repository
80
self.checked_rev_cnt = 0
82
self.missing_parent_links = {}
83
self.missing_inventory_sha_cnt = 0
84
self.missing_revision_cnt = 0
85
self.checked_weaves = set()
86
self.unreferenced_versions = set()
87
self.inconsistent_parents = []
88
self.rich_roots = repository.supports_rich_root()
89
self.text_key_references = {}
90
self.check_repo = check_repo
91
self.other_results = []
92
# Plain text lines to include in the report
93
self._report_items = []
94
# Keys we are looking for; may be large and need spilling to disk.
95
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
96
self.pending_keys = {}
97
# Ancestors map for all of revisions being checked; while large helper
98
# functions we call would create it anyway, so better to have once and
102
def check(self, callback_refs=None, check_repo=True):
    """Run the repository checks, then satisfy the refs objects asked for.

    The repository is read-locked and a nested progress bar is open for
    the duration of the call; both are released even when a check raises.

    :param callback_refs: Optional dict mapping a ref to the objects that
        want it. A ref is a ``(kind, value)`` pair whose kind is 'trees',
        'lefthand-distance' or 'revision-existence'.
    :param check_repo: Not read by this method; kept so the signature is
        unchanged. NOTE(review): the instance flag stored by __init__ is
        used to decide whether the repository itself is checked - confirm
        that is the intended switch.
    :raises AssertionError: If a ref has any other kind.
    """
    if callback_refs is None:
        callback_refs = {}
    self.repository.lock_read()
    try:
        self.progress = ui.ui_factory.nested_progress_bar()
        try:
            self.progress.update(gettext('check'), 0, 4)
            if self.check_repo:
                self.progress.update(gettext('checking revisions'), 0)
                self.check_revisions()
                self.progress.update(gettext('checking commit contents'), 1)
                self.repository._check_inventories(self)
                self.progress.update(gettext('checking file graphs'), 2)
                # check_weaves is done after the revision scan so that
                # revision index is known to be valid.
                self.check_weaves()
            self.progress.update(gettext('checking branches and trees'), 3)
            if callback_refs:
                repo = self.repository
                # calculate all refs, and callback the objects requesting
                # them.
                refs = {}
                wanting_items = set()
                # Current crude version calculates everything and calls
                # everything at once. Doing a queue and popping as things
                # are satisfied would be cheaper on memory, but few people
                # have huge numbers of working trees today.
                distances = set()
                existences = set()
                # items() rather than iteritems(): works on Python 2 and 3.
                for ref, wantlist in callback_refs.items():
                    wanting_items.update(wantlist)
                    kind, value = ref
                    if kind == 'trees':
                        refs[ref] = repo.revision_tree(value)
                    elif kind == 'lefthand-distance':
                        distances.add(value)
                    elif kind == 'revision-existence':
                        existences.add(value)
                    else:
                        raise AssertionError(
                            'unknown ref kind for ref %s' % ref)
                node_distances = repo.get_graph().find_lefthand_distances(
                    distances)
                for key, distance in node_distances.items():
                    refs[('lefthand-distance', key)] = distance
                    # A positive distance already proves the revision
                    # exists, so it needs no separate lookup below.
                    if key in existences and distance > 0:
                        refs[('revision-existence', key)] = True
                        existences.remove(key)
                parent_map = repo.get_graph().get_parent_map(existences)
                for key in parent_map:
                    refs[('revision-existence', key)] = True
                    existences.remove(key)
                # Whatever is left was found by neither lookup.
                for key in existences:
                    refs[('revision-existence', key)] = False
                for item in wanting_items:
                    if isinstance(item, WorkingTree):
                        # NOTE(review): this statement is reconstructed;
                        # confirm the method name and whether its result
                        # should be recorded in other_results.
                        item._check(refs)
                    if isinstance(item, Branch):
                        self.other_results.append(item.check(refs))
        finally:
            self.progress.finished()
    finally:
        self.repository.unlock()
164
def _check_revisions(self, revisions_iterator):
165
"""Check revision objects by decorating a generator.
167
:param revisions_iterator: An iterator of(revid, Revision-or-None).
168
:return: A generator of the contents of revisions_iterator.
170
self.planned_revisions = set()
171
for revid, revision in revisions_iterator:
172
yield revid, revision
173
self._check_one_rev(revid, revision)
174
# Flatten the revisions we found to guarantee consistent later
176
self.planned_revisions = list(self.planned_revisions)
177
# TODO: extract digital signatures as items to callback on too.
179
def check_revisions(self):
    """Scan revisions, checking data directly available as we go.

    Sets ``revs_with_bad_parents_in_index`` to None when the repository
    format cannot have wrong parents in its revision graph, otherwise to
    the list produced by the repository's inconsistent-parent scan.
    """
    revision_iterator = self.repository._iter_revisions(None)
    revision_iterator = self._check_revisions(revision_iterator)
    # We read the all revisions here:
    # - doing this allows later code to depend on the revision index.
    # - we can fill out existence flags at this point
    # - we can read the revision inventory sha at this point
    # - we can check properties and serialisers etc.
    if not self.repository._format.revision_graph_can_have_wrong_parents:
        # The check against the index isn't needed.
        self.revs_with_bad_parents_in_index = None
        # The iterator still has to be drained: the per-revision checks
        # happen as a side effect of consuming it.
        for _ in revision_iterator:
            pass
    else:
        # NOTE(review): the argument is reconstructed; passing the
        # decorated iterator lets this scan drive the per-revision checks.
        bad_revisions = self.repository._find_inconsistent_revision_parents(
            revision_iterator)
        self.revs_with_bad_parents_in_index = list(bad_revisions)
198
def report_results(self, verbose):
    """Report the repository results, then every collected sub-result.

    :param verbose: Passed through unchanged to each reporter.
    """
    # _report_repo_results reads revs_with_bad_parents_in_index, which is
    # only assigned by check_revisions, so skip it when the repository
    # itself was not meant to be checked.
    # NOTE(review): the guard is reconstructed - confirm against callers.
    if self.check_repo:
        self._report_repo_results(verbose)
    for result in self.other_results:
        result.report_results(verbose)
204
def _report_repo_results(self, verbose):
    """Write the repository check summary through ``note``.

    Summary counts are emitted first; per-item detail lines are emitted
    only when ``verbose`` is true. Any free-form lines collected in
    ``_report_items`` are always emitted last.

    NOTE(review): the ``verbose`` gates around the detail loops, the
    emptiness guard on the ghost count and the ghost detail line are
    reconstructed from the pattern of the unreferenced-versions section;
    confirm them against the intended output.

    :param verbose: Whether to include per-item detail lines.
    """
    note(gettext('checked repository {0} format {1}').format(
        self.repository.user_url,
        self.repository._format))
    note(gettext('%6d revisions'), self.checked_rev_cnt)
    note(gettext('%6d file-ids'), len(self.checked_weaves))
    if verbose:
        note(gettext('%6d unreferenced text versions'),
             len(self.unreferenced_versions))
    if verbose and self.unreferenced_versions:
        for file_id, revision_id in self.unreferenced_versions:
            note(gettext('unreferenced version: {{{0}}} in {1}').format(
                revision_id, file_id))
    if self.missing_inventory_sha_cnt:
        note(gettext('%6d revisions are missing inventory_sha1'),
             self.missing_inventory_sha_cnt)
    if self.missing_revision_cnt:
        note(gettext('%6d revisions are mentioned but not present'),
             self.missing_revision_cnt)
    if self.ghosts:
        note(gettext('%6d ghost revisions'), len(self.ghosts))
        if verbose:
            for ghost in self.ghosts:
                note(' %s', ghost)
    if self.missing_parent_links:
        note(gettext('%6d revisions missing parents in ancestry'),
             len(self.missing_parent_links))
        if verbose:
            for link, linkers in self.missing_parent_links.items():
                note(gettext(' %s should be in the ancestry for:'), link)
                for linker in linkers:
                    note(' * %s', linker)
    if self.inconsistent_parents:
        note(gettext('%6d inconsistent parents'),
             len(self.inconsistent_parents))
        if verbose:
            for info in self.inconsistent_parents:
                revision_id, file_id, found_parents, correct_parents = info
                note(gettext(' * {0} version {1} has parents {2!r} '
                             'but should have {3!r}').format(
                    file_id, revision_id, found_parents,
                    correct_parents))
    # None (index check not needed) and an empty list both skip this.
    if self.revs_with_bad_parents_in_index:
        note(gettext(
            '%6d revisions have incorrect parents in the revision index'),
            len(self.revs_with_bad_parents_in_index))
        if verbose:
            for item in self.revs_with_bad_parents_in_index:
                revision_id, index_parents, actual_parents = item
                note(gettext(
                    ' {0} has wrong parents in index: '
                    '{1!r} should be {2!r}').format(
                    revision_id, index_parents, actual_parents))
    for item in self._report_items:
        note(item)
259
def _check_one_rev(self, rev_id, rev):
260
"""Cross-check one revision.
262
:param rev_id: A revision id to check.
263
:param rev: A revision or None to indicate a missing revision.
265
if rev.revision_id != rev_id:
266
self._report_items.append(gettext(
267
'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format(
268
rev.revision_id, rev_id))
269
rev_id = rev.revision_id
270
# Check this revision tree etc, and count as seen when we encounter a
272
self.planned_revisions.add(rev_id)
274
self.ghosts.discard(rev_id)
275
# Count all parents as ghosts if we haven't seen them yet.
276
for parent in rev.parent_ids:
277
if not parent in self.planned_revisions:
278
self.ghosts.add(parent)
280
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
281
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
283
self.checked_rev_cnt += 1
285
def add_pending_item(self, referer, key, kind, sha1):
    """Add a reference to a sha1 to be cross checked against a key.

    The first reference to a key is stored in ``pending_keys`` as
    ``(kind, sha1, referer)``. A later reference never replaces it; if it
    expects a different sha1, a report line is queued instead.

    :param referer: The referer that expects key to have sha1.
    :param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
    :param kind: revision/inventory/text/map/signature
    :param sha1: A hex sha1 or None if no sha1 is known.
    """
    existing = self.pending_keys.get(key)
    if existing is None:
        self.pending_keys[key] = (kind, sha1, referer)
        return
    existing_sha1 = existing[1]
    existing_referer = existing[2]
    if sha1 != existing_sha1:
        # The second half of the message names the earlier referer and
        # the sha1 it expected (entries are (kind, sha1, referer)).
        self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}'
            ' expects {{{2}}}, {{{3}}} expects {{{4}}}').format(
            key, referer, sha1, existing_referer, existing_sha1))
302
def check_weaves(self):
    """Check all the weaves we can get our hands on.

    Opens a nested progress bar, runs ``_check_weaves`` with it and
    finishes the bar afterwards.
    """
    storebar = ui.ui_factory.nested_progress_bar()
    try:
        self._check_weaves(storebar)
    finally:
        # Always retire the nested bar, even if the check raises, the
        # same way check() finishes its own progress bar.
        storebar.finished()
312
def _check_weaves(self, storebar):
313
storebar.update('text-index', 0, 2)
314
if self.repository._format.fast_deltas:
315
# We haven't considered every fileid instance so far.
316
weave_checker = self.repository._get_versioned_file_checker(
317
ancestors=self.ancestors)
319
weave_checker = self.repository._get_versioned_file_checker(
320
text_key_references=self.text_key_references,
321
ancestors=self.ancestors)
322
storebar.update('file-graph', 1)
323
result = weave_checker.check_file_version_parents(
324
self.repository.texts)
325
self.checked_weaves = weave_checker.file_ids
326
bad_parents, unused_versions = result
327
bad_parents = bad_parents.items()
328
for text_key, (stored_parents, correct_parents) in bad_parents:
329
# XXX not ready for id join/split operations.
330
weave_id = text_key[0]
331
revision_id = text_key[-1]
332
weave_parents = tuple([parent[-1] for parent in stored_parents])
333
correct_parents = tuple([parent[-1] for parent in correct_parents])
334
self.inconsistent_parents.append(
335
(revision_id, weave_id, weave_parents, correct_parents))
336
self.unreferenced_versions.update(unused_versions)
338
def _add_entry_to_text_key_references(self, inv, entry):
339
if not self.rich_roots and entry.name == '':
341
key = (entry.file_id, entry.revision)
342
self.text_key_references.setdefault(key, False)
343
if entry.revision == inv.revision_id:
344
self.text_key_references[key] = True
347
def scan_branch(branch, needed_refs, exit_stack):
    """Scan a branch for refs.

    The branch is read-locked through ``exit_stack`` and is appended to
    the list kept in ``needed_refs`` for every ref it asks for.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating.
    :param exit_stack: The exit stack accumulating.
    """
    note(gettext("Checking branch at '%s'.") % (branch.base,))
    # Hand the lock to the exit stack rather than tracking it in a
    # separate unlock list.
    exit_stack.enter_context(branch.lock_read())
    branch_refs = branch._get_check_refs()
    for ref in branch_refs:
        reflist = needed_refs.setdefault(ref, [])
        reflist.append(branch)
363
def scan_tree(base_tree, tree, needed_refs, to_unlock):
72
def scan_tree(base_tree, tree, needed_refs, exit_stack):
364
73
"""Scan a tree for refs.
366
75
:param base_tree: The original tree check opened, used to detect duplicate
368
77
:param tree: The tree to schedule for checking.
369
78
:param needed_refs: Refs we are accumulating.
370
:param to_unlock: The unlock list accumulating.
79
:param exit_stack: The exit stack accumulating.
372
81
if base_tree is not None and tree.basedir == base_tree.basedir:
374
83
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
376
to_unlock.append(tree)
84
exit_stack.enter_context(tree.lock_read())
377
85
tree_refs = tree._get_check_refs()
378
86
for ref in tree_refs:
379
87
reflist = needed_refs.setdefault(ref, [])