46
34
indicating that the revision was found/not found.
49
from bzrlib import errors
50
from bzrlib.branch import Branch
51
from bzrlib.bzrdir import BzrDir
52
from bzrlib.revision import NULL_REVISION
53
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
54
from bzrlib.trace import note
56
from bzrlib.workingtree import WorkingTree
37
from __future__ import absolute_import
43
from .controldir import ControlDir
44
from .trace import note
45
from .i18n import gettext
58
48
class Check(object):
59
49
"""Check a repository"""
61
# The Check object interacts with InventoryEntry.check, etc.
63
51
def __init__(self, repository, check_repo=True):
64
52
self.repository = repository
65
self.checked_rev_cnt = 0
67
self.missing_parent_links = {}
68
self.missing_inventory_sha_cnt = 0
69
self.missing_revision_cnt = 0
70
self.checked_weaves = set()
71
self.unreferenced_versions = set()
72
self.inconsistent_parents = []
73
self.rich_roots = repository.supports_rich_root()
74
self.text_key_references = {}
75
self.check_repo = check_repo
76
self.other_results = []
77
# Plain text lines to include in the report
78
self._report_items = []
79
# Keys we are looking for; may be large and need spilling to disk.
80
# key->(type(revision/inventory/text/signature/map), sha1, first-referer)
81
self.pending_keys = {}
82
# Ancestors map for all of revisions being checked; while large helper
83
# functions we call would create it anyway, so better to have once and
87
def check(self, callback_refs=None, check_repo=True):
88
if callback_refs is None:
90
self.repository.lock_read()
91
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
93
self.progress.update('check', 0, 4)
95
self.progress.update('checking revisions', 0)
96
self.check_revisions()
97
self.progress.update('checking commit contents', 1)
98
self.repository._check_inventories(self)
99
self.progress.update('checking file graphs', 2)
100
# check_weaves is done after the revision scan so that
101
# revision index is known to be valid.
103
self.progress.update('checking branches and trees', 3)
105
repo = self.repository
106
# calculate all refs, and callback the objects requesting them.
108
wanting_items = set()
109
# Current crude version calculates everything and calls
110
# everything at once. Doing a queue and popping as things are
111
# satisfied would be cheaper on memory [but few people have
112
# huge numbers of working trees today. TODO: fix before
116
for ref, wantlist in callback_refs.iteritems():
117
wanting_items.update(wantlist)
120
refs[ref] = repo.revision_tree(value)
121
elif kind == 'lefthand-distance':
123
elif kind == 'revision-existence':
124
existences.add(value)
126
raise AssertionError(
127
'unknown ref kind for ref %s' % ref)
128
node_distances = repo.get_graph().find_lefthand_distances(distances)
129
for key, distance in node_distances.iteritems():
130
refs[('lefthand-distance', key)] = distance
131
if key in existences and distance > 0:
132
refs[('revision-existence', key)] = True
133
existences.remove(key)
134
parent_map = repo.get_graph().get_parent_map(existences)
135
for key in parent_map:
136
refs[('revision-existence', key)] = True
137
existences.remove(key)
138
for key in existences:
139
refs[('revision-existence', key)] = False
140
for item in wanting_items:
141
if isinstance(item, WorkingTree):
143
if isinstance(item, Branch):
144
self.other_results.append(item.check(refs))
146
self.progress.finished()
147
self.repository.unlock()
149
def _check_revisions(self, revisions_iterator):
150
"""Check revision objects by decorating a generator.
152
:param revisions_iterator: An iterator of(revid, Revision-or-None).
153
:return: A generator of the contents of revisions_iterator.
155
self.planned_revisions = set()
156
for revid, revision in revisions_iterator:
157
yield revid, revision
158
self._check_one_rev(revid, revision)
159
# Flatten the revisions we found to guarantee consistent later
161
self.planned_revisions = list(self.planned_revisions)
162
# TODO: extract digital signatures as items to callback on too.
164
def check_revisions(self):
165
"""Scan revisions, checking data directly available as we go."""
166
revision_iterator = self.repository._iter_revisions(None)
167
revision_iterator = self._check_revisions(revision_iterator)
168
# We read all the revisions here:
169
# - doing this allows later code to depend on the revision index.
170
# - we can fill out existence flags at this point
171
# - we can read the revision inventory sha at this point
172
# - we can check properties and serialisers etc.
173
if not self.repository.revision_graph_can_have_wrong_parents():
174
# The check against the index isn't needed.
175
self.revs_with_bad_parents_in_index = None
176
for thing in revision_iterator:
179
bad_revisions = self.repository._find_inconsistent_revision_parents(
181
self.revs_with_bad_parents_in_index = list(bad_revisions)
183
54
def report_results(self, verbose):
185
self._report_repo_results(verbose)
186
for result in self.other_results:
187
result.report_results(verbose)
189
def _report_repo_results(self, verbose):
190
note('checked repository %s format %s',
191
self.repository.user_url,
192
self.repository._format)
193
note('%6d revisions', self.checked_rev_cnt)
194
note('%6d file-ids', len(self.checked_weaves))
196
note('%6d unreferenced text versions',
197
len(self.unreferenced_versions))
198
if verbose and len(self.unreferenced_versions):
199
for file_id, revision_id in self.unreferenced_versions:
200
note('unreferenced version: {%s} in %s', revision_id,
202
if self.missing_inventory_sha_cnt:
203
note('%6d revisions are missing inventory_sha1',
204
self.missing_inventory_sha_cnt)
205
if self.missing_revision_cnt:
206
note('%6d revisions are mentioned but not present',
207
self.missing_revision_cnt)
209
note('%6d ghost revisions', len(self.ghosts))
211
for ghost in self.ghosts:
213
if len(self.missing_parent_links):
214
note('%6d revisions missing parents in ancestry',
215
len(self.missing_parent_links))
217
for link, linkers in self.missing_parent_links.items():
218
note(' %s should be in the ancestry for:', link)
219
for linker in linkers:
220
note(' * %s', linker)
221
if len(self.inconsistent_parents):
222
note('%6d inconsistent parents', len(self.inconsistent_parents))
224
for info in self.inconsistent_parents:
225
revision_id, file_id, found_parents, correct_parents = info
226
note(' * %s version %s has parents %r '
228
% (file_id, revision_id, found_parents,
230
if self.revs_with_bad_parents_in_index:
231
note('%6d revisions have incorrect parents in the revision index',
232
len(self.revs_with_bad_parents_in_index))
234
for item in self.revs_with_bad_parents_in_index:
235
revision_id, index_parents, actual_parents = item
237
' %s has wrong parents in index: '
239
revision_id, index_parents, actual_parents)
240
for item in self._report_items:
243
def _check_one_rev(self, rev_id, rev):
244
"""Cross-check one revision.
246
:param rev_id: A revision id to check.
247
:param rev: A revision or None to indicate a missing revision.
249
if rev.revision_id != rev_id:
250
self._report_items.append(
251
'Mismatched internal revid {%s} and index revid {%s}' % (
252
rev.revision_id, rev_id))
253
rev_id = rev.revision_id
254
# Check this revision tree etc, and count as seen when we encounter a
256
self.planned_revisions.add(rev_id)
258
self.ghosts.discard(rev_id)
259
# Count all parents as ghosts if we haven't seen them yet.
260
for parent in rev.parent_ids:
261
if not parent in self.planned_revisions:
262
self.ghosts.add(parent)
264
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
265
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
267
self.checked_rev_cnt += 1
269
def add_pending_item(self, referer, key, kind, sha1):
    """Add a reference to a sha1 to be cross checked against a key.

    The first referer to mention a key is the one recorded in
    ``self.pending_keys``.  A later referer expecting a different sha1
    does not replace that entry; the disagreement is appended to
    ``self._report_items`` instead.

    :param referer: The referer that expects key to have sha1.
    :param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
    :param kind: revision/inventory/text/map/signature
    :param sha1: A hex sha1 or None if no sha1 is known.
    """
    existing = self.pending_keys.get(key)
    if existing is None:
        self.pending_keys[key] = (kind, sha1, referer)
        return
    # Stored entries are (kind, sha1, first-referer).
    existing_sha1 = existing[1]
    existing_referer = existing[2]
    if sha1 != existing_sha1:
        # The message must be formatted with % before it is appended:
        # list.append() accepts exactly one argument.
        self._report_items.append(
            'Multiple expected sha1s for %s. {%s}'
            ' expects {%s}, {%s} expects {%s}' % (
                key, referer, sha1, existing_referer, existing_sha1))
286
def check_weaves(self):
287
"""Check all the weaves we can get our hands on.
290
storebar = bzrlib.ui.ui_factory.nested_progress_bar()
292
self._check_weaves(storebar)
296
def _check_weaves(self, storebar):
297
storebar.update('text-index', 0, 2)
298
if self.repository._format.fast_deltas:
299
# We haven't considered every fileid instance so far.
300
weave_checker = self.repository._get_versioned_file_checker(
301
ancestors=self.ancestors)
303
weave_checker = self.repository._get_versioned_file_checker(
304
text_key_references=self.text_key_references,
305
ancestors=self.ancestors)
306
storebar.update('file-graph', 1)
307
result = weave_checker.check_file_version_parents(
308
self.repository.texts)
309
self.checked_weaves = weave_checker.file_ids
310
bad_parents, unused_versions = result
311
bad_parents = bad_parents.items()
312
for text_key, (stored_parents, correct_parents) in bad_parents:
313
# XXX not ready for id join/split operations.
314
weave_id = text_key[0]
315
revision_id = text_key[-1]
316
weave_parents = tuple([parent[-1] for parent in stored_parents])
317
correct_parents = tuple([parent[-1] for parent in correct_parents])
318
self.inconsistent_parents.append(
319
(revision_id, weave_id, weave_parents, correct_parents))
320
self.unreferenced_versions.update(unused_versions)
322
def _add_entry_to_text_key_references(self, inv, entry):
323
if not self.rich_roots and entry.name == '':
325
key = (entry.file_id, entry.revision)
326
self.text_key_references.setdefault(key, False)
327
if entry.revision == inv.revision_id:
328
self.text_key_references[key] = True
331
@deprecated_function(deprecated_in((1, 6, 0)))
def check(branch, verbose):
    """Run consistency checks on a branch (deprecated entry point).

    This simply forwards to ``check_branch``; results are reported
    through logging and nothing is returned.

    Deprecated in 1.6.  Please use check_dwim instead.

    :param branch: The branch to check.
    :param verbose: Passed through unchanged to ``check_branch``.
    :raise BzrCheckError: if there's a consistency error.
    """
    check_branch(branch, verbose)
344
@deprecated_function(deprecated_in((1,16,0)))
345
def check_branch(branch, verbose):
346
"""Run consistency checks on a branch.
348
Results are reported through logging.
350
:raise BzrCheckError: if there's a consistency error.
355
for ref in branch._get_check_refs():
356
needed_refs.setdefault(ref, []).append(branch)
357
result = branch.repository.check([branch.last_revision()], needed_refs)
358
branch_result = result.other_results[0]
361
branch_result.report_results(verbose)
364
def scan_branch(branch, needed_refs, to_unlock):
55
raise NotImplementedError(self.report_results)
58
def scan_branch(branch, needed_refs, exit_stack):
365
59
"""Scan a branch for refs.
367
61
:param branch: The branch to schedule for checking.
368
62
:param needed_refs: Refs we are accumulating.
369
:param to_unlock: The unlock list accumulating.
63
:param exit_stack: The exit stack accumulating.
371
note("Checking branch at '%s'." % (branch.base,))
373
to_unlock.append(branch)
65
note(gettext("Checking branch at '%s'.") % (branch.base,))
66
exit_stack.enter_context(branch.lock_read())
374
67
branch_refs = branch._get_check_refs()
375
68
for ref in branch_refs:
376
69
reflist = needed_refs.setdefault(ref, [])
377
70
reflist.append(branch)
380
def scan_tree(base_tree, tree, needed_refs, to_unlock):
73
def scan_tree(base_tree, tree, needed_refs, exit_stack):
381
74
"""Scan a tree for refs.
383
76
:param base_tree: The original tree check opened, used to detect duplicate
385
78
:param tree: The tree to schedule for checking.
386
79
:param needed_refs: Refs we are accumulating.
387
:param to_unlock: The unlock list accumulating.
80
:param exit_stack: The exit stack accumulating.
389
82
if base_tree is not None and tree.basedir == base_tree.basedir:
391
note("Checking working tree at '%s'." % (tree.basedir,))
393
to_unlock.append(tree)
84
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
85
exit_stack.enter_context(tree.lock_read())
394
86
tree_refs = tree._get_check_refs()
395
87
for ref in tree_refs:
396
88
reflist = needed_refs.setdefault(ref, [])