1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
# required fields.
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
# all.
35
"""Checking of bzr objects.

check_refs is a concept used for optimising check. Objects that depend on other
objects (e.g. tree on repository) can list the objects they would be requesting
so that when the dependent object is checked, matches can be pulled out and
evaluated in-line rather than re-reading the same data many times.
check_refs are tuples (kind, value). Currently defined kinds are:

* 'trees', where value is a revid and the looked up objects are revision trees.
* 'lefthand-distance', where value is a revid and the looked up objects are the
  distance along the lefthand path to NULL for that revid.
* 'revision-existence', where value is a revid, and the result is True or False
  indicating that the revision was found/not found.
"""
50
from __future__ import absolute_import
56
from .branch import Branch
57
from .controldir import ControlDir
58
from .revision import NULL_REVISION
62
from .trace import note
63
from .workingtree import WorkingTree
64
from .i18n import gettext
67
class Check(object):
    """Check a repository.

    Base class for repository checkers: it only records the repository and
    leaves reporting to subclasses.
    """

    def __init__(self, repository, check_repo=True):
        """Remember the repository to check.

        :param repository: The repository object that will be checked.
        :param check_repo: Accepted for interface compatibility with
            subclasses; this base class does not store or use it.
        """
        self.repository = repository

    def report_results(self, verbose):
        """Report the outcome of the check; must be overridden.

        :param verbose: Whether detailed output is wanted.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(self.report_results)
76
class VersionedFileCheck(Check):
    """Check a versioned file repository.

    The checker scans revisions, inventories and file graphs, accumulates
    counters and findings on the instance, and finally reports them through
    ``report_results``.
    """

    # The Check object interacts with InventoryEntry.check, etc.

    def __init__(self, repository, check_repo=True):
        """Initialise all accumulators used while checking.

        :param repository: The repository to check; it must provide
            ``supports_rich_root()``.
        :param check_repo: When false, the repository-level scans are skipped
            and only the requested callback refs are resolved.
        """
        self.repository = repository
        self.checked_rev_cnt = 0
        # Parents mentioned by a revision but not (yet) seen themselves.
        self.ghosts = set()
        self.missing_parent_links = {}
        self.missing_inventory_sha_cnt = 0
        self.missing_revision_cnt = 0
        self.checked_weaves = set()
        self.unreferenced_versions = set()
        self.inconsistent_parents = []
        self.rich_roots = repository.supports_rich_root()
        self.text_key_references = {}
        self.check_repo = check_repo
        self.other_results = []
        # Plain text lines to include in the report
        self._report_items = []
        # Keys we are looking for; may be large and need spilling to disk.
        # key->(type(revision/inventory/text/signature/map), sha1, first-referer)
        self.pending_keys = {}
        # Ancestors map for all of revisions being checked; while large helper
        # functions we call would create it anyway, so better to have once and
        # just keep.
        self.ancestors = {}

    def check(self, callback_refs=None, check_repo=True):
        """Run the check while holding a read lock on the repository.

        :param callback_refs: Optional dict mapping a check_ref tuple
            ``(kind, value)`` to the list of objects that want that ref
            resolved before they are checked.
        :param check_repo: Unused here; the value given to ``__init__``
            (``self.check_repo``) decides whether the repository is scanned.
        """
        if callback_refs is None:
            callback_refs = {}
        self.repository.lock_read()
        try:
            self.progress = ui.ui_factory.nested_progress_bar()
            try:
                self.progress.update(gettext('check'), 0, 4)
                # NOTE(review): guard restored from context (check_repo is
                # stored in __init__) -- confirm against upstream.
                if self.check_repo:
                    self.progress.update(gettext('checking revisions'), 0)
                    self.check_revisions()
                    self.progress.update(
                        gettext('checking commit contents'), 1)
                    self.repository._check_inventories(self)
                    self.progress.update(gettext('checking file graphs'), 2)
                    # check_weaves is done after the revision scan so that
                    # revision index is known to be valid.
                    self.check_weaves()
                self.progress.update(
                    gettext('checking branches and trees'), 3)
                if callback_refs:
                    self._run_callbacks(callback_refs)
            finally:
                self.progress.finished()
        finally:
            # Always release the read lock, even if creating the progress
            # bar or any check step fails.
            self.repository.unlock()

    def _run_callbacks(self, callback_refs):
        """Resolve every requested ref, then check the requesting objects.

        :param callback_refs: Non-empty dict of check_ref -> list of objects
            wanting that ref.
        :raises AssertionError: if a ref has an unknown kind.
        """
        repo = self.repository
        # calculate all refs, and callback the objects requesting them.
        refs = {}
        wanting_items = set()
        # Current crude version calculates everything and calls
        # everything at once. Doing a queue and popping as things are
        # satisfied would be cheaper on memory [but few people have
        # huge numbers of working trees today. TODO: fix before
        # landing].
        distances = set()
        existences = set()
        for ref, wantlist in viewitems(callback_refs):
            wanting_items.update(wantlist)
            kind, value = ref
            if kind == 'trees':
                refs[ref] = repo.revision_tree(value)
            elif kind == 'lefthand-distance':
                distances.add(value)
            elif kind == 'revision-existence':
                existences.add(value)
            else:
                # ref is a tuple, so wrap it: a bare "% ref" would raise
                # TypeError instead of the intended AssertionError.
                raise AssertionError(
                    'unknown ref kind for ref %s' % (ref,))
        node_distances = repo.get_graph().find_lefthand_distances(distances)
        for key, distance in viewitems(node_distances):
            refs[('lefthand-distance', key)] = distance
            # A positive distance already proves the revision exists.
            if key in existences and distance > 0:
                refs[('revision-existence', key)] = True
                existences.remove(key)
        parent_map = repo.get_graph().get_parent_map(existences)
        for key in parent_map:
            refs[('revision-existence', key)] = True
            existences.remove(key)
        # Whatever is left was found by neither lookup.
        for key in existences:
            refs[('revision-existence', key)] = False
        for item in wanting_items:
            if isinstance(item, WorkingTree):
                # NOTE(review): call restored from context -- confirm the
                # working-tree entry point name against upstream.
                item._check(refs)
            if isinstance(item, Branch):
                self.other_results.append(item.check(refs))

    def _check_revisions(self, revisions_iterator):
        """Check revision objects by decorating a generator.

        :param revisions_iterator: An iterator of (revid, Revision-or-None).
        :return: A generator of the contents of revisions_iterator.
        """
        self.planned_revisions = set()
        for revid, revision in revisions_iterator:
            yield revid, revision
            self._check_one_rev(revid, revision)
        # Flatten the revisions we found to guarantee consistent later
        # iteration.
        self.planned_revisions = list(self.planned_revisions)
        # TODO: extract digital signatures as items to callback on too.

    def check_revisions(self):
        """Scan revisions, checking data directly available as we go."""
        revision_iterator = self.repository.iter_revisions(
            self.repository.all_revision_ids())
        revision_iterator = self._check_revisions(revision_iterator)
        # We read all the revisions here:
        # - doing this allows later code to depend on the revision index.
        # - we can fill out existence flags at this point
        # - we can read the revision inventory sha at this point
        # - we can check properties and serialisers etc.
        if not self.repository._format.revision_graph_can_have_wrong_parents:
            # The check against the index isn't needed.
            self.revs_with_bad_parents_in_index = None
            # Drain the generator purely for its checking side effects.
            for thing in revision_iterator:
                pass
        else:
            bad_revisions = self.repository._find_inconsistent_revision_parents(
                revision_iterator)
            self.revs_with_bad_parents_in_index = list(bad_revisions)

    def report_results(self, verbose):
        """Report repository findings, then those of branches and trees.

        :param verbose: When true, individual problem items are listed in
            addition to the summary counts.
        """
        # NOTE(review): guard restored from context -- confirm upstream.
        if self.check_repo:
            self._report_repo_results(verbose)
        for result in self.other_results:
            result.report_results(verbose)

    def _report_repo_results(self, verbose):
        """Emit the repository-level summary via ``note``.

        :param verbose: When true, list each finding under its count.
        """
        note(gettext('checked repository {0} format {1}').format(
            self.repository.user_url,
            self.repository._format))
        note(gettext('%6d revisions'), self.checked_rev_cnt)
        note(gettext('%6d file-ids'), len(self.checked_weaves))
        if verbose:
            note(gettext('%6d unreferenced text versions'),
                 len(self.unreferenced_versions))
        if verbose and len(self.unreferenced_versions):
            for file_id, revision_id in self.unreferenced_versions:
                note(gettext('unreferenced version: {{{0}}} in {1}').format(
                    revision_id, file_id))
        if self.missing_inventory_sha_cnt:
            note(gettext('%6d revisions are missing inventory_sha1'),
                 self.missing_inventory_sha_cnt)
        if self.missing_revision_cnt:
            note(gettext('%6d revisions are mentioned but not present'),
                 self.missing_revision_cnt)
        if len(self.ghosts):
            note(gettext('%6d ghost revisions'), len(self.ghosts))
            if verbose:
                for ghost in self.ghosts:
                    # NOTE(review): format string restored from context.
                    note(' %s', ghost)
        if len(self.missing_parent_links):
            note(gettext('%6d revisions missing parents in ancestry'),
                 len(self.missing_parent_links))
            if verbose:
                for link, linkers in viewitems(self.missing_parent_links):
                    note(gettext(' %s should be in the ancestry for:'), link)
                    for linker in linkers:
                        note(' * %s', linker)
        if len(self.inconsistent_parents):
            note(gettext('%6d inconsistent parents'),
                 len(self.inconsistent_parents))
            if verbose:
                for info in self.inconsistent_parents:
                    revision_id, file_id, found_parents, correct_parents = info
                    note(gettext(' * {0} version {1} has parents {2!r} '
                                 'but should have {3!r}').format(
                        file_id, revision_id, found_parents,
                        correct_parents))
        if self.revs_with_bad_parents_in_index:
            note(gettext(
                '%6d revisions have incorrect parents in the revision index'),
                len(self.revs_with_bad_parents_in_index))
            if verbose:
                for item in self.revs_with_bad_parents_in_index:
                    revision_id, index_parents, actual_parents = item
                    note(gettext(
                        ' {0} has wrong parents in index: '
                        '{1!r} should be {2!r}').format(
                        revision_id, index_parents, actual_parents))
        for item in self._report_items:
            note(item)

    def _check_one_rev(self, rev_id, rev):
        """Cross-check one revision.

        :param rev_id: A revision id to check.
        :param rev: A revision or None to indicate a missing revision.
            NOTE(review): the body dereferences ``rev`` unconditionally, so
            None is not actually handled here -- confirm callers never pass it.
        """
        if rev.revision_id != rev_id:
            self._report_items.append(gettext(
                'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format(
                rev.revision_id, rev_id))
            rev_id = rev.revision_id
        # Check this revision tree etc, and count as seen when we encounter a
        # reference to it.
        self.planned_revisions.add(rev_id)
        # It is not a ghost
        self.ghosts.discard(rev_id)
        # Count all parents as ghosts if we haven't seen them yet.
        for parent in rev.parent_ids:
            if parent not in self.planned_revisions:
                self.ghosts.add(parent)

        self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
        # NOTE(review): sha1 argument restored from context -- confirm that
        # the revision's recorded inventory sha1 is what upstream passes.
        self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
                              rev.inventory_sha1)
        self.checked_rev_cnt += 1

    def add_pending_item(self, referer, key, kind, sha1):
        """Add a reference to a sha1 to be cross checked against a key.

        :param referer: The referer that expects key to have sha1.
        :param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
        :param kind: revision/inventory/text/map/signature
        :param sha1: A hex sha1 or None if no sha1 is known.
        """
        existing = self.pending_keys.get(key)
        if existing:
            if sha1 != existing[1]:
                # existing is (kind, sha1, referer): report the earlier
                # referer and the sha1 it expected.
                self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}'
                                                  ' expects {{{2}}}, {{{3}}} expects {{{4}}}').format(
                    key, referer, sha1, existing[2], existing[1]))
        else:
            self.pending_keys[key] = (kind, sha1, referer)

    def check_weaves(self):
        """Check all the weaves we can get our hands on."""
        storebar = ui.ui_factory.nested_progress_bar()
        try:
            self._check_weaves(storebar)
        finally:
            storebar.finished()

    def _check_weaves(self, storebar):
        """Check file-graph parents and record inconsistencies.

        :param storebar: Progress bar updated as the two phases proceed.
        """
        storebar.update('text-index', 0, 2)
        if self.repository._format.fast_deltas:
            # We haven't considered every fileid instance so far.
            weave_checker = self.repository._get_versioned_file_checker(
                ancestors=self.ancestors)
        else:
            weave_checker = self.repository._get_versioned_file_checker(
                text_key_references=self.text_key_references,
                ancestors=self.ancestors)
        storebar.update('file-graph', 1)
        wrongs, unused_versions = weave_checker.check_file_version_parents(
            self.repository.texts)
        self.checked_weaves = weave_checker.file_ids
        for text_key, (stored_parents, correct_parents) in viewitems(wrongs):
            # XXX not ready for id join/split operations.
            weave_id = text_key[0]
            revision_id = text_key[-1]
            # Keep only the revision part (last element) of each parent key.
            weave_parents = tuple(parent[-1] for parent in stored_parents)
            correct_parents = tuple(parent[-1] for parent in correct_parents)
            self.inconsistent_parents.append(
                (revision_id, weave_id, weave_parents, correct_parents))
        self.unreferenced_versions.update(unused_versions)

    def _add_entry_to_text_key_references(self, inv, entry):
        """Record the text key of an inventory entry.

        The key maps to True once some inventory is seen whose revision id
        equals the entry's revision, and to False until then.

        :param inv: The inventory containing ``entry``.
        :param entry: The inventory entry to record.
        """
        if not self.rich_roots and entry.name == '':
            # Root entries are skipped when rich roots are not supported.
            return
        key = (entry.file_id, entry.revision)
        self.text_key_references.setdefault(key, False)
        if entry.revision == inv.revision_id:
            self.text_key_references[key] = True
349
def scan_branch(branch, needed_refs, to_unlock):
    """Scan a branch for refs.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating.
    :param to_unlock: The unlock list accumulating.
    """
    note(gettext("Checking branch at '%s'.") % (branch.base,))
    # NOTE(review): read lock restored from context (the branch is queued on
    # to_unlock immediately afterwards) -- confirm against upstream.
    branch.lock_read()
    to_unlock.append(branch)
    branch_refs = branch._get_check_refs()
    for ref in branch_refs:
        # Register this branch as wanting the ref resolved.
        reflist = needed_refs.setdefault(ref, [])
        reflist.append(branch)
365
def scan_tree(base_tree, tree, needed_refs, to_unlock):
    """Scan a tree for refs.

    :param base_tree: The original tree check opened, used to detect duplicate
        tree checks.
    :param tree: The tree to schedule for checking.
    :param needed_refs: Refs we are accumulating.
    :param to_unlock: The unlock list accumulating.
    """
    if base_tree is not None and tree.basedir == base_tree.basedir:
        # Same location as the tree already scanned; nothing more to do.
        return
    note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
    # NOTE(review): read lock restored from context (the tree is queued on
    # to_unlock immediately afterwards) -- confirm against upstream.
    tree.lock_read()
    to_unlock.append(tree)
    tree_refs = tree._get_check_refs()
    for ref in tree_refs:
        # Register this tree as wanting the ref resolved.
        reflist = needed_refs.setdefault(ref, [])
        reflist.append(tree)
385
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
386
"""Check multiple objects.
388
If errors occur they are accumulated and reported as far as possible, and
389
an exception raised at the end of the process.
392
base_tree, branch, repo, relpath = \
393
ControlDir.open_containing_tree_branch_or_repository(path)
394
except errors.NotBranchError:
395
base_tree = branch = repo = None
400
if base_tree is not None:
401
# If the tree is a lightweight checkout we won't see it in
402
# repo.find_branches - add now.
404
scan_tree(None, base_tree, needed_refs, to_unlock)
405
branch = base_tree.branch
406
if branch is not None:
409
# The branch is in a shared repository
410
repo = branch.repository
413
to_unlock.append(repo)
414
branches = repo.find_branches(using=True)
416
if do_branch or do_tree:
417
for branch in branches:
420
tree = branch.controldir.open_workingtree()
422
except (errors.NotLocalUrl, errors.NoWorkingTree):
425
scan_tree(base_tree, tree, needed_refs, to_unlock)
427
scan_branch(branch, needed_refs, to_unlock)
428
if do_branch and not branches:
429
note(gettext("No branch found at specified location."))
430
if do_tree and base_tree is None and not saw_tree:
431
note(gettext("No working tree found at specified location."))
432
if do_repo or do_branch or do_tree:
434
note(gettext("Checking repository at '%s'.")
436
result = repo.check(None, callback_refs=needed_refs,
438
result.report_results(verbose)
441
note(gettext("No working tree found at specified location."))
443
note(gettext("No branch found at specified location."))
445
note(gettext("No repository found at specified location."))
447
for thing in to_unlock: