# Copyright (C) 2005, 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# TODO: Check ancestries are correct for every revision: includes
# every committed so far, and in a reasonable order.

# TODO: Also check non-mainline revisions mentioned as parents.

# TODO: Check for extra files in the control directory.

# TODO: Check revision, inventory and entry objects have all
# required fields.

# TODO: Get every revision in the revision-store even if they're not
# referenced by history and make sure they're all valid.

# TODO: Perhaps have a way to record errors other than by raising exceptions;
# would perhaps be enough to accumulate exception objects in a list without
# raising them. If there's more than one exception it'd be good to see them
# all.

"""Checking of bzr objects.

check_refs is a concept used for optimising check. Objects that depend on other
objects (e.g. tree on repository) can list the objects they would be requesting
so that when the dependent object is checked, matches can be pulled out and
evaluated in-line rather than re-reading the same data many times.
check_refs are tuples (kind, value). Currently defined kinds are:

* 'trees', where value is a revid and the looked up objects are revision trees.
* 'lefthand-distance', where value is a revid and the looked up objects are the
  distance along the lefthand path to NULL for that revid.
* 'revision-existence', where value is a revid, and the result is True or False
  indicating that the revision was found/not found.
"""

50
from __future__ import absolute_import
56
from .branch import Branch
57
from .controldir import ControlDir
58
from .revision import NULL_REVISION
62
from .trace import note
63
from .workingtree import WorkingTree
64
from .i18n import gettext
class Check(object):
    """Check a repository.

    Base class for checkers: it only records the repository, and leaves
    ``report_results`` to subclasses.
    """

    def __init__(self, repository, check_repo=True):
        """Remember the repository to check.

        :param repository: The repository object to be checked.
        :param check_repo: Accepted for interface compatibility; this base
            class does not store it.
        """
        self.repository = repository

    def report_results(self, verbose):
        """Report the results of the check; subclasses must override.

        :param verbose: Whether detailed output is wanted.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(self.report_results)


class VersionedFileCheck(Check):
    """Check a versioned file repository"""

    # The Check object interacts with InventoryEntry.check, etc.

    def __init__(self, repository, check_repo=True):
        """Set up empty result accumulators for a check of ``repository``.

        :param repository: The repository to check; ``supports_rich_root()``
            is queried here.
        :param check_repo: Whether ``check`` should scan the repository
            itself (revisions, inventories, file graphs) and whether
            ``report_results`` should report on it.
        """
        self.repository = repository
        self.checked_rev_cnt = 0
        self.ghosts = set()
        self.missing_parent_links = {}
        self.missing_inventory_sha_cnt = 0
        self.missing_revision_cnt = 0
        self.checked_weaves = set()
        self.unreferenced_versions = set()
        self.inconsistent_parents = []
        self.rich_roots = repository.supports_rich_root()
        self.text_key_references = {}
        self.check_repo = check_repo
        self.other_results = []
        # Plain text lines to include in the report
        self._report_items = []
        # Keys we are looking for; may be large and need spilling to disk.
        # key->(type(revision/inventory/text/signature/map), sha1, first-referer)
        self.pending_keys = {}
        # Ancestors map for all of revisions being checked; while large helper
        # functions we call would create it anyway, so better to have once and
        # keep.
        self.ancestors = {}
        # Filled in by check_revisions(); defined here so that reporting
        # never trips over a missing attribute.
        self.revs_with_bad_parents_in_index = None

    def check(self, callback_refs=None, check_repo=True):
        """Run the check while holding a read lock on the repository.

        When ``self.check_repo`` is set, revisions, inventories and file
        graphs are checked. Afterwards every requested check_ref is resolved
        and handed to the objects that asked for it.

        :param callback_refs: A dict mapping a check_ref ``(kind, value)`` to
            the list of objects wanting it, or None for no callbacks.
        :param check_repo: Unused here; the value given to the constructor
            (``self.check_repo``) decides whether the repository is scanned.
        """
        if callback_refs is None:
            callback_refs = {}
        self.repository.lock_read()
        try:
            # Created inside the lock's try block so that a failure to build
            # the progress bar cannot leave the repository locked.
            self.progress = ui.ui_factory.nested_progress_bar()
            try:
                self.progress.update(gettext('check'), 0, 4)
                if self.check_repo:
                    self.progress.update(gettext('checking revisions'), 0)
                    self.check_revisions()
                    self.progress.update(
                        gettext('checking commit contents'), 1)
                    self.repository._check_inventories(self)
                    self.progress.update(gettext('checking file graphs'), 2)
                    # check_weaves is done after the revision scan so that
                    # revision index is known to be valid.
                    self.check_weaves()
                self.progress.update(
                    gettext('checking branches and trees'), 3)
                if callback_refs:
                    self._satisfy_callback_refs(callback_refs)
            finally:
                self.progress.finished()
        finally:
            self.repository.unlock()

    def _satisfy_callback_refs(self, callback_refs):
        """Resolve every requested check_ref and call back the requesters.

        :param callback_refs: A non-empty dict of check_ref -> list of
            objects wanting that ref.
        :raises AssertionError: If a ref has an unknown kind.
        """
        repo = self.repository
        # calculate all refs, and callback the objects requesting them.
        refs = {}
        wanting_items = set()
        # Current crude version calculates everything and calls
        # everything at once. Doing a queue and popping as things are
        # satisfied would be cheaper on memory [but few people have
        # huge numbers of working trees today. TODO: fix before
        # landing].
        distances = set()
        existences = set()
        for ref, wantlist in viewitems(callback_refs):
            wanting_items.update(wantlist)
            kind, value = ref
            if kind == 'trees':
                refs[ref] = repo.revision_tree(value)
            elif kind == 'lefthand-distance':
                distances.add(value)
            elif kind == 'revision-existence':
                existences.add(value)
            else:
                # ref is a tuple, so it must be wrapped to be formatted as a
                # single value; a bare "% ref" raises TypeError instead.
                raise AssertionError(
                    'unknown ref kind for ref %s' % (ref,))
        node_distances = repo.get_graph().find_lefthand_distances(distances)
        for key, distance in viewitems(node_distances):
            refs[('lefthand-distance', key)] = distance
            if key in existences and distance > 0:
                refs[('revision-existence', key)] = True
                existences.remove(key)
        parent_map = repo.get_graph().get_parent_map(existences)
        for key in parent_map:
            refs[('revision-existence', key)] = True
            existences.remove(key)
        # Whatever is left was found neither by distance nor in the graph.
        for key in existences:
            refs[('revision-existence', key)] = False
        for item in wanting_items:
            if isinstance(item, WorkingTree):
                # NOTE(review): this call was missing from the reviewed text;
                # presumably working trees are checked via _check(refs).
                item._check(refs)
            if isinstance(item, Branch):
                self.other_results.append(item.check(refs))

    def _check_revisions(self, revisions_iterator):
        """Check revision objects by decorating a generator.

        :param revisions_iterator: An iterator of (revid, Revision-or-None).
        :return: A generator of the contents of revisions_iterator.
        """
        self.planned_revisions = set()
        for revid, revision in revisions_iterator:
            yield revid, revision
            self._check_one_rev(revid, revision)
        # Flatten the revisions we found to guarantee consistent later
        # iteration.
        self.planned_revisions = list(self.planned_revisions)
        # TODO: extract digital signatures as items to callback on too.

    def check_revisions(self):
        """Scan revisions, checking data directly available as we go."""
        revision_iterator = self.repository._iter_revisions(None)
        revision_iterator = self._check_revisions(revision_iterator)
        # We read the all revisions here:
        # - doing this allows later code to depend on the revision index.
        # - we can fill out existence flags at this point
        # - we can read the revision inventory sha at this point
        # - we can check properties and serialisers etc.
        if not self.repository._format.revision_graph_can_have_wrong_parents:
            # The check against the index isn't needed.
            self.revs_with_bad_parents_in_index = None
            # Drain the generator purely for its checking side effects.
            for _ in revision_iterator:
                pass
        else:
            bad_revisions = (
                self.repository._find_inconsistent_revision_parents(
                    revision_iterator))
            self.revs_with_bad_parents_in_index = list(bad_revisions)

    def report_results(self, verbose):
        """Report repository results, then those of checked branches.

        :param verbose: Whether to list individual problems as well as
            counts.
        """
        if self.check_repo:
            self._report_repo_results(verbose)
        for result in self.other_results:
            result.report_results(verbose)

    def _report_repo_results(self, verbose):
        """Emit the accumulated repository check results through note().

        :param verbose: Whether to list individual problems as well as
            counts.
        """
        note(gettext('checked repository {0} format {1}').format(
            self.repository.user_url,
            self.repository._format))
        note(gettext('%6d revisions'), self.checked_rev_cnt)
        note(gettext('%6d file-ids'), len(self.checked_weaves))
        if verbose:
            note(gettext('%6d unreferenced text versions'),
                 len(self.unreferenced_versions))
            for file_id, revision_id in self.unreferenced_versions:
                note(gettext('unreferenced version: {{{0}}} in {1}').format(
                    revision_id, file_id))
        if self.missing_inventory_sha_cnt:
            note(gettext('%6d revisions are missing inventory_sha1'),
                 self.missing_inventory_sha_cnt)
        if self.missing_revision_cnt:
            note(gettext('%6d revisions are mentioned but not present'),
                 self.missing_revision_cnt)
        if self.ghosts:
            note(gettext('%6d ghost revisions'), len(self.ghosts))
            if verbose:
                for ghost in self.ghosts:
                    # NOTE(review): the loop body was missing from the
                    # reviewed text; listing each ghost id is the presumed
                    # intent - confirm the exact message format.
                    note(' %s', ghost)
        if self.missing_parent_links:
            note(gettext('%6d revisions missing parents in ancestry'),
                 len(self.missing_parent_links))
            if verbose:
                for link, linkers in viewitems(self.missing_parent_links):
                    note(gettext(' %s should be in the ancestry for:'), link)
                    for linker in linkers:
                        note(' * %s', linker)
        if self.inconsistent_parents:
            note(gettext('%6d inconsistent parents'),
                 len(self.inconsistent_parents))
            if verbose:
                for info in self.inconsistent_parents:
                    revision_id, file_id, found_parents, correct_parents = info
                    note(gettext(' * {0} version {1} has parents {2!r} '
                                 'but should have {3!r}').format(
                        file_id, revision_id, found_parents,
                        correct_parents))
        if self.revs_with_bad_parents_in_index:
            note(gettext(
                '%6d revisions have incorrect parents in the revision index'),
                len(self.revs_with_bad_parents_in_index))
            if verbose:
                for item in self.revs_with_bad_parents_in_index:
                    revision_id, index_parents, actual_parents = item
                    note(gettext(
                        ' {0} has wrong parents in index: '
                        '{1!r} should be {2!r}').format(
                        revision_id, index_parents, actual_parents))
        for item in self._report_items:
            note(item)

    def _check_one_rev(self, rev_id, rev):
        """Cross-check one revision.

        :param rev_id: A revision id to check.
        :param rev: A revision or None to indicate a missing revision.
        """
        if rev is None:
            # The documented contract allows None, which previously crashed
            # on attribute access.
            # NOTE(review): counting it under missing_revision_cnt is the
            # presumed intent - confirm.
            self.missing_revision_cnt += 1
            return
        if rev.revision_id != rev_id:
            self._report_items.append(gettext(
                'Mismatched internal revid {{{0}}} and index revid {{{1}}}').format(
                rev.revision_id, rev_id))
            rev_id = rev.revision_id
        # Check this revision tree etc, and count as seen when we encounter a
        # reference to it.
        self.planned_revisions.add(rev_id)
        # It is not a ghost
        self.ghosts.discard(rev_id)
        # Count all parents as ghosts if we haven't seen them yet.
        for parent in rev.parent_ids:
            if parent not in self.planned_revisions:
                self.ghosts.add(parent)

        self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
        # NOTE(review): the sha1 argument was missing from the reviewed text;
        # presumably the revision's recorded inventory sha1.
        self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
                              rev.inventory_sha1)
        self.checked_rev_cnt += 1

    def add_pending_item(self, referer, key, kind, sha1):
        """Add a reference to a sha1 to be cross checked against a key.

        The first referer of a key is remembered in ``pending_keys``; a later
        referer expecting a different sha1 adds a line to the report.

        :param referer: The referer that expects key to have sha1.
        :param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
        :param kind: revision/inventory/text/map/signature
        :param sha1: A hex sha1 or None if no sha1 is known.
        """
        existing = self.pending_keys.get(key)
        if existing is None:
            self.pending_keys[key] = (kind, sha1, referer)
            return
        existing_sha1, existing_referer = existing[1], existing[2]
        if sha1 != existing_sha1:
            # Name the earlier referer and the sha1 it expected; the fields
            # used to be taken from the wrong tuple positions.
            self._report_items.append(gettext('Multiple expected sha1s for {0}. {{{1}}}'
                ' expects {{{2}}}, {{{3}}} expects {{{4}}}').format(
                key, referer, sha1, existing_referer, existing_sha1))

    def check_weaves(self):
        """Check all the weaves we can get our hands on.
        """
        storebar = ui.ui_factory.nested_progress_bar()
        try:
            self._check_weaves(storebar)
        finally:
            storebar.finished()

    def _check_weaves(self, storebar):
        """Check text parents and record inconsistent or unused versions.

        :param storebar: A progress bar updated as the check proceeds.
        """
        storebar.update('text-index', 0, 2)
        if self.repository._format.fast_deltas:
            # We haven't considered every fileid instance so far.
            weave_checker = self.repository._get_versioned_file_checker(
                ancestors=self.ancestors)
        else:
            weave_checker = self.repository._get_versioned_file_checker(
                text_key_references=self.text_key_references,
                ancestors=self.ancestors)
        storebar.update('file-graph', 1)
        wrongs, unused_versions = weave_checker.check_file_version_parents(
            self.repository.texts)
        self.checked_weaves = weave_checker.file_ids
        for text_key, (stored_parents, correct_parents) in viewitems(wrongs):
            # XXX not ready for id join/split operations.
            weave_id = text_key[0]
            revision_id = text_key[-1]
            # Keep only the last element of each parent key.
            weave_parents = tuple(parent[-1] for parent in stored_parents)
            expected_parents = tuple(
                parent[-1] for parent in correct_parents)
            self.inconsistent_parents.append(
                (revision_id, weave_id, weave_parents, expected_parents))
        self.unreferenced_versions.update(unused_versions)

    def _add_entry_to_text_key_references(self, inv, entry):
        """Record whether ``entry``'s text was introduced by ``inv``.

        The key ``(file_id, revision)`` is set to True when the entry's
        revision equals the inventory's revision id; otherwise it is created
        as False if absent. An entry named '' is skipped when the repository
        does not support rich roots.

        :param inv: An inventory; only ``revision_id`` is read.
        :param entry: An inventory entry; ``name``, ``file_id`` and
            ``revision`` are read.
        """
        if not self.rich_roots and entry.name == '':
            return
        key = (entry.file_id, entry.revision)
        self.text_key_references.setdefault(key, False)
        if entry.revision == inv.revision_id:
            self.text_key_references[key] = True
def scan_branch(branch, needed_refs, to_unlock):
    """Scan a branch for refs.

    The branch is read-locked, queued for unlocking, and registered as a
    requester of each check_ref it reports.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating.
    :param to_unlock: The unlock list accumulating.
    """
    note(gettext("Checking branch at '%s'.") % (branch.base,))
    # Lock before queueing so that everything in to_unlock really is locked.
    branch.lock_read()
    to_unlock.append(branch)
    for ref in branch._get_check_refs():
        needed_refs.setdefault(ref, []).append(branch)
def scan_tree(base_tree, tree, needed_refs, to_unlock):
    """Scan a tree for refs.

    The tree is read-locked, queued for unlocking, and registered as a
    requester of each check_ref it reports. Nothing happens when ``tree`` has
    the same basedir as ``base_tree``.

    :param base_tree: The original tree check opened, used to detect duplicate
        tree checks.
    :param tree: The tree to schedule for checking.
    :param needed_refs: Refs we are accumulating.
    :param to_unlock: The unlock list accumulating.
    """
    if base_tree is not None and tree.basedir == base_tree.basedir:
        # Same location as the tree check already opened: skip the duplicate.
        return
    note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
    # Lock before queueing so that everything in to_unlock really is locked.
    tree.lock_read()
    to_unlock.append(tree)
    for ref in tree._get_check_refs():
        # Register the tree as a requester, as scan_branch does for branches.
        needed_refs.setdefault(ref, []).append(tree)
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
    """Check multiple objects.

    If errors occur they are accumulated and reported as far as possible, and
    an exception raised at the end of the process.

    :param path: The location to open; whatever tree, branch or repository
        contains it is checked.
    :param verbose: Passed on to the result's ``report_results``.
    :param do_branch: Whether to check branches.
    :param do_repo: Whether to check the repository itself.
    :param do_tree: Whether to check working trees.
    """
    try:
        base_tree, branch, repo, relpath = \
            ControlDir.open_containing_tree_branch_or_repository(path)
    except errors.NotBranchError:
        base_tree = branch = repo = None

    to_unlock = []
    needed_refs = {}
    try:
        if base_tree is not None:
            # If the tree is a lightweight checkout we won't see it in
            # repo.find_branches - add now.
            if do_tree:
                scan_tree(None, base_tree, needed_refs, to_unlock)
            branch = base_tree.branch
        if branch is not None:
            # We have a branch
            if repo is None:
                # The branch is in a shared repository
                repo = branch.repository
        if repo is not None:
            repo.lock_read()
            to_unlock.append(repo)
            branches = repo.find_branches(using=True)
            saw_tree = False
            if do_branch or do_tree:
                for found_branch in branches:
                    if do_tree:
                        try:
                            tree = found_branch.bzrdir.open_workingtree()
                        except (errors.NotLocalUrl, errors.NoWorkingTree):
                            # No usable working tree here: nothing to scan.
                            pass
                        else:
                            saw_tree = True
                            scan_tree(base_tree, tree, needed_refs, to_unlock)
                    if do_branch:
                        scan_branch(found_branch, needed_refs, to_unlock)
            if do_branch and not branches:
                note(gettext("No branch found at specified location."))
            if do_tree and base_tree is None and not saw_tree:
                note(gettext("No working tree found at specified location."))
            if do_repo or do_branch or do_tree:
                if do_repo:
                    # NOTE(review): the format argument was missing from the
                    # reviewed text; repo.user_url is presumed - confirm.
                    note(gettext("Checking repository at '%s'.")
                         % (repo.user_url,))
                # NOTE(review): the trailing keyword argument was missing
                # from the reviewed text; check_repo=do_repo is presumed.
                result = repo.check(None, callback_refs=needed_refs,
                                    check_repo=do_repo)
                result.report_results(verbose)
        else:
            if do_tree:
                note(gettext("No working tree found at specified location."))
            if do_branch:
                note(gettext("No branch found at specified location."))
            if do_repo:
                note(gettext("No repository found at specified location."))
    finally:
        # Release every lock taken above, whether or not the check succeeded.
        for thing in to_unlock:
            thing.unlock()