1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
3
4
# This program is free software; you can redistribute it and/or modify
4
5
# it under the terms of the GNU General Public License as published by
5
6
# the Free Software Foundation; either version 2 of the License, or
6
7
# (at your option) any later version.
8
9
# This program is distributed in the hope that it will be useful,
9
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
12
# GNU General Public License for more details.
13
14
# You should have received a copy of the GNU General Public License
14
15
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Perhaps have a way to record errors other than by raising exceptions;
18
# would perhaps be enough to accumulate exception objects in a list without
19
# raising them. If there's more than one exception it'd be good to see them
22
"""Checking of bzr objects.
24
check_refs is a concept used for optimising check. Objects that depend on other
25
objects (e.g. tree on repository) can list the objects they would be requesting
26
so that when the dependent object is checked, matches can be pulled out and
27
evaluated in-line rather than re-reading the same data many times.
28
check_refs are tuples (kind, value). Currently defined kinds are:
30
* 'trees', where value is a revid and the looked up objects are revision trees.
31
* 'lefthand-distance', where value is a revid and the looked up objects are the
32
distance along the lefthand path to NULL for that revid.
33
* 'revision-existence', where value is a revid, and the result is True or False
34
indicating that the revision was found/not found.
37
from __future__ import absolute_import
43
from .controldir import ControlDir
44
from .trace import note
45
from .i18n import gettext
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Check ancestries are correct for every revision: includes
19
# every revision committed so far, and in a reasonable order.
21
# TODO: Also check non-mainline revisions mentioned as parents.
23
# TODO: Check for extra files in the control directory.
25
# TODO: Check revision, inventory and entry objects have all
28
# TODO: Get every revision in the revision-store even if they're not
29
# referenced by history and make sure they're all valid.
32
from bzrlib.trace import note, warning
33
from bzrlib.osutils import rename, sha_string, fingerprint_file
34
from bzrlib.trace import mutter
35
from bzrlib.errors import BzrCheckError, NoSuchRevision
36
from bzrlib.inventory import ROOT_ID
37
from bzrlib.branch import gen_root_id
48
40
class Check(object):
49
"""Check a repository"""
51
def __init__(self, repository, check_repo=True):
52
self.repository = repository
43
def __init__(self, branch):
45
self.checked_text_cnt = 0
46
self.checked_rev_cnt = 0
48
self.repeated_text_cnt = 0
49
self.missing_parent_links = {}
50
self.missing_inventory_sha_cnt = 0
51
self.missing_revision_cnt = 0
52
# maps (file-id, version) -> sha1
53
self.checked_texts = {}
56
self.branch.lock_read()
58
self.history = self.branch.revision_history()
59
if not len(self.history):
62
self.planned_revisions = self.branch.get_ancestry(self.history[-1])
63
self.planned_revisions.remove(None)
66
self.progress = bzrlib.ui.ui_factory.progress_bar()
67
while revno < len(self.planned_revisions):
68
rev_id = self.planned_revisions[revno]
69
self.progress.update('checking revision', revno,
70
len(self.planned_revisions))
72
self.check_one_rev(rev_id)
54
77
def report_results(self, verbose):
55
raise NotImplementedError(self.report_results)
58
def scan_branch(branch, needed_refs, exit_stack):
59
"""Scan a branch for refs.
61
:param branch: The branch to schedule for checking.
62
:param needed_refs: Refs we are accumulating.
63
:param exit_stack: The exit stack accumulating.
65
note(gettext("Checking branch at '%s'.") % (branch.base,))
66
exit_stack.enter_context(branch.lock_read())
67
branch_refs = branch._get_check_refs()
68
for ref in branch_refs:
69
reflist = needed_refs.setdefault(ref, [])
70
reflist.append(branch)
73
def scan_tree(base_tree, tree, needed_refs, exit_stack):
74
"""Scan a tree for refs.
76
:param base_tree: The original tree check opened, used to detect duplicate
78
:param tree: The tree to schedule for checking.
79
:param needed_refs: Refs we are accumulating.
80
:param exit_stack: The exit stack accumulating.
82
if base_tree is not None and tree.basedir == base_tree.basedir:
84
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
85
exit_stack.enter_context(tree.lock_read())
86
tree_refs = tree._get_check_refs()
88
reflist = needed_refs.setdefault(ref, [])
92
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
93
"""Check multiple objects.
95
If errors occur they are accumulated and reported as far as possible, and
96
an exception raised at the end of the process.
99
base_tree, branch, repo, relpath = \
100
ControlDir.open_containing_tree_branch_or_repository(path)
101
except errors.NotBranchError:
102
base_tree = branch = repo = None
104
with cleanup.ExitStack() as exit_stack:
106
if base_tree is not None:
107
# If the tree is a lightweight checkout we won't see it in
108
# repo.find_branches - add now.
110
scan_tree(None, base_tree, needed_refs, exit_stack)
111
branch = base_tree.branch
112
if branch is not None:
115
# The branch is in a shared repository
116
repo = branch.repository
118
exit_stack.enter_context(repo.lock_read())
119
branches = list(repo.find_branches(using=True))
121
if do_branch or do_tree:
122
for branch in branches:
125
tree = branch.controldir.open_workingtree()
127
except (errors.NotLocalUrl, errors.NoWorkingTree):
130
scan_tree(base_tree, tree, needed_refs, exit_stack)
132
scan_branch(branch, needed_refs, exit_stack)
133
if do_branch and not branches:
134
note(gettext("No branch found at specified location."))
135
if do_tree and base_tree is None and not saw_tree:
136
note(gettext("No working tree found at specified location."))
137
if do_repo or do_branch or do_tree:
139
note(gettext("Checking repository at '%s'.")
141
result = repo.check(None, callback_refs=needed_refs,
143
result.report_results(verbose)
146
note(gettext("No working tree found at specified location."))
148
note(gettext("No branch found at specified location."))
150
note(gettext("No repository found at specified location."))
78
note('checked branch %s format %d',
80
self.branch._branch_format)
82
note('%6d revisions', self.checked_rev_cnt)
83
note('%6d unique file texts', self.checked_text_cnt)
84
note('%6d repeated file texts', self.repeated_text_cnt)
85
if self.missing_inventory_sha_cnt:
86
note('%6d revisions are missing inventory_sha1',
87
self.missing_inventory_sha_cnt)
88
if self.missing_revision_cnt:
89
note('%6d revisions are mentioned but not present',
90
self.missing_revision_cnt)
92
note('%6d ghost revisions', len(self.ghosts))
94
for ghost in self.ghosts:
96
if len(self.missing_parent_links):
97
note('%6d revisions missing parents in ancestry',
98
len(self.missing_parent_links))
100
for link, linkers in self.missing_parent_links.items():
101
note(' %s should be in the ancestry for:', link)
102
for linker in linkers:
103
note(' * %s', linker)
105
def check_one_rev(self, rev_id):
106
"""Check one revision.
108
rev_id - the one to check
110
last_rev_id - the previous one on the mainline, if any.
113
# mutter(' revision {%s}', rev_id)
116
rev_history_position = self.history.index(rev_id)
118
rev_history_position = None
120
if rev_history_position:
121
rev = branch.get_revision(rev_id)
122
if rev_history_position > 0:
123
last_rev_id = self.history[rev_history_position - 1]
125
rev = branch.get_revision(rev_id)
127
if rev.revision_id != rev_id:
128
raise BzrCheckError('wrong internal revision id in revision {%s}'
131
# check the previous history entry is a parent of this entry
133
if last_rev_id is not None:
134
for parent_id in rev.parent_ids:
135
if parent_id == last_rev_id:
138
raise BzrCheckError("previous revision {%s} not listed among "
140
% (last_rev_id, rev_id))
141
for parent in rev.parent_ids:
142
if not parent in self.planned_revisions:
143
missing_links = self.missing_parent_links.get(parent, [])
144
missing_links.append(rev_id)
145
self.missing_parent_links[parent] = missing_links
146
# planned_revisions is a list, so these membership tests are slow.
# TODO: keep planned_revisions as both a list and a set.
147
if self.branch.has_revision(parent):
148
missing_ancestry = self.branch.get_ancestry(parent)
149
for missing in missing_ancestry:
150
if (missing is not None
151
and missing not in self.planned_revisions):
152
self.planned_revisions.append(missing)
154
self.ghosts.append(rev_id)
156
raise BzrCheckError("revision {%s} has no parents listed "
157
"but preceded by {%s}"
158
% (rev_id, last_rev_id))
160
if rev.inventory_sha1:
161
inv_sha1 = branch.get_inventory_sha1(rev_id)
162
if inv_sha1 != rev.inventory_sha1:
163
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
164
' value in revision {%s}' % rev_id)
166
missing_inventory_sha_cnt += 1
167
mutter("no inventory_sha1 on revision {%s}", rev_id)
168
self._check_revision_tree(rev_id)
169
self.checked_rev_cnt += 1
171
def _check_revision_tree(self, rev_id):
172
tree = self.branch.revision_tree(rev_id)
176
if file_id in seen_ids:
177
raise BzrCheckError('duplicated file_id {%s} '
178
'in inventory for revision {%s}'
180
seen_ids[file_id] = True
183
ie.check(self, rev_id, inv, tree)
185
for path, ie in inv.iter_entries():
186
if path in seen_names:
187
raise BzrCheckError('duplicated path %s '
188
'in inventory for revision {%s}'
190
seen_names[path] = True
193
def check(branch, verbose):
194
"""Run consistency checks on a branch."""
195
checker = Check(branch)
197
checker.report_results(verbose)