1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
3
4
# This program is free software; you can redistribute it and/or modify
4
5
# it under the terms of the GNU General Public License as published by
5
6
# the Free Software Foundation; either version 2 of the License, or
6
7
# (at your option) any later version.
8
9
# This program is distributed in the hope that it will be useful,
9
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
12
# GNU General Public License for more details.
13
14
# You should have received a copy of the GNU General Public License
14
15
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Perhaps have a way to record errors other than by raising exceptions;
18
# it would perhaps be enough to accumulate exception objects in a list without
19
# raising them. If there's more than one exception, it'd be good to see them all.
22
"""Checking of bzr objects.
24
check_refs is a concept used for optimising check. Objects that depend on other
25
objects (e.g. tree on repository) can list the objects they would be requesting
26
so that when the dependent object is checked, matches can be pulled out and
27
evaluated in-line rather than re-reading the same data many times.
28
check_refs are tuples (kind, value). Currently defined kinds are:
30
* 'trees', where value is a revid and the looked up objects are revision trees.
31
* 'lefthand-distance', where value is a revid and the looked up objects are the
32
distance along the lefthand path to NULL for that revid.
33
* 'revision-existence', where value is a revid, and the result is True or False
34
indicating that the revision was found/not found.
37
from __future__ import absolute_import
43
from .controldir import ControlDir
44
from .trace import note
45
from .i18n import gettext
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Check ancestries are correct for every revision: includes
19
# every revision committed so far, and in a reasonable order.
21
# TODO: Also check non-mainline revisions mentioned as parents.
23
# TODO: Check for extra files in the control directory.
25
# TODO: Check revision, inventory and entry objects have all required fields.
30
from bzrlib.trace import note, warning
31
from bzrlib.osutils import rename, sha_string, fingerprint_file
32
from bzrlib.trace import mutter
33
from bzrlib.errors import BzrCheckError, NoSuchRevision
34
from bzrlib.inventory import ROOT_ID
35
from bzrlib.branch import gen_root_id
48
38
class Check(object):
49
"""Check a repository"""
51
def __init__(self, repository, check_repo=True):
52
self.repository = repository
41
def __init__(self, branch):
43
self.checked_text_cnt = 0
44
self.checked_rev_cnt = 0
46
self.repeated_text_cnt = 0
47
self.missing_parent_links = {}
48
self.missing_inventory_sha_cnt = 0
49
self.missing_revision_cnt = 0
50
# maps (file-id, version) -> sha1
51
self.checked_texts = {}
54
self.branch.lock_read()
56
self.history = self.branch.revision_history()
57
if not len(self.history):
60
self.planned_revisions = self.branch.get_ancestry(self.history[-1])
61
self.planned_revisions.remove(None)
64
self.progress = bzrlib.ui.ui_factory.progress_bar()
65
while revno < len(self.planned_revisions):
66
rev_id = self.planned_revisions[revno]
67
self.progress.update('checking revision', revno,
68
len(self.planned_revisions))
70
self.check_one_rev(rev_id)
54
75
def report_results(self, verbose):
55
raise NotImplementedError(self.report_results)
58
def scan_branch(branch, needed_refs, exit_stack):
    """Record the check refs a branch needs and read-lock the branch.

    A "Checking branch at ..." note is emitted, the result of
    ``branch.lock_read()`` is entered into ``exit_stack``, and the branch is
    appended to the list kept in ``needed_refs`` for each ref it reports via
    ``branch._get_check_refs()``.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating; maps each ref to the list
        of objects that asked for it. Updated in place.
    :param exit_stack: The exit stack accumulating the locks taken.
    """
    note(gettext("Checking branch at '%s'.") % (branch.base,))
    exit_stack.enter_context(branch.lock_read())
    for check_ref in branch._get_check_refs():
        # Create the per-ref list on first use, then register this branch.
        needed_refs.setdefault(check_ref, []).append(branch)
73
def scan_tree(base_tree, tree, needed_refs, exit_stack):
    """Scan a tree for refs.

    A "Checking working tree at ..." note is emitted, the result of
    ``tree.lock_read()`` is entered into ``exit_stack``, and the tree is
    appended to the list kept in ``needed_refs`` for each ref it reports via
    ``tree._get_check_refs()``.

    :param base_tree: The original tree check opened, used to detect a
        duplicate scan of the same tree. May be None.
    :param tree: The tree to schedule for checking.
    :param needed_refs: Refs we are accumulating; maps each ref to the list
        of objects that asked for it. Updated in place.
    :param exit_stack: The exit stack accumulating the locks taken.
    """
    if base_tree is not None and tree.basedir == base_tree.basedir:
        # Same location as the tree that was opened first: skip it rather
        # than announcing, locking and registering it again.
        return
    note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
    exit_stack.enter_context(tree.lock_read())
    for check_ref in tree._get_check_refs():
        # Mirror scan_branch: create the per-ref list on first use, then
        # register this tree as wanting that ref.
        needed_refs.setdefault(check_ref, []).append(tree)
92
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
93
"""Check multiple objects.
95
If errors occur they are accumulated and reported as far as possible, and
96
an exception raised at the end of the process.
99
base_tree, branch, repo, relpath = \
100
ControlDir.open_containing_tree_branch_or_repository(path)
101
except errors.NotBranchError:
102
base_tree = branch = repo = None
104
with cleanup.ExitStack() as exit_stack:
106
if base_tree is not None:
107
# If the tree is a lightweight checkout we won't see it in
108
# repo.find_branches - add now.
110
scan_tree(None, base_tree, needed_refs, exit_stack)
111
branch = base_tree.branch
112
if branch is not None:
115
# The branch is in a shared repository
116
repo = branch.repository
118
exit_stack.enter_context(repo.lock_read())
119
branches = list(repo.find_branches(using=True))
121
if do_branch or do_tree:
122
for branch in branches:
125
tree = branch.controldir.open_workingtree()
127
except (errors.NotLocalUrl, errors.NoWorkingTree):
130
scan_tree(base_tree, tree, needed_refs, exit_stack)
132
scan_branch(branch, needed_refs, exit_stack)
133
if do_branch and not branches:
134
note(gettext("No branch found at specified location."))
135
if do_tree and base_tree is None and not saw_tree:
136
note(gettext("No working tree found at specified location."))
137
if do_repo or do_branch or do_tree:
139
note(gettext("Checking repository at '%s'.")
141
result = repo.check(None, callback_refs=needed_refs,
143
result.report_results(verbose)
146
note(gettext("No working tree found at specified location."))
148
note(gettext("No branch found at specified location."))
150
note(gettext("No repository found at specified location."))
76
note('checked branch %s format %d',
78
self.branch._branch_format)
80
note('%6d revisions', self.checked_rev_cnt)
81
note('%6d unique file texts', self.checked_text_cnt)
82
note('%6d repeated file texts', self.repeated_text_cnt)
83
if self.missing_inventory_sha_cnt:
84
note('%6d revisions are missing inventory_sha1',
85
self.missing_inventory_sha_cnt)
86
if self.missing_revision_cnt:
87
note('%6d revisions are mentioned but not present',
88
self.missing_revision_cnt)
90
note('%6d ghost revisions', len(self.ghosts))
92
for ghost in self.ghosts:
94
if len(self.missing_parent_links):
95
note('%6d revisions missing parents in ancestry',
96
len(self.missing_parent_links))
98
for link, linkers in self.missing_parent_links.items():
99
note(' %s should be in the ancestry for:', link)
100
for linker in linkers:
101
note(' * %s', linker)
103
def check_one_rev(self, rev_id):
104
"""Check one revision.
106
rev_id - the one to check
108
last_rev_id - the previous one on the mainline, if any.
111
# mutter(' revision {%s}' % rev_id)
114
rev_history_position = self.history.index(rev_id)
116
rev_history_position = None
118
if rev_history_position:
119
rev = branch.get_revision(rev_id)
120
if rev_history_position > 0:
121
last_rev_id = self.history[rev_history_position - 1]
123
rev = branch.get_revision(rev_id)
125
if rev.revision_id != rev_id:
126
raise BzrCheckError('wrong internal revision id in revision {%s}'
129
# check the previous history entry is a parent of this entry
131
if last_rev_id is None and rev_history_position is not None:
132
# what if the start is a ghost ? i.e. conceptually the
134
raise BzrCheckError("revision {%s} has %d parents, but is the "
135
"start of the branch"
136
% (rev_id, len(rev.parent_ids)))
137
if last_rev_id is not None:
138
for parent_id in rev.parent_ids:
139
if parent_id == last_rev_id:
142
raise BzrCheckError("previous revision {%s} not listed among "
144
% (last_rev_id, rev_id))
145
for parent in rev.parent_ids:
146
if not parent in self.planned_revisions:
147
missing_links = self.missing_parent_links.get(parent, [])
148
missing_links.append(rev_id)
149
self.missing_parent_links[parent] = missing_links
150
# list based so slow, TODO have a planned_revisions list and set.
151
if self.branch.has_revision(parent):
152
missing_ancestry = self.branch.get_ancestry(parent)
153
for missing in missing_ancestry:
154
if (missing is not None
155
and missing not in self.planned_revisions):
156
self.planned_revisions.append(missing)
158
self.ghosts.append(rev_id)
160
raise BzrCheckError("revision {%s} has no parents listed "
161
"but preceded by {%s}"
162
% (rev_id, last_rev_id))
164
if rev.inventory_sha1:
165
inv_sha1 = branch.get_inventory_sha1(rev_id)
166
if inv_sha1 != rev.inventory_sha1:
167
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
168
' value in revision {%s}' % rev_id)
170
missing_inventory_sha_cnt += 1
171
mutter("no inventory_sha1 on revision {%s}" % rev_id)
172
self._check_revision_tree(rev_id)
173
self.checked_rev_cnt += 1
175
def _check_revision_tree(self, rev_id):
176
tree = self.branch.revision_tree(rev_id)
180
if file_id in seen_ids:
181
raise BzrCheckError('duplicated file_id {%s} '
182
'in inventory for revision {%s}'
184
seen_ids[file_id] = True
187
ie.check(self, rev_id, inv, tree)
189
for path, ie in inv.iter_entries():
190
if path in seen_names:
191
raise BzrCheckError('duplicated path %s '
192
'in inventory for revision {%s}'
194
seen_names[path] = True
197
def check(branch, verbose):
    """Run consistency checks on a branch.

    Constructs a ``Check`` for ``branch`` and asks it to report its results.

    :param branch: The branch handed to ``Check``.
    :param verbose: Passed through unchanged to ``report_results``.
    """
    # NOTE(review): the extracted text skips one line between the two
    # statements below -- confirm against the original file that no call
    # (e.g. one that actually runs the check) was dropped here.
    branch_check = Check(branch)
    branch_check.report_results(verbose)