1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
3
4
# This program is free software; you can redistribute it and/or modify
4
5
# it under the terms of the GNU General Public License as published by
5
6
# the Free Software Foundation; either version 2 of the License, or
6
7
# (at your option) any later version.
8
9
# This program is distributed in the hope that it will be useful,
9
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
12
# GNU General Public License for more details.
13
14
# You should have received a copy of the GNU General Public License
14
15
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Perhaps have a way to record errors other than by raising exceptions;
18
# would perhaps be enough to accumulate exception objects in a list without
19
# raising them. If there's more than one exception it'd be good to see them
22
"""Checking of bzr objects.
24
check_refs is a concept used for optimising check. Objects that depend on other
25
objects (e.g. tree on repository) can list the objects they would be requesting
26
so that when the dependent object is checked, matches can be pulled out and
27
evaluated in-line rather than re-reading the same data many times.
28
check_refs are tuples (kind, value). Currently defined kinds are:
30
* 'trees', where value is a revid and the looked up objects are revision trees.
31
* 'lefthand-distance', where value is a revid and the looked up objects are the
32
distance along the lefthand path to NULL for that revid.
33
* 'revision-existence', where value is a revid, and the result is True or False
34
indicating that the revision was found/not found.
37
from __future__ import absolute_import
43
from .controldir import ControlDir
44
from .trace import note
45
from .i18n import gettext
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Check ancestries are correct for every revision: includes
19
# every revision committed so far, and in a reasonable order.
21
# TODO: Also check non-mainline revisions mentioned as parents.
23
# TODO: Check for extra files in the control directory.
25
# TODO: Check revision, inventory and entry objects have all
28
# TODO: Get every revision in the revision-store even if they're not
29
# referenced by history and make sure they're all valid.
32
from bzrlib.trace import note, warning
33
from bzrlib.osutils import rename, sha_string, fingerprint_file
34
from bzrlib.trace import mutter
35
from bzrlib.errors import BzrCheckError, NoSuchRevision
36
from bzrlib.inventory import ROOT_ID
48
39
class Check(object):
49
"""Check a repository"""
51
def __init__(self, repository, check_repo=True):
52
self.repository = repository
42
def __init__(self, branch):
44
self.repository = branch.repository
45
self.checked_text_cnt = 0
46
self.checked_rev_cnt = 0
48
self.repeated_text_cnt = 0
49
self.missing_parent_links = {}
50
self.missing_inventory_sha_cnt = 0
51
self.missing_revision_cnt = 0
52
# maps (file-id, version) -> sha1
53
self.checked_texts = {}
54
self.checked_weaves = {}
57
self.branch.lock_read()
58
self.progress = bzrlib.ui.ui_factory.progress_bar()
60
self.progress.update('retrieving inventory', 0, 0)
61
# do not put in init, as it should be done with progress,
62
# and inside the lock.
63
self.inventory_weave = self.branch.repository.get_inventory_weave()
64
self.history = self.branch.revision_history()
65
if not len(self.history):
71
while revno < len(self.planned_revisions):
72
rev_id = self.planned_revisions[revno]
73
self.progress.update('checking revision', revno,
74
len(self.planned_revisions))
76
self.check_one_rev(rev_id)
81
def plan_revisions(self):
82
repository = self.branch.repository
83
if not repository.revision_store.listable():
84
self.planned_revisions = repository.get_ancestry(self.history[-1])
85
self.planned_revisions.remove(None)
86
# FIXME progress bars should support this more nicely.
88
print ("Checking reachable history -"
89
" for a complete check use a local branch.")
92
self.planned_revisions = set(repository.revision_store)
93
inventoried = set(self.inventory_weave.names())
94
awol = self.planned_revisions - inventoried
96
raise BzrCheckError('Stored revisions missing from inventory'
97
'{%s}' % ','.join([f for f in awol]))
98
self.planned_revisions = list(self.planned_revisions)
54
100
def report_results(self, verbose):
55
raise NotImplementedError(self.report_results)
58
def scan_branch(branch, needed_refs, exit_stack):
    """Scan a branch for refs.

    Reports the branch being checked, read-locks it for the lifetime of
    ``exit_stack`` and registers the branch against every check ref it asks
    for.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating. A dict keyed by ref; each
        value is the list of objects that requested that ref. It is updated
        in place.
    :param exit_stack: The exit stack accumulating. The branch read lock is
        entered on it, so it is released when the stack is closed.
    """
    note(gettext("Checking branch at '%s'.") % (branch.base,))
    # The lock is owned by the caller's exit stack, not by this function, so
    # the branch stays locked after we return.
    exit_stack.enter_context(branch.lock_read())
    for ref in branch._get_check_refs():
        # Several objects may ask for the same ref; keep every requester.
        needed_refs.setdefault(ref, []).append(branch)
73
def scan_tree(base_tree, tree, needed_refs, exit_stack):
    """Scan a tree for refs.

    Reports the working tree being checked, read-locks it for the lifetime
    of ``exit_stack`` and registers the tree against every check ref it asks
    for.

    :param base_tree: The original tree check opened, used to detect a
        duplicate check of the same working tree (compared by ``basedir``).
        May be None, in which case no duplicate detection is done.
    :param tree: The tree to schedule for checking.
    :param needed_refs: Refs we are accumulating. A dict keyed by ref; each
        value is the list of objects that requested that ref. It is updated
        in place.
    :param exit_stack: The exit stack accumulating. The tree read lock is
        entered on it, so it is released when the stack is closed.
    """
    # NOTE(review): the early return, the loop header and the append below
    # were missing from the damaged listing; they are restored by analogy
    # with scan_branch -- confirm against the upstream file.
    if base_tree is not None and tree.basedir == base_tree.basedir:
        # Same working tree as the one already opened: nothing more to do.
        return
    note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
    # The lock is owned by the caller's exit stack, not by this function.
    exit_stack.enter_context(tree.lock_read())
    for ref in tree._get_check_refs():
        # Several objects may ask for the same ref; keep every requester.
        needed_refs.setdefault(ref, []).append(tree)
92
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
93
"""Check multiple objects.
95
If errors occur they are accumulated and reported as far as possible, and
96
an exception raised at the end of the process.
99
base_tree, branch, repo, relpath = \
100
ControlDir.open_containing_tree_branch_or_repository(path)
101
except errors.NotBranchError:
102
base_tree = branch = repo = None
104
with cleanup.ExitStack() as exit_stack:
106
if base_tree is not None:
107
# If the tree is a lightweight checkout we won't see it in
108
# repo.find_branches - add now.
110
scan_tree(None, base_tree, needed_refs, exit_stack)
111
branch = base_tree.branch
112
if branch is not None:
115
# The branch is in a shared repository
116
repo = branch.repository
118
exit_stack.enter_context(repo.lock_read())
119
branches = list(repo.find_branches(using=True))
121
if do_branch or do_tree:
122
for branch in branches:
125
tree = branch.controldir.open_workingtree()
127
except (errors.NotLocalUrl, errors.NoWorkingTree):
130
scan_tree(base_tree, tree, needed_refs, exit_stack)
132
scan_branch(branch, needed_refs, exit_stack)
133
if do_branch and not branches:
134
note(gettext("No branch found at specified location."))
135
if do_tree and base_tree is None and not saw_tree:
136
note(gettext("No working tree found at specified location."))
137
if do_repo or do_branch or do_tree:
139
note(gettext("Checking repository at '%s'.")
141
result = repo.check(None, callback_refs=needed_refs,
143
result.report_results(verbose)
146
note(gettext("No working tree found at specified location."))
148
note(gettext("No branch found at specified location."))
150
note(gettext("No repository found at specified location."))
101
note('checked branch %s format %s',
103
self.branch._branch_format)
105
note('%6d revisions', self.checked_rev_cnt)
106
note('%6d unique file texts', self.checked_text_cnt)
107
note('%6d repeated file texts', self.repeated_text_cnt)
108
note('%6d weaves', len(self.checked_weaves))
109
if self.missing_inventory_sha_cnt:
110
note('%6d revisions are missing inventory_sha1',
111
self.missing_inventory_sha_cnt)
112
if self.missing_revision_cnt:
113
note('%6d revisions are mentioned but not present',
114
self.missing_revision_cnt)
116
note('%6d ghost revisions', len(self.ghosts))
118
for ghost in self.ghosts:
120
if len(self.missing_parent_links):
121
note('%6d revisions missing parents in ancestry',
122
len(self.missing_parent_links))
124
for link, linkers in self.missing_parent_links.items():
125
note(' %s should be in the ancestry for:', link)
126
for linker in linkers:
127
note(' * %s', linker)
129
def check_one_rev(self, rev_id):
130
"""Check one revision.
132
rev_id - the one to check
134
last_rev_id - the previous one on the mainline, if any.
137
# mutter(' revision {%s}', rev_id)
140
rev_history_position = self.history.index(rev_id)
142
rev_history_position = None
144
if rev_history_position:
145
rev = branch.repository.get_revision(rev_id)
146
if rev_history_position > 0:
147
last_rev_id = self.history[rev_history_position - 1]
149
rev = branch.repository.get_revision(rev_id)
151
if rev.revision_id != rev_id:
152
raise BzrCheckError('wrong internal revision id in revision {%s}'
155
# check the previous history entry is a parent of this entry
157
if last_rev_id is not None:
158
for parent_id in rev.parent_ids:
159
if parent_id == last_rev_id:
162
raise BzrCheckError("previous revision {%s} not listed among "
164
% (last_rev_id, rev_id))
165
for parent in rev.parent_ids:
166
if not parent in self.planned_revisions:
167
missing_links = self.missing_parent_links.get(parent, [])
168
missing_links.append(rev_id)
169
self.missing_parent_links[parent] = missing_links
170
# list based so somewhat slow,
171
# TODO have a planned_revisions list and set.
172
if self.branch.has_revision(parent):
173
missing_ancestry = self.repository.get_ancestry(parent)
174
for missing in missing_ancestry:
175
if (missing is not None
176
and missing not in self.planned_revisions):
177
self.planned_revisions.append(missing)
179
self.ghosts.append(rev_id)
181
raise BzrCheckError("revision {%s} has no parents listed "
182
"but preceded by {%s}"
183
% (rev_id, last_rev_id))
185
if rev.inventory_sha1:
186
inv_sha1 = branch.repository.get_inventory_sha1(rev_id)
187
if inv_sha1 != rev.inventory_sha1:
188
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
189
' value in revision {%s}' % rev_id)
191
missing_inventory_sha_cnt += 1
192
mutter("no inventory_sha1 on revision {%s}", rev_id)
193
self._check_revision_tree(rev_id)
194
self.checked_rev_cnt += 1
196
def check_weaves(self):
197
"""Check all the weaves we can get our hands on.
201
if self.branch.repository.weave_store.listable():
202
weave_ids = list(self.branch.repository.weave_store)
203
n_weaves = len(weave_ids)
204
self.progress.update('checking weave', 0, n_weaves)
205
self.inventory_weave.check(progress_bar=self.progress)
206
for i, weave_id in enumerate(weave_ids):
207
self.progress.update('checking weave', i, n_weaves)
208
w = self.branch.repository.weave_store.get_weave(weave_id,
209
self.branch.repository.get_transaction())
210
# No progress here, because it looks ugly.
212
self.checked_weaves[weave_id] = True
214
def _check_revision_tree(self, rev_id):
215
tree = self.branch.repository.revision_tree(rev_id)
219
if file_id in seen_ids:
220
raise BzrCheckError('duplicated file_id {%s} '
221
'in inventory for revision {%s}'
223
seen_ids[file_id] = True
226
ie.check(self, rev_id, inv, tree)
228
for path, ie in inv.iter_entries():
229
if path in seen_names:
230
raise BzrCheckError('duplicated path %s '
231
'in inventory for revision {%s}'
233
seen_names[path] = True
236
def check(branch, verbose):
237
"""Run consistency checks on a branch."""
238
checker = Check(branch)
240
checker.report_results(verbose)