1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005, 2006 by Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
5
5
# the Free Software Foundation; either version 2 of the License, or
6
6
# (at your option) any later version.
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
11
# GNU General Public License for more details.
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
# required fields.
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
17
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
18
31
# would perhaps be enough to accumulate exception objects in a list without
19
32
# raising them. If there's more than one exception it'd be good to see them
# all.
22
"""Checking of bzr objects.
24
check_refs is a concept used for optimising check. Objects that depend on other
25
objects (e.g. tree on repository) can list the objects they would be requesting
26
so that when the dependent object is checked, matches can be pulled out and
27
evaluated in-line rather than re-reading the same data many times.
28
check_refs are tuples (kind, value). Currently defined kinds are:
30
* 'trees', where value is a revid and the looked up objects are revision trees.
31
* 'lefthand-distance', where value is a revid and the looked up objects are the
32
distance along the lefthand path to NULL for that revid.
33
* 'revision-existence', where value is a revid, and the result is True or False
34
indicating that the revision was found/not found.
37
from __future__ import absolute_import
43
from .controldir import ControlDir
44
from .trace import note
45
from .i18n import gettext
36
from bzrlib.trace import note, warning
37
from bzrlib.osutils import rename, sha_string, fingerprint_file
38
from bzrlib.trace import mutter
39
from bzrlib.errors import BzrCheckError, NoSuchRevision
40
from bzrlib.inventory import ROOT_ID
48
43
class Check(object):
49
"""Check a repository"""
51
def __init__(self, repository, check_repo=True):
52
self.repository = repository
46
# The Check object interacts with InventoryEntry.check, etc.
48
def __init__(self, branch):
50
self.repository = branch.repository
51
self.checked_text_cnt = 0
52
self.checked_rev_cnt = 0
54
self.repeated_text_cnt = 0
55
self.missing_parent_links = {}
56
self.missing_inventory_sha_cnt = 0
57
self.missing_revision_cnt = 0
58
# maps (file-id, version) -> sha1; used by InventoryFile._check
59
self.checked_texts = {}
60
self.checked_weaves = {}
63
self.branch.lock_read()
64
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
66
self.progress.update('retrieving inventory', 0, 0)
67
# do not put in init, as it should be done with progress,
68
# and inside the lock.
69
self.inventory_weave = self.branch.repository.get_inventory_weave()
70
self.history = self.branch.revision_history()
71
if not len(self.history):
77
while revno < len(self.planned_revisions):
78
rev_id = self.planned_revisions[revno]
79
self.progress.update('checking revision', revno,
80
len(self.planned_revisions))
82
self.check_one_rev(rev_id)
84
self.progress.finished()
87
def plan_revisions(self):
88
repository = self.branch.repository
89
self.planned_revisions = set(repository.all_revision_ids())
91
inventoried = set(self.inventory_weave.versions())
92
awol = self.planned_revisions - inventoried
94
raise BzrCheckError('Stored revisions missing from inventory'
95
'{%s}' % ','.join([f for f in awol]))
96
self.planned_revisions = list(self.planned_revisions)
54
98
def report_results(self, verbose):
55
raise NotImplementedError(self.report_results)
58
def scan_branch(branch, needed_refs, exit_stack):
59
"""Scan a branch for refs.
61
:param branch: The branch to schedule for checking.
62
:param needed_refs: Refs we are accumulating.
63
:param exit_stack: The exit stack accumulating.
65
note(gettext("Checking branch at '%s'.") % (branch.base,))
66
exit_stack.enter_context(branch.lock_read())
67
branch_refs = branch._get_check_refs()
68
for ref in branch_refs:
69
reflist = needed_refs.setdefault(ref, [])
70
reflist.append(branch)
73
def scan_tree(base_tree, tree, needed_refs, exit_stack):
74
"""Scan a tree for refs.
76
:param base_tree: The original tree check opened, used to detect duplicate
tree checks.
78
:param tree: The tree to schedule for checking.
79
:param needed_refs: Refs we are accumulating.
80
:param exit_stack: The exit stack accumulating.
82
if base_tree is not None and tree.basedir == base_tree.basedir:
84
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
85
exit_stack.enter_context(tree.lock_read())
86
tree_refs = tree._get_check_refs()
88
reflist = needed_refs.setdefault(ref, [])
92
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
93
"""Check multiple objects.
95
If errors occur they are accumulated and reported as far as possible, and
96
an exception raised at the end of the process.
99
base_tree, branch, repo, relpath = \
100
ControlDir.open_containing_tree_branch_or_repository(path)
101
except errors.NotBranchError:
102
base_tree = branch = repo = None
104
with cleanup.ExitStack() as exit_stack:
106
if base_tree is not None:
107
# If the tree is a lightweight checkout we won't see it in
108
# repo.find_branches - add now.
110
scan_tree(None, base_tree, needed_refs, exit_stack)
111
branch = base_tree.branch
112
if branch is not None:
115
# The branch is in a shared repository
116
repo = branch.repository
118
exit_stack.enter_context(repo.lock_read())
119
branches = list(repo.find_branches(using=True))
121
if do_branch or do_tree:
122
for branch in branches:
125
tree = branch.controldir.open_workingtree()
127
except (errors.NotLocalUrl, errors.NoWorkingTree):
130
scan_tree(base_tree, tree, needed_refs, exit_stack)
132
scan_branch(branch, needed_refs, exit_stack)
133
if do_branch and not branches:
134
note(gettext("No branch found at specified location."))
135
if do_tree and base_tree is None and not saw_tree:
136
note(gettext("No working tree found at specified location."))
137
if do_repo or do_branch or do_tree:
139
note(gettext("Checking repository at '%s'.")
141
result = repo.check(None, callback_refs=needed_refs,
143
result.report_results(verbose)
146
note(gettext("No working tree found at specified location."))
148
note(gettext("No branch found at specified location."))
150
note(gettext("No repository found at specified location."))
99
note('checked branch %s format %s',
103
note('%6d revisions', self.checked_rev_cnt)
104
note('%6d unique file texts', self.checked_text_cnt)
105
note('%6d repeated file texts', self.repeated_text_cnt)
106
note('%6d weaves', len(self.checked_weaves))
107
if self.missing_inventory_sha_cnt:
108
note('%6d revisions are missing inventory_sha1',
109
self.missing_inventory_sha_cnt)
110
if self.missing_revision_cnt:
111
note('%6d revisions are mentioned but not present',
112
self.missing_revision_cnt)
114
note('%6d ghost revisions', len(self.ghosts))
116
for ghost in self.ghosts:
118
if len(self.missing_parent_links):
119
note('%6d revisions missing parents in ancestry',
120
len(self.missing_parent_links))
122
for link, linkers in self.missing_parent_links.items():
123
note(' %s should be in the ancestry for:', link)
124
for linker in linkers:
125
note(' * %s', linker)
127
def check_one_rev(self, rev_id):
128
"""Check one revision.
130
rev_id - the one to check
132
last_rev_id - the previous one on the mainline, if any.
135
# mutter(' revision {%s}', rev_id)
138
rev_history_position = self.history.index(rev_id)
140
rev_history_position = None
142
if rev_history_position:
143
rev = branch.repository.get_revision(rev_id)
144
if rev_history_position > 0:
145
last_rev_id = self.history[rev_history_position - 1]
147
rev = branch.repository.get_revision(rev_id)
149
if rev.revision_id != rev_id:
150
raise BzrCheckError('wrong internal revision id in revision {%s}'
153
# check the previous history entry is a parent of this entry
155
if last_rev_id is not None:
156
for parent_id in rev.parent_ids:
157
if parent_id == last_rev_id:
160
raise BzrCheckError("previous revision {%s} not listed among "
162
% (last_rev_id, rev_id))
163
for parent in rev.parent_ids:
164
if not parent in self.planned_revisions:
165
missing_links = self.missing_parent_links.get(parent, [])
166
missing_links.append(rev_id)
167
self.missing_parent_links[parent] = missing_links
168
# list based so somewhat slow,
169
# TODO have a planned_revisions list and set.
170
if self.branch.repository.has_revision(parent):
171
missing_ancestry = self.repository.get_ancestry(parent)
172
for missing in missing_ancestry:
173
if (missing is not None
174
and missing not in self.planned_revisions):
175
self.planned_revisions.append(missing)
177
self.ghosts.append(rev_id)
179
raise BzrCheckError("revision {%s} has no parents listed "
180
"but preceded by {%s}"
181
% (rev_id, last_rev_id))
183
if rev.inventory_sha1:
184
inv_sha1 = branch.repository.get_inventory_sha1(rev_id)
185
if inv_sha1 != rev.inventory_sha1:
186
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
187
' value in revision {%s}' % rev_id)
189
missing_inventory_sha_cnt += 1
190
mutter("no inventory_sha1 on revision {%s}", rev_id)
191
self._check_revision_tree(rev_id)
192
self.checked_rev_cnt += 1
194
def check_weaves(self):
195
"""Check all the weaves we can get our hands on.
199
if self.branch.repository.weave_store.listable():
200
weave_ids = list(self.branch.repository.weave_store)
201
n_weaves = len(weave_ids)
202
self.progress.update('checking weave', 0, n_weaves)
203
self.inventory_weave.check(progress_bar=self.progress)
204
for i, weave_id in enumerate(weave_ids):
205
self.progress.update('checking weave', i, n_weaves)
206
w = self.branch.repository.weave_store.get_weave(weave_id,
207
self.branch.repository.get_transaction())
208
# No progress here, because it looks ugly.
210
self.checked_weaves[weave_id] = True
212
def _check_revision_tree(self, rev_id):
213
tree = self.branch.repository.revision_tree(rev_id)
217
if file_id in seen_ids:
218
raise BzrCheckError('duplicated file_id {%s} '
219
'in inventory for revision {%s}'
221
seen_ids[file_id] = True
224
ie.check(self, rev_id, inv, tree)
226
for path, ie in inv.iter_entries():
227
if path in seen_names:
228
raise BzrCheckError('duplicated path %s '
229
'in inventory for revision {%s}'
231
seen_names[path] = True
234
def check(branch, verbose):
235
"""Run consistency checks on a branch."""
236
checker = Check(branch)
238
checker.report_results(verbose)