1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2004, 2005 by Martin Pool
2
# Copyright (C) 2005 by Canonical Ltd
3
4
# This program is free software; you can redistribute it and/or modify
4
5
# it under the terms of the GNU General Public License as published by
5
6
# the Free Software Foundation; either version 2 of the License, or
6
7
# (at your option) any later version.
8
9
# This program is distributed in the hope that it will be useful,
9
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
12
# GNU General Public License for more details.
13
14
# You should have received a copy of the GNU General Public License
14
15
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Perhaps have a way to record errors other than by raising exceptions;
18
# would perhaps be enough to accumulate exception objects in a list without
19
# raising them. If there's more than one exception it'd be good to see them all.
22
"""Checking of bzr objects.
24
check_refs is a concept used for optimising check. Objects that depend on other
25
objects (e.g. tree on repository) can list the objects they would be requesting
26
so that when the dependent object is checked, matches can be pulled out and
27
evaluated in-line rather than re-reading the same data many times.
28
check_refs are tuples (kind, value). Currently defined kinds are:
30
* 'trees', where value is a revid and the looked up objects are revision trees.
31
* 'lefthand-distance', where value is a revid and the looked up objects are the
32
distance along the lefthand path to NULL for that revid.
33
* 'revision-existence', where value is a revid, and the result is True or False
34
indicating that the revision was found/not found.
42
from .controldir import ControlDir
43
from .trace import note
44
from .i18n import gettext
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# TODO: Check ancestries are correct for every revision: includes
19
# every revision committed so far, and in a reasonable order.
21
# TODO: Also check non-mainline revisions mentioned as parents.
23
# TODO: Check for extra files in the control directory.
25
# TODO: Check revision, inventory and entry objects have all
28
# TODO: Get every revision in the revision-store even if they're not
29
# referenced by history and make sure they're all valid.
32
from bzrlib.trace import note, warning
33
from bzrlib.osutils import rename, sha_string, fingerprint_file
34
from bzrlib.trace import mutter
35
from bzrlib.errors import BzrCheckError, NoSuchRevision
36
from bzrlib.inventory import ROOT_ID
47
39
class Check(object):
48
"""Check a repository"""
50
def __init__(self, repository, check_repo=True):
51
self.repository = repository
42
def __init__(self, branch):
44
self.checked_text_cnt = 0
45
self.checked_rev_cnt = 0
47
self.repeated_text_cnt = 0
48
self.missing_parent_links = {}
49
self.missing_inventory_sha_cnt = 0
50
self.missing_revision_cnt = 0
51
# maps (file-id, version) -> sha1
52
self.checked_texts = {}
55
self.branch.lock_read()
56
self.progress = bzrlib.ui.ui_factory.progress_bar()
58
self.progress.update('retrieving inventory', 0, 0)
59
# do not put in init, as it should be done with progress,
60
# and inside the lock.
61
self.inventory_weave = self.branch._get_inventory_weave()
62
self.history = self.branch.revision_history()
63
if not len(self.history):
68
while revno < len(self.planned_revisions):
69
rev_id = self.planned_revisions[revno]
70
self.progress.update('checking revision', revno,
71
len(self.planned_revisions))
73
self.check_one_rev(rev_id)
78
def plan_revisions(self):
79
if not self.branch.revision_store.listable():
80
self.planned_revisions = self.branch.get_ancestry(self.history[-1])
81
self.planned_revisions.remove(None)
82
# FIXME progress bars should support this more nicely.
84
print ("Checking reachable history -"
85
" for a complete check use a local branch.")
88
self.planned_revisions = set(self.branch.revision_store)
89
inventoried = set(self.inventory_weave.names())
90
awol = self.planned_revisions - inventoried
92
raise BzrCheckError('Stored revisions missing from inventory'
93
'{%s}' % ','.join([f for f in awol]))
94
self.planned_revisions = list(self.planned_revisions)
53
96
def report_results(self, verbose):
54
raise NotImplementedError(self.report_results)
57
def scan_branch(branch, needed_refs, exit_stack):
    """Scan a branch for refs.

    Emits a note naming the branch, read-locks it for the lifetime of
    ``exit_stack``, and records the branch against every check ref it
    reports via ``branch._get_check_refs()``.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating. A mapping from check ref
        to the list of objects that asked for it; it is updated in place.
    :param exit_stack: The exit stack accumulating. The context returned by
        ``branch.lock_read()`` is entered on it, so the lock is released
        when the stack unwinds rather than when this function returns.
    """
    note(gettext("Checking branch at '%s'.") % (branch.base,))
    # Take the lock before asking for refs so the refs are read under it.
    exit_stack.enter_context(branch.lock_read())
    branch_refs = branch._get_check_refs()
    for ref in branch_refs:
        # Several objects may need the same ref; keep one list per ref.
        reflist = needed_refs.setdefault(ref, [])
        reflist.append(branch)
72
def scan_tree(base_tree, tree, needed_refs, exit_stack):
73
"""Scan a tree for refs.
75
:param base_tree: The original tree check opened, used to detect duplicate
77
:param tree: The tree to schedule for checking.
78
:param needed_refs: Refs we are accumulating.
79
:param exit_stack: The exit stack accumulating.
81
if base_tree is not None and tree.basedir == base_tree.basedir:
83
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
84
exit_stack.enter_context(tree.lock_read())
85
tree_refs = tree._get_check_refs()
87
reflist = needed_refs.setdefault(ref, [])
91
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
92
"""Check multiple objects.
94
If errors occur they are accumulated and reported as far as possible, and
95
an exception raised at the end of the process.
98
base_tree, branch, repo, relpath = \
99
ControlDir.open_containing_tree_branch_or_repository(path)
100
except errors.NotBranchError:
101
base_tree = branch = repo = None
103
with contextlib.ExitStack() as exit_stack:
105
if base_tree is not None:
106
# If the tree is a lightweight checkout we won't see it in
107
# repo.find_branches - add now.
109
scan_tree(None, base_tree, needed_refs, exit_stack)
110
branch = base_tree.branch
111
if branch is not None:
114
# The branch is in a shared repository
115
repo = branch.repository
117
exit_stack.enter_context(repo.lock_read())
118
branches = list(repo.find_branches(using=True))
120
if do_branch or do_tree:
121
for branch in branches:
124
tree = branch.controldir.open_workingtree()
126
except (errors.NotLocalUrl, errors.NoWorkingTree):
129
scan_tree(base_tree, tree, needed_refs, exit_stack)
131
scan_branch(branch, needed_refs, exit_stack)
132
if do_branch and not branches:
133
note(gettext("No branch found at specified location."))
134
if do_tree and base_tree is None and not saw_tree:
135
note(gettext("No working tree found at specified location."))
136
if do_repo or do_branch or do_tree:
138
note(gettext("Checking repository at '%s'.")
140
result = repo.check(None, callback_refs=needed_refs,
142
result.report_results(verbose)
145
note(gettext("No working tree found at specified location."))
147
note(gettext("No branch found at specified location."))
149
note(gettext("No repository found at specified location."))
97
note('checked branch %s format %d',
99
self.branch._branch_format)
101
note('%6d revisions', self.checked_rev_cnt)
102
note('%6d unique file texts', self.checked_text_cnt)
103
note('%6d repeated file texts', self.repeated_text_cnt)
104
if self.missing_inventory_sha_cnt:
105
note('%6d revisions are missing inventory_sha1',
106
self.missing_inventory_sha_cnt)
107
if self.missing_revision_cnt:
108
note('%6d revisions are mentioned but not present',
109
self.missing_revision_cnt)
111
note('%6d ghost revisions', len(self.ghosts))
113
for ghost in self.ghosts:
115
if len(self.missing_parent_links):
116
note('%6d revisions missing parents in ancestry',
117
len(self.missing_parent_links))
119
for link, linkers in self.missing_parent_links.items():
120
note(' %s should be in the ancestry for:', link)
121
for linker in linkers:
122
note(' * %s', linker)
124
def check_one_rev(self, rev_id):
125
"""Check one revision.
127
rev_id - the one to check
129
last_rev_id - the previous one on the mainline, if any.
132
# mutter(' revision {%s}', rev_id)
135
rev_history_position = self.history.index(rev_id)
137
rev_history_position = None
139
if rev_history_position:
140
rev = branch.get_revision(rev_id)
141
if rev_history_position > 0:
142
last_rev_id = self.history[rev_history_position - 1]
144
rev = branch.get_revision(rev_id)
146
if rev.revision_id != rev_id:
147
raise BzrCheckError('wrong internal revision id in revision {%s}'
150
# check the previous history entry is a parent of this entry
152
if last_rev_id is not None:
153
for parent_id in rev.parent_ids:
154
if parent_id == last_rev_id:
157
raise BzrCheckError("previous revision {%s} not listed among "
159
% (last_rev_id, rev_id))
160
for parent in rev.parent_ids:
161
if not parent in self.planned_revisions:
162
missing_links = self.missing_parent_links.get(parent, [])
163
missing_links.append(rev_id)
164
self.missing_parent_links[parent] = missing_links
165
# list based so somewhat slow,
166
# TODO have a planned_revisions list and set.
167
if self.branch.has_revision(parent):
168
missing_ancestry = self.branch.get_ancestry(parent)
169
for missing in missing_ancestry:
170
if (missing is not None
171
and missing not in self.planned_revisions):
172
self.planned_revisions.append(missing)
174
self.ghosts.append(rev_id)
176
raise BzrCheckError("revision {%s} has no parents listed "
177
"but preceded by {%s}"
178
% (rev_id, last_rev_id))
180
if rev.inventory_sha1:
181
inv_sha1 = branch.get_inventory_sha1(rev_id)
182
if inv_sha1 != rev.inventory_sha1:
183
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
184
' value in revision {%s}' % rev_id)
186
missing_inventory_sha_cnt += 1
187
mutter("no inventory_sha1 on revision {%s}", rev_id)
188
self._check_revision_tree(rev_id)
189
self.checked_rev_cnt += 1
191
def _check_revision_tree(self, rev_id):
192
tree = self.branch.revision_tree(rev_id)
196
if file_id in seen_ids:
197
raise BzrCheckError('duplicated file_id {%s} '
198
'in inventory for revision {%s}'
200
seen_ids[file_id] = True
203
ie.check(self, rev_id, inv, tree)
205
for path, ie in inv.iter_entries():
206
if path in seen_names:
207
raise BzrCheckError('duplicated path %s '
208
'in inventory for revision {%s}'
210
seen_names[path] = True
213
def check(branch, verbose):
214
"""Run consistency checks on a branch."""
215
checker = Check(branch)
217
checker.report_results(verbose)