13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
17
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
18
31
# would perhaps be enough to accumulate exception objects in a list without
19
32
# raising them. If there's more than one exception it'd be good to see them
22
"""Checking of bzr objects.
24
check_refs is a concept used for optimising check. Objects that depend on other
25
objects (e.g. tree on repository) can list the objects they would be requesting
26
so that when the dependent object is checked, matches can be pulled out and
27
evaluated in-line rather than re-reading the same data many times.
28
check_refs are tuples (kind, value). Currently defined kinds are:
30
* 'trees', where value is a revid and the looked up objects are revision trees.
31
* 'lefthand-distance', where value is a revid and the looked up objects are the
32
distance along the lefthand path to NULL for that revid.
33
* 'revision-existence', where value is a revid, and the result is True or False
34
indicating that the revision was found/not found.
37
from __future__ import absolute_import
43
from .branch import Branch
44
from .controldir import ControlDir
45
from .revision import NULL_REVISION
49
from .trace import note
50
from .workingtree import WorkingTree
51
from .i18n import gettext
35
from bzrlib.errors import BzrCheckError
37
from bzrlib.trace import note
53
39
class Check(object):
54
40
"""Check a repository"""
56
def __init__(self, repository, check_repo=True):
42
# The Check object interacts with InventoryEntry.check, etc.
44
def __init__(self, repository):
57
45
self.repository = repository
46
self.checked_text_cnt = 0
47
self.checked_rev_cnt = 0
49
self.repeated_text_cnt = 0
50
self.missing_parent_links = {}
51
self.missing_inventory_sha_cnt = 0
52
self.missing_revision_cnt = 0
53
# maps (file-id, version) -> sha1; used by InventoryFile._check
54
self.checked_texts = {}
55
self.checked_weaves = {}
58
self.repository.lock_read()
59
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
61
self.progress.update('retrieving inventory', 0, 0)
62
# do not put in init, as it should be done with progess,
63
# and inside the lock.
64
self.inventory_weave = self.repository.get_inventory_weave()
68
while revno < len(self.planned_revisions):
69
rev_id = self.planned_revisions[revno]
70
self.progress.update('checking revision', revno,
71
len(self.planned_revisions))
73
self.check_one_rev(rev_id)
75
self.progress.finished()
76
self.repository.unlock()
78
def plan_revisions(self):
79
repository = self.repository
80
self.planned_revisions = set(repository.all_revision_ids())
82
inventoried = set(self.inventory_weave.versions())
83
awol = self.planned_revisions - inventoried
85
raise BzrCheckError('Stored revisions missing from inventory'
86
'{%s}' % ','.join([f for f in awol]))
87
self.planned_revisions = list(self.planned_revisions)
59
89
def report_results(self, verbose):
60
raise NotImplementedError(self.report_results)
63
def scan_branch(branch, needed_refs, to_unlock):
64
"""Scan a branch for refs.
66
:param branch: The branch to schedule for checking.
67
:param needed_refs: Refs we are accumulating.
68
:param to_unlock: The unlock list accumulating.
90
note('checked repository %s format %s',
91
self.repository.bzrdir.root_transport,
92
self.repository._format)
93
note('%6d revisions', self.checked_rev_cnt)
94
note('%6d unique file texts', self.checked_text_cnt)
95
note('%6d repeated file texts', self.repeated_text_cnt)
96
note('%6d weaves', len(self.checked_weaves))
97
if self.missing_inventory_sha_cnt:
98
note('%6d revisions are missing inventory_sha1',
99
self.missing_inventory_sha_cnt)
100
if self.missing_revision_cnt:
101
note('%6d revisions are mentioned but not present',
102
self.missing_revision_cnt)
104
note('%6d ghost revisions', len(self.ghosts))
106
for ghost in self.ghosts:
108
if len(self.missing_parent_links):
109
note('%6d revisions missing parents in ancestry',
110
len(self.missing_parent_links))
112
for link, linkers in self.missing_parent_links.items():
113
note(' %s should be in the ancestry for:', link)
114
for linker in linkers:
115
note(' * %s', linker)
117
def check_one_rev(self, rev_id):
118
"""Check one revision.
120
rev_id - the one to check
122
rev = self.repository.get_revision(rev_id)
124
if rev.revision_id != rev_id:
125
raise BzrCheckError('wrong internal revision id in revision {%s}'
128
for parent in rev.parent_ids:
129
if not parent in self.planned_revisions:
130
missing_links = self.missing_parent_links.get(parent, [])
131
missing_links.append(rev_id)
132
self.missing_parent_links[parent] = missing_links
133
# list based so somewhat slow,
134
# TODO have a planned_revisions list and set.
135
if self.repository.has_revision(parent):
136
missing_ancestry = self.repository.get_ancestry(parent)
137
for missing in missing_ancestry:
138
if (missing is not None
139
and missing not in self.planned_revisions):
140
self.planned_revisions.append(missing)
142
self.ghosts.append(rev_id)
144
if rev.inventory_sha1:
145
inv_sha1 = self.repository.get_inventory_sha1(rev_id)
146
if inv_sha1 != rev.inventory_sha1:
147
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
148
' value in revision {%s}' % rev_id)
149
self._check_revision_tree(rev_id)
150
self.checked_rev_cnt += 1
152
def check_weaves(self):
153
"""Check all the weaves we can get our hands on.
157
if self.repository.weave_store.listable():
158
weave_ids = list(self.repository.weave_store)
159
n_weaves = len(weave_ids)
160
self.progress.update('checking weave', 0, n_weaves)
161
self.inventory_weave.check(progress_bar=self.progress)
162
for i, weave_id in enumerate(weave_ids):
163
self.progress.update('checking weave', i, n_weaves)
164
w = self.repository.weave_store.get_weave(weave_id,
165
self.repository.get_transaction())
166
# No progress here, because it looks ugly.
168
self.checked_weaves[weave_id] = True
170
def _check_revision_tree(self, rev_id):
171
tree = self.repository.revision_tree(rev_id)
175
if file_id in seen_ids:
176
raise BzrCheckError('duplicated file_id {%s} '
177
'in inventory for revision {%s}'
179
seen_ids[file_id] = True
182
ie.check(self, rev_id, inv, tree)
184
for path, ie in inv.iter_entries():
185
if path in seen_names:
186
raise BzrCheckError('duplicated path %s '
187
'in inventory for revision {%s}'
189
seen_names[path] = True
192
def check(branch, verbose):
193
"""Run consistency checks on a branch.
195
Results are reported through logging.
197
:raise BzrCheckError: if there's a consistency error.
70
note(gettext("Checking branch at '%s'.") % (branch.base,))
71
199
branch.lock_read()
72
to_unlock.append(branch)
73
branch_refs = branch._get_check_refs()
74
for ref in branch_refs:
75
reflist = needed_refs.setdefault(ref, [])
76
reflist.append(branch)
79
def scan_tree(base_tree, tree, needed_refs, to_unlock):
80
"""Scan a tree for refs.
82
:param base_tree: The original tree check opened, used to detect duplicate
84
:param tree: The tree to schedule for checking.
85
:param needed_refs: Refs we are accumulating.
86
:param to_unlock: The unlock list accumulating.
88
if base_tree is not None and tree.basedir == base_tree.basedir:
90
note(gettext("Checking working tree at '%s'.") % (tree.basedir,))
92
to_unlock.append(tree)
93
tree_refs = tree._get_check_refs()
95
reflist = needed_refs.setdefault(ref, [])
99
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
100
"""Check multiple objects.
102
If errors occur they are accumulated and reported as far as possible, and
103
an exception raised at the end of the process.
106
base_tree, branch, repo, relpath = \
107
ControlDir.open_containing_tree_branch_or_repository(path)
108
except errors.NotBranchError:
109
base_tree = branch = repo = None
114
if base_tree is not None:
115
# If the tree is a lightweight checkout we won't see it in
116
# repo.find_branches - add now.
118
scan_tree(None, base_tree, needed_refs, to_unlock)
119
branch = base_tree.branch
120
if branch is not None:
123
# The branch is in a shared repository
124
repo = branch.repository
127
to_unlock.append(repo)
128
branches = repo.find_branches(using=True)
130
if do_branch or do_tree:
131
for branch in branches:
134
tree = branch.controldir.open_workingtree()
136
except (errors.NotLocalUrl, errors.NoWorkingTree):
139
scan_tree(base_tree, tree, needed_refs, to_unlock)
141
scan_branch(branch, needed_refs, to_unlock)
142
if do_branch and not branches:
143
note(gettext("No branch found at specified location."))
144
if do_tree and base_tree is None and not saw_tree:
145
note(gettext("No working tree found at specified location."))
146
if do_repo or do_branch or do_tree:
148
note(gettext("Checking repository at '%s'.")
150
result = repo.check(None, callback_refs=needed_refs,
152
result.report_results(verbose)
155
note(gettext("No working tree found at specified location."))
157
note(gettext("No branch found at specified location."))
159
note(gettext("No repository found at specified location."))
201
branch_result = branch.check()
202
repo_result = branch.repository.check([branch.last_revision()])
161
for thing in to_unlock:
205
branch_result.report_results(verbose)
206
repo_result.report_results(verbose)