1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# TODO: Check ancestries are correct for every revision: includes
18
# every committed so far, and in a reasonable order.
20
# TODO: Also check non-mainline revisions mentioned as parents.
22
# TODO: Check for extra files in the control directory.
24
# TODO: Check revision, inventory and entry objects have all
# required fields.
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them. If there's more than one exception it'd be good to see them
# all.
"""Checking of bzr objects.

check_refs is a concept used for optimising check. Objects that depend on other
objects (e.g. tree on repository) can list the objects they would be requesting
so that when the dependent object is checked, matches can be pulled out and
evaluated in-line rather than re-reading the same data many times.

check_refs are tuples (kind, value). Currently defined kinds are:
* 'trees', where value is a revid and the looked up objects are revision trees.
* 'lefthand-distance', where value is a revid and the looked up objects are the
  distance along the lefthand path to NULL for that revid.
* 'revision-existence', where value is a revid, and the result is True or False
  indicating that the revision was found/not found.
"""
49
from bzrlib import errors, osutils
50
from bzrlib import repository as _mod_repository
51
from bzrlib import revision
52
from bzrlib.branch import Branch
53
from bzrlib.bzrdir import BzrDir
54
from bzrlib.errors import BzrCheckError
55
from bzrlib.repository import Repository
56
from bzrlib.symbol_versioning import deprecated_function, deprecated_in
57
from bzrlib.trace import log_error, note
59
from bzrlib.workingtree import WorkingTree
62
"""Check a repository"""
64
# The Check object interacts with InventoryEntry.check, etc.
66
def __init__(self, repository, check_repo=True):
    """Initialise the counters and collections a check run fills in.

    :param repository: The repository to check. Here it is only asked
        whether it supports rich roots.
    :param check_repo: Stored unchanged as ``self.check_repo``.
    """
    self.repository = repository
    self.check_repo = check_repo
    self.rich_roots = repository.supports_rich_root()
    # Simple tallies shown by the repository report.
    self.checked_text_cnt = 0
    self.checked_rev_cnt = 0
    self.repeated_text_cnt = 0
    self.missing_inventory_sha_cnt = 0
    self.missing_revision_cnt = 0
    # check_one_rev appends to this list and the repository report reads
    # it, so it has to exist before either of them runs.
    self.ghosts = []
    # absent parent revision id -> list of revision ids that name it.
    self.missing_parent_links = {}
    # maps (file-id, version) -> sha1; used by InventoryFile._check
    self.checked_texts = {}
    self.checked_weaves = set()
    self.unreferenced_versions = set()
    # (revision_id, file_id, found_parents, correct_parents) tuples.
    self.inconsistent_parents = []
    # (file_id, revision) -> bool, filled by
    # _add_entry_to_text_key_references.
    self.text_key_references = {}
    # Results handed back by Branch.check() for branches that asked for refs.
    self.other_results = []
85
def check(self, callback_refs=None, check_repo=True):
    """Check the repository, then look up and deliver requested check_refs.

    The repository is read-locked for the whole call and a nested progress
    bar is kept in ``self.progress`` while the work runs.

    :param callback_refs: Optional dict mapping a check_ref tuple
        ``(kind, value)`` to the list of objects that want it resolved.
        WorkingTree and Branch instances in those lists are called back
        with the dict of resolved refs.
    :param check_repo: Not consulted by this method.
    :raise AssertionError: if a ref has a kind other than 'trees',
        'lefthand-distance' or 'revision-existence'.
    """
    # NOTE(review): several statements of this method were missing from the
    # text under review (the try/finally, the ``self.check_repo`` and
    # ``callback_refs`` guards, the initialisers, ``revno`` bookkeeping,
    # the plan_revisions()/check_weaves() calls and the WorkingTree
    # callback). They are reconstructed from how the visible lines use
    # them - confirm against history.
    #
    # The import list visible at the top of this file never binds the name
    # ``bzrlib``, so bind the ui module locally.
    import bzrlib.ui
    if callback_refs is None:
        callback_refs = {}
    self.repository.lock_read()
    try:
        # Created inside the lock's try block so a failure here cannot
        # leave the repository locked.
        self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            if self.check_repo:
                self.progress.update('retrieving inventory', 0, 2)
                # do not put in init, as it should be done with progress,
                # and inside the lock.
                self.inventory_weave = self.repository.inventories
                self.progress.update('checking revision graph', 1)
                self.check_revision_graph()
                self.plan_revisions()
                # check_one_rev may append to planned_revisions while we
                # walk it, so the length is re-read on every pass.
                revno = 0
                while revno < len(self.planned_revisions):
                    rev_id = self.planned_revisions[revno]
                    self.progress.update('checking revision', revno,
                                         len(self.planned_revisions))
                    revno += 1
                    self.check_one_rev(rev_id)
                # check_weaves is done after the revision scan so that
                # revision index is known to be valid.
                self.check_weaves()
            if callback_refs:
                repo = self.repository
                # calculate all refs, and callback the objects requesting
                # them.
                refs = {}
                wanting_items = set()
                # Current crude version calculates everything and calls
                # everything at once. Doing a queue and popping as things
                # are satisfied would be cheaper on memory [but few people
                # have huge numbers of working trees today. TODO: fix].
                distances = set()
                existences = set()
                for ref, wantlist in callback_refs.iteritems():
                    wanting_items.update(wantlist)
                    kind, value = ref
                    if kind == 'trees':
                        refs[ref] = repo.revision_tree(value)
                    elif kind == 'lefthand-distance':
                        distances.add(value)
                    elif kind == 'revision-existence':
                        existences.add(value)
                    else:
                        # ref is a 2-tuple, so it must be wrapped before
                        # %-formatting; a bare ``% ref`` raises TypeError
                        # instead of the intended AssertionError.
                        raise AssertionError(
                            'unknown ref kind for ref %s' % (ref,))
                node_distances = repo.get_graph().find_lefthand_distances(
                    distances)
                for key, distance in node_distances.iteritems():
                    refs[('lefthand-distance', key)] = distance
                    # A positive distance also settles an existence query
                    # for the same revision.
                    if key in existences and distance > 0:
                        refs[('revision-existence', key)] = True
                        existences.remove(key)
                parent_map = repo.get_graph().get_parent_map(existences)
                for key in parent_map:
                    refs[('revision-existence', key)] = True
                    existences.remove(key)
                # Whatever is left was not found by either lookup.
                for key in existences:
                    refs[('revision-existence', key)] = False
                for item in wanting_items:
                    if isinstance(item, WorkingTree):
                        # NOTE(review): this call was not visible in the
                        # text under review - confirm.
                        item._check(refs)
                    if isinstance(item, Branch):
                        self.other_results.append(item.check(refs))
        finally:
            self.progress.finished()
    finally:
        self.repository.unlock()
154
def check_revision_graph(self):
    """Collect revisions whose parents are recorded wrongly in the index.

    Sets ``self.revs_with_bad_parents_in_index`` to ``None`` when the
    repository reports that its revision graph cannot have wrong parents,
    otherwise to a list of whatever
    ``repository._find_inconsistent_revision_parents()`` yields.
    """
    repo = self.repository
    if not repo.revision_graph_can_have_wrong_parents():
        # This check is not necessary.
        self.revs_with_bad_parents_in_index = None
        return
    self.revs_with_bad_parents_in_index = list(
        repo._find_inconsistent_revision_parents())
162
def plan_revisions(self):
    """Decide which revisions to check and ensure each has an inventory.

    Stores ``repository.all_revision_ids()`` in ``self.planned_revisions``
    and clears the progress bar.

    :raise BzrCheckError: if a planned revision id is not the last element
        of any key in ``self.inventory_weave``.
    """
    self.planned_revisions = self.repository.all_revision_ids()
    self.progress.clear()
    inventoried = set(key[-1] for key in self.inventory_weave.keys())
    awol = set(self.planned_revisions) - inventoried
    if awol:
        # Sorted so that the message is the same from run to run.
        raise BzrCheckError('Stored revisions missing from inventory'
                            '{%s}' % ','.join(sorted(awol)))
172
def report_results(self, verbose):
    """Report the repository results, then every collected sub-result.

    :param verbose: Passed through unchanged to each report call.
    """
    # NOTE(review): one line is missing between the ``def`` and the
    # repository report call in the text under review. It is taken to be
    # this guard, because ``self.check_repo`` is stored by __init__ and the
    # repository report reads attributes that only a repository scan sets
    # - confirm.
    if self.check_repo:
        self._report_repo_results(verbose)
    for result in self.other_results:
        result.report_results(verbose)
178
def _report_repo_results(self, verbose):
    """Log a summary of what the repository scan found.

    Counts are always logged; per-item detail is logged only when
    ``verbose`` is true.

    :param verbose: Whether to list the individual offending items.
    """
    # NOTE(review): the ``if verbose:`` / ``if self.ghosts:`` guards, the
    # per-ghost ``note`` line and the tails of three calls ('file_id)',
    # 'but should have %r', '%r (should be %r)') were missing from the text
    # under review. They are reconstructed from the gaps and from the
    # arguments that are visible. Runs of spaces inside the message strings
    # may also have been collapsed by the same damage - confirm.
    note('checked repository %s format %s',
         self.repository.bzrdir.root_transport,
         self.repository._format)
    note('%6d revisions', self.checked_rev_cnt)
    note('%6d file-ids', len(self.checked_weaves))
    note('%6d unique file texts', self.checked_text_cnt)
    note('%6d repeated file texts', self.repeated_text_cnt)
    note('%6d unreferenced text versions',
         len(self.unreferenced_versions))
    if self.missing_inventory_sha_cnt:
        note('%6d revisions are missing inventory_sha1',
             self.missing_inventory_sha_cnt)
    if self.missing_revision_cnt:
        note('%6d revisions are mentioned but not present',
             self.missing_revision_cnt)
    if self.ghosts:
        note('%6d ghost revisions', len(self.ghosts))
        if verbose:
            for ghost in self.ghosts:
                note(' %s', ghost)
    if self.missing_parent_links:
        note('%6d revisions missing parents in ancestry',
             len(self.missing_parent_links))
        if verbose:
            for link, linkers in self.missing_parent_links.items():
                note(' %s should be in the ancestry for:', link)
                for linker in linkers:
                    note(' * %s', linker)
    if verbose:
        for file_id, revision_id in self.unreferenced_versions:
            log_error('unreferenced version: {%s} in %s', revision_id,
                      file_id)
    if self.inconsistent_parents:
        note('%6d inconsistent parents', len(self.inconsistent_parents))
        if verbose:
            for info in self.inconsistent_parents:
                revision_id, file_id, found_parents, correct_parents = info
                note(' * %s version %s has parents %r '
                     'but should have %r'
                     % (file_id, revision_id, found_parents,
                        correct_parents))
    # May be None (check not needed) or an empty list; both mean nothing
    # to report.
    if self.revs_with_bad_parents_in_index:
        note('%6d revisions have incorrect parents in the revision index',
             len(self.revs_with_bad_parents_in_index))
        if verbose:
            for item in self.revs_with_bad_parents_in_index:
                revision_id, index_parents, actual_parents = item
                note(
                    ' %s has wrong parents in index: '
                    '%r (should be %r)',
                    revision_id, index_parents, actual_parents)
231
def check_one_rev(self, rev_id):
    """Check one revision.

    Verifies the stored revision id and inventory sha1, records parents
    that were not planned for checking, then checks the revision's tree.

    :param rev_id: The id of the revision to check.
    :raise BzrCheckError: if the stored revision carries a different id, or
        its inventory sha1 differs from the repository's.
    """
    rev = self.repository.get_revision(rev_id)

    if rev.revision_id != rev_id:
        raise BzrCheckError('wrong internal revision id in revision {%s}'
                            % rev_id)

    for parent in rev.parent_ids:
        if parent in self.planned_revisions:
            continue
        # rev has a parent we didn't know about.
        self.missing_parent_links.setdefault(parent, []).append(rev_id)
        # list based so somewhat slow,
        # TODO have a planned_revisions list and set.
        if self.repository.has_revision(parent):
            # Queue the newly discovered ancestry so that the caller's
            # scan reaches it as well.
            for missing in self.repository.get_ancestry(parent):
                if (missing is not None
                        and missing not in self.planned_revisions):
                    self.planned_revisions.append(missing)
        else:
            # NOTE(review): this records the referencing revision rather
            # than the absent parent - confirm that is what the ghost
            # report intends.
            self.ghosts.append(rev_id)

    if rev.inventory_sha1:
        # Loopback - this is currently circular logic as the
        # knit get_inventory_sha1 call returns rev.inventory_sha1.
        # Repository.py's get_inventory_sha1 should instead return
        # inventories.get_record_stream([(revid,)]).next().sha1 or
        # similar.
        inv_sha1 = self.repository.get_inventory_sha1(rev_id)
        if inv_sha1 != rev.inventory_sha1:
            raise BzrCheckError('Inventory sha1 hash doesn\'t match'
                                ' value in revision {%s}' % rev_id)
    self._check_revision_tree(rev_id)
    self.checked_rev_cnt += 1
272
def check_weaves(self):
    """Check all the weaves we can get our hands on.

    Checks the inventory store and the text store, then asks the
    repository's versioned-file checker which text versions have wrong
    parents and which are unused. Results go into ``self.checked_weaves``,
    ``self.inconsistent_parents`` and ``self.unreferenced_versions``.
    """
    progress = self.progress
    texts = self.repository.texts
    progress.update('checking inventory', 0, 2)
    self.inventory_weave.check(progress_bar=progress)
    progress.update('checking text storage', 1, 2)
    texts.check(progress_bar=progress)
    weave_checker = self.repository._get_versioned_file_checker(
        text_key_references=self.text_key_references)
    bad_parents, unused_versions = weave_checker.check_file_version_parents(
        texts, progress_bar=progress)
    self.checked_weaves = weave_checker.file_ids
    for text_key, (stored, correct) in bad_parents.items():
        # XXX not ready for id join/split operations.
        weave_id = text_key[0]
        revision_id = text_key[-1]
        # Keep only the last element of each parent key.
        weave_parents = tuple([parent[-1] for parent in stored])
        correct_parents = tuple([parent[-1] for parent in correct])
        self.inconsistent_parents.append(
            (revision_id, weave_id, weave_parents, correct_parents))
    self.unreferenced_versions.update(unused_versions)
297
def _check_revision_tree(self, rev_id):
    """Check every inventory entry of one revision's tree.

    Each entry is registered in the text key references and has its own
    ``check`` run; file ids and paths must be unique within the inventory.

    :param rev_id: The revision whose tree is checked.
    :raise BzrCheckError: if a file id or a path occurs twice.
    """
    # NOTE(review): the bindings of ``inv``, ``seen_ids``, ``seen_names``
    # and ``file_id``, the format arguments of both errors and the final
    # ``seen_names.add`` were missing from the text under review. They are
    # reconstructed from how the visible lines use those names;
    # ``tree.inventory`` in particular is an assumption - confirm.
    tree = self.repository.revision_tree(rev_id)
    inv = tree.inventory
    seen_ids = set()
    seen_names = set()
    for path, ie in inv.iter_entries():
        self._add_entry_to_text_key_references(inv, ie)
        file_id = ie.file_id
        if file_id in seen_ids:
            raise BzrCheckError('duplicated file_id {%s} '
                                'in inventory for revision {%s}'
                                % (file_id, rev_id))
        seen_ids.add(file_id)
        ie.check(self, rev_id, inv, tree)
        if path in seen_names:
            raise BzrCheckError('duplicated path %s '
                                'in inventory for revision {%s}'
                                % (path, rev_id))
        seen_names.add(path)
317
def _add_entry_to_text_key_references(self, inv, entry):
    """Record the text key of an inventory entry.

    The key ``(entry.file_id, entry.revision)`` is added to
    ``self.text_key_references`` with value False, and set to True when the
    entry's revision equals the inventory's own revision id. An existing
    True is never reset.

    :param inv: The inventory the entry came from.
    :param entry: The inventory entry to record.
    """
    if not self.rich_roots and entry == inv.root:
        # Root entries are not recorded when the repository lacks rich
        # root support.
        return
    key = (entry.file_id, entry.revision)
    if entry.revision == inv.revision_id:
        self.text_key_references[key] = True
    else:
        self.text_key_references.setdefault(key, False)
326
@deprecated_function(deprecated_in((1,6,0)))
def check(branch, verbose):
    """Run consistency checks on a branch.

    Results are reported through logging.

    Deprecated in 1.6. Please use check_dwim instead.

    :param branch: The branch to check.
    :param verbose: Passed through to check_branch.
    :raise BzrCheckError: if there's a consistency error.
    """
    check_branch(branch, verbose)
339
@deprecated_function(deprecated_in((1,16,0)))
def check_branch(branch, verbose):
    """Run consistency checks on a branch.

    Results are reported through logging.

    :param branch: The branch to check; its repository is checked with the
        refs the branch asks for.
    :param verbose: Passed to the branch result's report_results.
    :raise BzrCheckError: if there's a consistency error.
    """
    # NOTE(review): the text under review has a three-line gap before the
    # loop and a two-line gap after the result lookup. ``needed_refs = {}``
    # is required by the loop; the read lock and try/finally fill the rest
    # of the gaps and are an assumption - confirm.
    branch.lock_read()
    try:
        needed_refs = {}
        for ref in branch._get_check_refs():
            needed_refs.setdefault(ref, []).append(branch)
        result = branch.repository.check([branch.last_revision()], needed_refs)
        # The branch is the only object that asked for refs, so its result
        # is the first one collected.
        branch_result = result.other_results[0]
    finally:
        branch.unlock()
    branch_result.report_results(verbose)
359
def scan_branch(branch, needed_refs, to_unlock):
    """Scan a branch for refs.

    :param branch: The branch to schedule for checking.
    :param needed_refs: Refs we are accumulating: maps each ref to the list
        of objects wanting it. Updated in place.
    :param to_unlock: The unlock list accumulating. The branch is appended.
    """
    note("Checking branch at '%s'." % (branch.base,))
    # NOTE(review): one line is missing here in the text under review; a
    # read lock is assumed because the branch is put on the unlock list
    # straight afterwards - confirm.
    branch.lock_read()
    to_unlock.append(branch)
    for ref in branch._get_check_refs():
        needed_refs.setdefault(ref, []).append(branch)
375
def scan_tree(base_tree, tree, needed_refs, to_unlock):
    """Scan a tree for refs.

    :param base_tree: The original tree check opened, used to detect
        duplicate tree checks. May be None.
    :param tree: The tree to schedule for checking.
    :param needed_refs: Refs we are accumulating: maps each ref to the list
        of objects wanting it. Updated in place.
    :param to_unlock: The unlock list accumulating. The tree is appended
        unless it is skipped as a duplicate.
    """
    # NOTE(review): the body of the duplicate test, the lock call and the
    # final append were missing from the text under review; they mirror
    # scan_branch - confirm.
    if base_tree is not None and tree.basedir == base_tree.basedir:
        # Same directory as the tree already opened; nothing to schedule.
        return
    note("Checking working tree at '%s'." % (tree.basedir,))
    tree.lock_read()
    to_unlock.append(tree)
    for ref in tree._get_check_refs():
        needed_refs.setdefault(ref, []).append(tree)
395
def check_dwim(path, verbose, do_branch=False, do_repo=False, do_tree=False):
397
base_tree, branch, repo, relpath = \
398
BzrDir.open_containing_tree_branch_or_repository(path)
399
except errors.NotBranchError:
400
base_tree = branch = repo = None
405
if base_tree is not None:
406
# If the tree is a lightweight checkout we won't see it in
407
# repo.find_branches - add now.
409
scan_tree(None, base_tree, needed_refs, to_unlock)
410
branch = base_tree.branch
411
if branch is not None:
414
# The branch is in a shared repository
415
repo = branch.repository
418
to_unlock.append(repo)
419
branches = repo.find_branches(using=True)
421
if do_branch or do_tree:
422
for branch in branches:
425
tree = branch.bzrdir.open_workingtree()
427
except (errors.NotLocalUrl, errors.NoWorkingTree):
430
scan_tree(base_tree, tree, needed_refs, to_unlock)
432
scan_branch(branch, needed_refs, to_unlock)
433
if do_branch and not branches:
434
log_error("No branch found at specified location.")
435
if do_tree and base_tree is None and not saw_tree:
436
log_error("No working tree found at specified location.")
437
if do_repo or do_branch or do_tree:
439
note("Checking repository at '%s'."
440
% (repo.bzrdir.root_transport.base,))
441
result = repo.check(None, callback_refs=needed_refs,
443
result.report_results(verbose)
446
log_error("No working tree found at specified location.")
448
log_error("No branch found at specified location.")
450
log_error("No repository found at specified location.")
452
for thing in to_unlock: