90
88
self.repository.lock_read()
91
89
self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
93
self.progress.update('check', 0, 4)
94
91
if self.check_repo:
95
self.progress.update('checking revisions', 0)
96
self.check_revisions()
97
self.progress.update('checking commit contents', 1)
98
self.repository._check_inventories(self)
99
self.progress.update('checking file graphs', 2)
92
self.progress.update('retrieving inventory', 0, 2)
93
# do not put in init, as it should be done with progess,
94
# and inside the lock.
95
self.inventory_weave = self.repository.inventories
96
self.progress.update('checking revision graph', 1)
97
self.check_revision_graph()
100
while revno < len(self.planned_revisions):
101
rev_id = self.planned_revisions[revno]
102
self.progress.update('checking revision', revno,
103
len(self.planned_revisions))
105
self.check_one_rev(rev_id)
100
106
# check_weaves is done after the revision scan so that
101
107
# revision index is known to be valid.
102
108
self.check_weaves()
103
self.progress.update('checking branches and trees', 3)
104
109
if callback_refs:
105
110
repo = self.repository
106
111
# calculate all refs, and callback the objects requesting them.
146
151
self.progress.finished()
147
152
self.repository.unlock()
149
def _check_revisions(self, revisions_iterator):
150
"""Check revision objects by decorating a generator.
152
:param revisions_iterator: An iterator of(revid, Revision-or-None).
153
:return: A generator of the contents of revisions_iterator.
155
self.planned_revisions = set()
156
for revid, revision in revisions_iterator:
157
yield revid, revision
158
self._check_one_rev(revid, revision)
159
# Flatten the revisions we found to guarantee consistent later
161
self.planned_revisions = list(self.planned_revisions)
162
# TODO: extract digital signatures as items to callback on too.
164
def check_revisions(self):
165
"""Scan revisions, checking data directly available as we go."""
166
revision_iterator = self.repository._iter_revisions(None)
167
revision_iterator = self._check_revisions(revision_iterator)
168
# We read the all revisions here:
169
# - doing this allows later code to depend on the revision index.
170
# - we can fill out existence flags at this point
171
# - we can read the revision inventory sha at this point
172
# - we can check properties and serialisers etc.
154
def check_revision_graph(self):
173
155
if not self.repository.revision_graph_can_have_wrong_parents():
174
# The check against the index isn't needed.
156
# This check is not necessary.
175
157
self.revs_with_bad_parents_in_index = None
176
for thing in revision_iterator:
179
bad_revisions = self.repository._find_inconsistent_revision_parents(
181
self.revs_with_bad_parents_in_index = list(bad_revisions)
159
bad_revisions = self.repository._find_inconsistent_revision_parents()
160
self.revs_with_bad_parents_in_index = list(bad_revisions)
162
def plan_revisions(self):
163
repository = self.repository
164
self.planned_revisions = repository.all_revision_ids()
165
self.progress.clear()
166
inventoried = set(key[-1] for key in self.inventory_weave.keys())
167
awol = set(self.planned_revisions) - inventoried
169
raise BzrCheckError('Stored revisions missing from inventory'
170
'{%s}' % ','.join([f for f in awol]))
183
172
def report_results(self, verbose):
184
173
if self.check_repo:
189
178
def _report_repo_results(self, verbose):
190
179
note('checked repository %s format %s',
191
self.repository.user_url,
192
self.repository._format)
180
self.repository.bzrdir.root_transport,
181
self.repository._format)
193
182
note('%6d revisions', self.checked_rev_cnt)
194
183
note('%6d file-ids', len(self.checked_weaves))
196
note('%6d unreferenced text versions',
197
len(self.unreferenced_versions))
198
if verbose and len(self.unreferenced_versions):
199
for file_id, revision_id in self.unreferenced_versions:
200
note('unreferenced version: {%s} in %s', revision_id,
184
note('%6d unique file texts', self.checked_text_cnt)
185
note('%6d repeated file texts', self.repeated_text_cnt)
186
note('%6d unreferenced text versions',
187
len(self.unreferenced_versions))
202
188
if self.missing_inventory_sha_cnt:
203
189
note('%6d revisions are missing inventory_sha1',
204
190
self.missing_inventory_sha_cnt)
237
227
' %s has wrong parents in index: '
238
228
'%r should be %r',
239
229
revision_id, index_parents, actual_parents)
240
for item in self._report_items:
243
def _check_one_rev(self, rev_id, rev):
244
"""Cross-check one revision.
246
:param rev_id: A revision id to check.
247
:param rev: A revision or None to indicate a missing revision.
231
def check_one_rev(self, rev_id):
232
"""Check one revision.
234
rev_id - the one to check
236
rev = self.repository.get_revision(rev_id)
249
238
if rev.revision_id != rev_id:
250
self._report_items.append(
251
'Mismatched internal revid {%s} and index revid {%s}' % (
252
rev.revision_id, rev_id))
253
rev_id = rev.revision_id
254
# Check this revision tree etc, and count as seen when we encounter a
256
self.planned_revisions.add(rev_id)
258
self.ghosts.discard(rev_id)
259
# Count all parents as ghosts if we haven't seen them yet.
239
raise BzrCheckError('wrong internal revision id in revision {%s}'
260
242
for parent in rev.parent_ids:
261
243
if not parent in self.planned_revisions:
262
self.ghosts.add(parent)
264
self.ancestors[rev_id] = tuple(rev.parent_ids) or (NULL_REVISION,)
265
self.add_pending_item(rev_id, ('inventories', rev_id), 'inventory',
244
# rev has a parent we didn't know about.
245
missing_links = self.missing_parent_links.get(parent, [])
246
missing_links.append(rev_id)
247
self.missing_parent_links[parent] = missing_links
248
# list based so somewhat slow,
249
# TODO have a planned_revisions list and set.
250
if self.repository.has_revision(parent):
251
missing_ancestry = self.repository.get_ancestry(parent)
252
for missing in missing_ancestry:
253
if (missing is not None
254
and missing not in self.planned_revisions):
255
self.planned_revisions.append(missing)
257
self.ghosts.append(rev_id)
259
if rev.inventory_sha1:
260
# Loopback - this is currently circular logic as the
261
# knit get_inventory_sha1 call returns rev.inventory_sha1.
262
# Repository.py's get_inventory_sha1 should instead return
263
# inventories.get_record_stream([(revid,)]).next().sha1 or
265
inv_sha1 = self.repository.get_inventory_sha1(rev_id)
266
if inv_sha1 != rev.inventory_sha1:
267
raise BzrCheckError('Inventory sha1 hash doesn\'t match'
268
' value in revision {%s}' % rev_id)
269
self._check_revision_tree(rev_id)
267
270
self.checked_rev_cnt += 1
269
def add_pending_item(self, referer, key, kind, sha1):
270
"""Add a reference to a sha1 to be cross checked against a key.
272
:param referer: The referer that expects key to have sha1.
273
:param key: A storage key e.g. ('texts', 'foo@bar-20040504-1234')
274
:param kind: revision/inventory/text/map/signature
275
:param sha1: A hex sha1 or None if no sha1 is known.
277
existing = self.pending_keys.get(key)
279
if sha1 != existing[1]:
280
self._report_items.append('Multiple expected sha1s for %s. {%s}'
281
' expects {%s}, {%s} expects {%s}', (
282
key, referer, sha1, existing[1], existing[0]))
284
self.pending_keys[key] = (kind, sha1, referer)
286
272
def check_weaves(self):
287
273
"""Check all the weaves we can get our hands on.
290
storebar = bzrlib.ui.ui_factory.nested_progress_bar()
292
self._check_weaves(storebar)
296
def _check_weaves(self, storebar):
297
storebar.update('text-index', 0, 2)
298
if self.repository._format.fast_deltas:
299
# We haven't considered every fileid instance so far.
300
weave_checker = self.repository._get_versioned_file_checker(
301
ancestors=self.ancestors)
303
weave_checker = self.repository._get_versioned_file_checker(
304
text_key_references=self.text_key_references,
305
ancestors=self.ancestors)
306
storebar.update('file-graph', 1)
276
self.progress.update('checking inventory', 0, 2)
277
self.inventory_weave.check(progress_bar=self.progress)
278
self.progress.update('checking text storage', 1, 2)
279
self.repository.texts.check(progress_bar=self.progress)
280
weave_checker = self.repository._get_versioned_file_checker(
281
text_key_references=self.text_key_references)
307
282
result = weave_checker.check_file_version_parents(
308
self.repository.texts)
283
self.repository.texts, progress_bar=self.progress)
309
284
self.checked_weaves = weave_checker.file_ids
310
285
bad_parents, unused_versions = result
311
286
bad_parents = bad_parents.items()
319
294
(revision_id, weave_id, weave_parents, correct_parents))
320
295
self.unreferenced_versions.update(unused_versions)
297
def _check_revision_tree(self, rev_id):
298
tree = self.repository.revision_tree(rev_id)
302
for path, ie in inv.iter_entries():
303
self._add_entry_to_text_key_references(inv, ie)
305
if file_id in seen_ids:
306
raise BzrCheckError('duplicated file_id {%s} '
307
'in inventory for revision {%s}'
309
seen_ids.add(file_id)
310
ie.check(self, rev_id, inv, tree)
311
if path in seen_names:
312
raise BzrCheckError('duplicated path %s '
313
'in inventory for revision {%s}'
322
317
def _add_entry_to_text_key_references(self, inv, entry):
323
if not self.rich_roots and entry.name == '':
318
if not self.rich_roots and entry == inv.root:
325
320
key = (entry.file_id, entry.revision)
326
321
self.text_key_references.setdefault(key, False)
442
432
scan_branch(branch, needed_refs, to_unlock)
443
433
if do_branch and not branches:
444
note("No branch found at specified location.")
434
log_error("No branch found at specified location.")
445
435
if do_tree and base_tree is None and not saw_tree:
446
note("No working tree found at specified location.")
436
log_error("No working tree found at specified location.")
447
437
if do_repo or do_branch or do_tree:
449
439
note("Checking repository at '%s'."
440
% (repo.bzrdir.root_transport.base,))
451
441
result = repo.check(None, callback_refs=needed_refs,
452
442
check_repo=do_repo)
453
443
result.report_results(verbose)
456
note("No working tree found at specified location.")
446
log_error("No working tree found at specified location.")
458
note("No branch found at specified location.")
448
log_error("No branch found at specified location.")
460
note("No repository found at specified location.")
450
log_error("No repository found at specified location.")
462
452
for thing in to_unlock: