# Copyright (C) 2005, 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""Reconcilers are able to fix some potential data errors in a branch."""


from bzrlib import (
    errors,
    repofmt,
    ui,
    )
from bzrlib.trace import mutter, note
from bzrlib.tsort import TopoSorter

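
# Overview of the classes below:
#  * Reconciler drives reconciliation for a whole bzrdir, delegating to the
#    branch's and repository's own reconcile() implementations.
#  * BranchReconciler re-normalizes a branch's last-revision info.
#  * RepoReconciler rebuilds the inventory weave so its parent pointers match
#    the revisions actually present.
#  * KnitReconciler and PackReconciler specialise RepoReconciler for knit and
#    pack based repositories respectively.

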
def reconcile(dir, other=None):
    """Reconcile the data in dir.

    Currently this is limited to an inventory 'reweave'.

    This is a convenience method, for using a Reconciler object.

    Directly using Reconciler is recommended for library users that
    desire fine grained control or analysis of the found issues.

    :param other: another bzrdir to reconcile against.
    """
    reconciler = Reconciler(dir, other=other)
    reconciler.reconcile()

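
# Illustrative usage (a sketch, not part of the original module): reconcile()
# is normally handed an already opened bzrdir.  The import paths and the '.'
# path below are assumptions made for the example.
#
#   from bzrlib.bzrdir import BzrDir
#   from bzrlib.reconcile import reconcile
#
#   reconcile(BzrDir.open('.'))

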
class Reconciler(object):
    """Reconcilers are used to reconcile existing data."""

    def __init__(self, dir, other=None):
        """Create a Reconciler."""
        self.bzrdir = dir

    def reconcile(self):
        """Perform reconciliation.

        After reconciliation the following attributes document found issues:
        inconsistent_parents: The number of revisions in the repository whose
                              ancestry was being reported incorrectly.
        garbage_inventories: The number of inventory objects without revisions
                             that were garbage collected.
        fixed_branch_history: None if there was no branch, False if the branch
                              history was correct, True if the branch history
                              needed to be re-normalized.
        """
        self.pb = ui.ui_factory.nested_progress_bar()
        try:
            self._reconcile()
        finally:
            self.pb.finished()

    def _reconcile(self):
"""Helper function for performing reconciliation."""
82
self._reconcile_branch()
83
self._reconcile_repository()
85
def _reconcile_branch(self):
87
self.branch = self.bzrdir.open_branch()
88
except errors.NotBranchError:
89
# Nothing to check here
90
self.fixed_branch_history = None
92
self.pb.note('Reconciling branch %s',
94
branch_reconciler = self.branch.reconcile(thorough=True)
95
self.fixed_branch_history = branch_reconciler.fixed_history
97
    def _reconcile_repository(self):
        self.repo = self.bzrdir.find_repository()
        self.pb.note('Reconciling repository %s',
                     self.repo.bzrdir.root_transport.base)
        self.pb.update("Reconciling repository", 0, 1)
        repo_reconciler = self.repo.reconcile(thorough=True)
        self.inconsistent_parents = repo_reconciler.inconsistent_parents
        self.garbage_inventories = repo_reconciler.garbage_inventories
        if repo_reconciler.aborted:
            self.pb.note(
                'Reconcile aborted: revision index has inconsistent parents.')
            self.pb.note(
                'Run "bzr check" for more details.')
        else:
            self.pb.note('Reconciliation complete.')

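
# Illustrative direct use of Reconciler (a sketch, not part of the original
# module); 'a_bzrdir' is assumed to be an already opened bzrdir:
#
#   reconciler = Reconciler(a_bzrdir)
#   reconciler.reconcile()
#   if reconciler.inconsistent_parents or reconciler.garbage_inventories:
#       print 'repository data needed correction'
#   if reconciler.fixed_branch_history:
#       print 'branch history was re-normalized'

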
class BranchReconciler(object):
    """Reconciler that works on a branch."""

    def __init__(self, a_branch, thorough=False):
        self.fixed_history = None
        self.thorough = thorough
        self.branch = a_branch

    def reconcile(self):
        self.branch.lock_write()
        try:
            self.pb = ui.ui_factory.nested_progress_bar()
            try:
                self._reconcile_steps()
            finally:
                self.pb.finished()
        finally:
            self.branch.unlock()

    def _reconcile_steps(self):
        self._reconcile_revision_history()

    def _reconcile_revision_history(self):
        repo = self.branch.repository
        last_revno, last_revision_id = self.branch.last_revision_info()
        real_history = list(repo.iter_reverse_revision_history(
                                last_revision_id))
        real_history.reverse()
        if last_revno != len(real_history):
            self.fixed_history = True
            # Technically for Branch5 formats, it is more efficient to use
            # set_revision_history, as this will regenerate it again.
            # Not really worth a whole BranchReconciler class just for this,
            # though.
            self.pb.note('Fixing last revision info %s => %s',
                         last_revno, len(real_history))
            self.branch.set_last_revision_info(len(real_history),
                                               last_revision_id)
        else:
            self.fixed_history = False
            self.pb.note('revision_history ok.')

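
# Example of the history fix above (illustrative numbers, not from any real
# branch): if a branch's metadata claims last_revno == 12 but walking the
# left-hand ancestry back from the tip yields only 10 revisions, the branch
# is re-normalized with set_last_revision_info(10, tip_revision_id) and
# fixed_history is reported as True.

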
class RepoReconciler(object):
    """Reconciler that reconciles a repository.

    The goal of repository reconciliation is to make any derived data
    consistent with the core data committed by a user. This can involve
    reindexing, or removing unreferenced data if that can interfere with
    queries in a given repository.

    Currently this consists of an inventory reweave with revision cross-checks.
    """

    def __init__(self, repo, other=None, thorough=False):
        """Construct a RepoReconciler.

        :param thorough: perform a thorough check which may take longer but
                         will correct non-data loss issues such as incorrect
                         cached data.
        """
        self.garbage_inventories = 0
        self.inconsistent_parents = 0
        self.aborted = False
        self.repo = repo
        self.thorough = thorough

"""Perform reconciliation.
184
After reconciliation the following attributes document found issues:
185
inconsistent_parents: The number of revisions in the repository whose
186
ancestry was being reported incorrectly.
187
garbage_inventories: The number of inventory objects without revisions
188
that were garbage collected.
190
self.repo.lock_write()
192
self.pb = ui.ui_factory.nested_progress_bar()
194
self._reconcile_steps()
200
    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        self._reweave_inventory()

    def _reweave_inventory(self):
        """Regenerate the inventory weave for the repository from scratch.

        This is a smart function: it will only do the reweave if doing it
        will correct data issues. The self.thorough flag controls whether
        only data-loss causing issues (!self.thorough) or all issues
        (self.thorough) are treated as requiring the reweave.
        """
        # local because needing to know about WeaveFile is a wart we want to hide
        from bzrlib.weave import WeaveFile, Weave
        transaction = self.repo.get_transaction()
        self.pb.update('Reading inventory data.')
        self.inventory = self.repo.get_inventory_weave()
        # the total set of revisions to process
        self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])

        # mapping from revision_id to parents
        self._rev_graph = {}
        # errors that we detect
        self.inconsistent_parents = 0
        # we need the revision id of each revision and its available parents list
        self._setup_steps(len(self.pending))
        for rev_id in self.pending:
            # put a revision into the graph.
            self._graph_revision(rev_id)
        self._check_garbage_inventories()
        # if there are no inconsistent_parents and
        # (no garbage inventories or we are not doing a thorough check)
        if (not self.inconsistent_parents and
            (not self.garbage_inventories or not self.thorough)):
            self.pb.note('Inventory ok.')
            return
        self.pb.update('Backing up inventory...', 0, 0)
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
        self.pb.note('Backup Inventory created.')
        # asking for '' should never return a non-empty weave
        new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
            self.repo.get_transaction())

        # we have topological order of revisions and non ghost parents ready.
        self._setup_steps(len(self._rev_graph))
        for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
            parents = self._rev_graph[rev_id]
            # double check this really is in topological order.
            unavailable = [p for p in parents if p not in new_inventory_vf]
            assert len(unavailable) == 0
            # this entry has all the non ghost parents in the inventory
            # file already.
            self._reweave_step('adding inventories')
            if isinstance(new_inventory_vf, WeaveFile):
                # It's really a WeaveFile, but we call straight into the
                # Weave's add method to disable the auto-write-out behaviour.
                # This avoids a revision_count * time-to-write additional
                # overhead during reconcile.
                new_inventory_vf._check_write_ok()
                Weave._add_lines(new_inventory_vf, rev_id, parents,
                    self.inventory.get_lines(rev_id), None, None, None, False, True)
            else:
                new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))

        if isinstance(new_inventory_vf, WeaveFile):
            new_inventory_vf._save()
        # if this worked, the set of new_inventory_vf.names should equal
        # self.pending
        assert set(new_inventory_vf.versions()) == self.pending
        self.pb.update('Writing weave')
        self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
        self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
        self.inventory = None
        self.pb.note('Inventory regenerated.')

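    # Note on ordering (descriptive, added for clarity): TopoSorter yields
    # revisions parents-before-children, so by the time a revision is added
    # to the new inventory weave above, all of its non-ghost parents are
    # already present -- which is what the 'unavailable' assertion checks.
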
    def _setup_steps(self, new_total):
        """Setup the markers we need to control the progress bar."""
        self.total = new_total
        self.count = 0

    def _graph_revision(self, rev_id):
        """Load a revision into the revision graph."""
        # pick a random revision
        # analyse revision id rev_id and put it in the stack.
        self._reweave_step('loading revisions')
        rev = self.repo.get_revision_reconcile(rev_id)
        assert rev.revision_id == rev_id
        parents = []
        for parent in rev.parent_ids:
            if self._parent_is_available(parent):
                parents.append(parent)
            else:
                mutter('found ghost %s', parent)
        self._rev_graph[rev_id] = parents
        if self._parents_are_inconsistent(rev_id, parents):
            self.inconsistent_parents += 1
            mutter('Inconsistent inventory parents: id {%s} '
                   'inventory claims %r, '
                   'available parents are %r, '
                   'unavailable parents are %r',
                   rev_id,
                   set(self.inventory.get_parent_map([rev_id])[rev_id]),
                   set(parents),
                   set(rev.parent_ids).difference(set(parents)))

    def _parents_are_inconsistent(self, rev_id, parents):
        """Return True if the parents list of rev_id does not match the weave.

        This detects inconsistencies based on the self.thorough value:
        if thorough is on, the first parent value is checked as well as ghost
        differences.
        Otherwise only the ghost differences are evaluated.
        """
        weave_parents = self.inventory.get_parent_map([rev_id])[rev_id]
        weave_missing_old_ghosts = set(weave_parents) != set(parents)
        first_parent_is_wrong = (
            len(weave_parents) and len(parents) and
            parents[0] != weave_parents[0])
        if self.thorough:
            return weave_missing_old_ghosts or first_parent_is_wrong
        else:
            return weave_missing_old_ghosts

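    # Worked example for the check above (illustrative ids, not real data):
    # if the inventory weave records parents (A, B) for a revision whose
    # revision object names parents (A, G) and G is a ghost, the available
    # parents are just (A,), so the sets differ and the revision counts as
    # inconsistent.  A wrong first parent, e.g. weave (B, A) vs revision
    # (A, B), is only treated as inconsistent when thorough is enabled.
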
    def _check_garbage_inventories(self):
        """Check for garbage inventories which we cannot trust.

        We can't trust them because their pre-requisite file data may not
        be present - all we know is that their revision was not installed.
        """
        if not self.thorough:
            return
        inventories = set(self.inventory.versions())
        revisions = set(self._rev_graph.keys())
        garbage = inventories.difference(revisions)
        self.garbage_inventories = len(garbage)
        for revision_id in garbage:
            mutter('Garbage inventory {%s} found.', revision_id)

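    # Illustrative example of the garbage check above (made-up ids): if the
    # inventory weave holds versions {A, B, C} while only revisions A and B
    # were graphed from the revision store, then C is a garbage inventory;
    # garbage_inventories becomes 1 and C is logged via mutter().
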
    def _parent_is_available(self, parent):
        """True if parent is a fully available revision.

        A fully available revision has an inventory and a revision object in
        the repository.
        """
        return (parent in self._rev_graph or
                (parent in self.inventory and self.repo.has_revision(parent)))

    def _reweave_step(self, message):
        """Mark a single step of regeneration complete."""
        self.pb.update(message, self.count, self.total)
        self.count += 1


class KnitReconciler(RepoReconciler):
    """Reconciler that reconciles a knit format repository.

    This will detect garbage inventories and remove them in thorough mode.
    """

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        if self.thorough:
            try:
                self._load_indexes()
            except errors.BzrCheckError:
                self.aborted = True
                return
            # knits never suffer this
            self._gc_inventory()
            self._fix_text_parents()

    def _load_indexes(self):
        """Load indexes for the reconciliation."""
        self.transaction = self.repo.get_transaction()
        self.pb.update('Reading indexes.', 0, 2)
        self.inventory = self.repo.get_inventory_weave()
        self.pb.update('Reading indexes.', 1, 2)
        self.repo._check_for_inconsistent_revision_parents()
        self.revisions = self.repo._revision_store.get_revision_file(self.transaction)
        self.pb.update('Reading indexes.', 2, 2)

    def _gc_inventory(self):
        """Remove inventories that are not referenced from the revision store."""
        self.pb.update('Checking unused inventories.', 0, 1)
        self._check_garbage_inventories()
        self.pb.update('Checking unused inventories.', 1, 3)
        if not self.garbage_inventories:
            self.pb.note('Inventory ok.')
            return
        self.pb.update('Backing up inventory...', 0, 0)
        self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
        self.pb.note('Backup Inventory created.')
        # asking for '' should never return a non-empty weave
        new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
            self.transaction)

        # we have topological order of revisions and non ghost parents ready.
        self._setup_steps(len(self.revisions))
        revision_ids = self.revisions.versions()
        graph = self.revisions.get_parent_map(revision_ids)
        for rev_id in TopoSorter(graph.items()).iter_topo_order():
            parents = graph[rev_id]
            # double check this really is in topological order, ignoring existing ghosts.
            unavailable = [p for p in parents if p not in new_inventory_vf and
                           p in self.revisions]
            assert len(unavailable) == 0
            # this entry has all the non ghost parents in the inventory
            # file already.
            self._reweave_step('adding inventories')
            # ugly but needed, weaves are just way tooooo slow else.
            new_inventory_vf.add_lines_with_ghosts(rev_id, parents,
                self.inventory.get_lines(rev_id))

        # if this worked, the set of new_inventory_vf.names should equal
        # the set of revision ids in the revision store
        assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
        self.pb.update('Writing weave')
        self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
        self.repo.control_weaves.delete('inventory.new', self.transaction)
        self.inventory = None
        self.pb.note('Inventory regenerated.')

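    # Design note (added for clarity): unlike the weave reweave above, the
    # knit rebuild inserts with add_lines_with_ghosts() and only requires
    # parents that the revision store actually knows about to be present
    # already, so parent references to ghost revisions are preserved rather
    # than dropped.
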
    def _check_garbage_inventories(self):
        """Check for garbage inventories which we cannot trust.

        We can't trust them because their pre-requisite file data may not
        be present - all we know is that their revision was not installed.
        """
        inventories = set(self.inventory.versions())
        revisions = set(self.revisions.versions())
        garbage = inventories.difference(revisions)
        self.garbage_inventories = len(garbage)
        for revision_id in garbage:
            mutter('Garbage inventory {%s} found.', revision_id)

    def _fix_text_parents(self):
        """Fix bad versionedfile parent entries.

        It is possible for the parents entry in a versionedfile entry to be
        inconsistent with the values in the revision and inventory.

        This method finds entries with such inconsistencies, corrects their
        parent lists, and replaces the versionedfile with a corrected version.
        """
        transaction = self.repo.get_transaction()
        versions = self.revisions.versions()
        mutter('Prepopulating revision text cache with %d revisions',
               len(versions))
        vf_checker = self.repo._get_versioned_file_checker()
        # List all weaves before altering, to avoid race conditions when we
        # delete unused weaves.
        weaves = list(enumerate(self.repo.weave_store))
        for num, file_id in weaves:
            self.pb.update('Fixing text parents', num,
                           len(self.repo.weave_store))
            vf = self.repo.weave_store.get_weave(file_id, transaction)
            versions_with_bad_parents, unused_versions = \
                vf_checker.check_file_version_parents(vf, file_id)
            if (len(versions_with_bad_parents) == 0 and
                len(unused_versions) == 0):
                continue
            full_text_versions = set()
            self._fix_text_parent(file_id, vf, versions_with_bad_parents,
                full_text_versions, unused_versions)

    def _fix_text_parent(self, file_id, vf, versions_with_bad_parents,
            full_text_versions, unused_versions):
        """Fix bad versionedfile entries in a single versioned file."""
        mutter('fixing text parent: %r (%d versions)', file_id,
               len(versions_with_bad_parents))
        mutter('(%d need to be full texts, %d are unused)',
               len(full_text_versions), len(unused_versions))
        new_vf = self.repo.weave_store.get_empty('temp:%s' % file_id,
            self.transaction)
        new_parents = {}
        for version in vf.versions():
            if version in unused_versions:
                continue
            elif version in versions_with_bad_parents:
                parents = versions_with_bad_parents[version][1]
            else:
                parents = vf.get_parent_map([version])[version]
            new_parents[version] = parents
        if not len(new_parents):
            # No used versions, remove the VF.
            self.repo.weave_store.delete(file_id, self.transaction)
            return
        for version in TopoSorter(new_parents.items()).iter_topo_order():
            lines = vf.get_lines(version)
            parents = new_parents[version]
            if parents and (parents[0] in full_text_versions):
                # Force this record to be a fulltext, not a delta.
                new_vf._add(version, lines, parents, False,
                    None, None, None, False)
            else:
                new_vf.add_lines(version, parents, lines)
        self.repo.weave_store.copy(new_vf, file_id, self.transaction)
        self.repo.weave_store.delete('temp:%s' % file_id, self.transaction)

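
# Descriptive note on the helper above (added for clarity): rather than
# editing parent pointers in place, the file is rebuilt into a temporary
# 'temp:<file_id>' versioned file with corrected parents and then copied
# back over the original, so a failure part-way through never leaves a
# half-rewritten weave under the real file_id.  As used above,
# versions_with_bad_parents maps a version to a tuple whose element at
# index 1 is taken as the corrected parent list.

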
class PackReconciler(RepoReconciler):
    """Reconciler that reconciles a pack based repository.

    Garbage inventories do not affect ancestry queries, and removal is
    considerably more expensive as there is no separate versioned file for
    them, so they are not cleaned. In short it is currently a no-op.

    In future this may be a good place to hook in annotation cache checking,
    index recreation etc.
    """

    # XXX: The index corruption that _fix_text_parents performs is needed for
    # packs, but not yet implemented. The basic approach is to:
    #  - lock the names list
    #  - perform a customised pack() that regenerates data as needed
    #  - unlock the names list
    # https://bugs.edge.launchpad.net/bzr/+bug/154173

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        if not self.thorough:
            return
        collection = self.repo._pack_collection
        collection.ensure_loaded()
        collection.lock_names()
        try:
            packs = collection.all_packs()
            all_revisions = self.repo.all_revision_ids()
            total_inventories = len(list(
                collection.inventory_index.combined_index.iter_all_entries()))
            if len(all_revisions):
                self._packer = repofmt.pack_repo.ReconcilePacker(
                    collection, packs, ".reconcile", all_revisions)
                new_pack = self._packer.pack(pb=self.pb)
                if new_pack is not None:
                    self._discard_and_save(packs)
            else:
                # only make a new pack when there is data to copy.
                self._discard_and_save(packs)
            self.garbage_inventories = total_inventories - len(list(
                collection.inventory_index.combined_index.iter_all_entries()))
        finally:
            collection._unlock_names()

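    # Descriptive note (added for clarity): garbage_inventories above is
    # computed as a difference of counts -- the number of entries in the
    # combined inventory index before the repack minus the number remaining
    # afterwards -- rather than by enumerating individual garbage inventories.
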
    def _discard_and_save(self, packs):
        """Discard some packs from the repository.

        This removes them from the memory index, saves the in-memory index
        which makes the newly reconciled pack visible and hides the packs to be
        discarded, and finally renames the packs being discarded into the
        obsolete packs directory.

        :param packs: The packs to discard.
        """
        for pack in packs:
            self.repo._pack_collection._remove_pack_from_memory(pack)
        self.repo._pack_collection._save_pack_names()
        self.repo._pack_collection._obsolete_packs(packs)