1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Reconcilers are able to fix some potential data errors in a branch."""
35
from bzrlib.trace import mutter, note
36
from bzrlib.tsort import TopoSorter
39
def reconcile(dir, other=None):
40
"""Reconcile the data in dir.
42
Currently this is limited to a inventory 'reweave'.
44
This is a convenience method, for using a Reconciler object.
46
Directly using Reconciler is recommended for library users that
47
desire fine grained control or analysis of the found issues.
49
:param other: another bzrdir to reconcile against.
51
reconciler = Reconciler(dir, other=other)
52
reconciler.reconcile()
55
class Reconciler(object):
56
"""Reconcilers are used to reconcile existing data."""
58
def __init__(self, dir, other=None):
59
"""Create a Reconciler."""
63
"""Perform reconciliation.
65
After reconciliation the following attributes document found issues:
66
inconsistent_parents: The number of revisions in the repository whose
67
ancestry was being reported incorrectly.
68
garbage_inventories: The number of inventory objects without revisions
69
that were garbage collected.
71
self.pb = ui.ui_factory.nested_progress_bar()
78
"""Helper function for performing reconciliation."""
79
self._reconcile_branch()
80
self._reconcile_repository()
82
def _reconcile_branch(self):
84
self.branch = self.bzrdir.open_branch()
85
except errors.NotBranchError:
86
# Nothing to check here
88
self.pb.note('Reconciling branch %s',
90
branch_reconciler = self.branch.reconcile(thorough=True)
91
self.fixed_branch_history = branch_reconciler.fixed_history
93
def _reconcile_repository(self):
94
self.repo = self.bzrdir.find_repository()
95
self.pb.note('Reconciling repository %s',
96
self.repo.bzrdir.root_transport.base)
97
self.pb.update("Reconciling repository", 0, 1)
98
repo_reconciler = self.repo.reconcile(thorough=True)
99
self.inconsistent_parents = repo_reconciler.inconsistent_parents
100
self.garbage_inventories = repo_reconciler.garbage_inventories
101
if repo_reconciler.aborted:
103
'Reconcile aborted: revision index has inconsistent parents.')
105
'Run "bzr check" for more details.')
107
self.pb.note('Reconciliation complete.')
110
class BranchReconciler(object):
111
"""Reconciler that works on a branch."""
113
def __init__(self, a_branch, thorough=False):
114
self.fixed_history = None
115
self.thorough = thorough
116
self.branch = a_branch
119
self.branch.lock_write()
121
self.pb = ui.ui_factory.nested_progress_bar()
123
self._reconcile_steps()
129
def _reconcile_steps(self):
130
self._reconcile_revision_history()
132
def _reconcile_revision_history(self):
133
repo = self.branch.repository
134
last_revno, last_revision_id = self.branch.last_revision_info()
135
real_history = list(repo.iter_reverse_revision_history(
137
real_history.reverse()
138
if last_revno != len(real_history):
139
self.fixed_history = True
140
# Technically for Branch5 formats, it is more efficient to use
141
# set_revision_history, as this will regenerate it again.
142
# Not really worth a whole BranchReconciler class just for this,
144
self.pb.note('Fixing last revision info %s => %s',
145
last_revno, len(real_history))
146
self.branch.set_last_revision_info(len(real_history),
149
self.fixed_history = False
150
self.pb.note('revision_history ok.')
153
class RepoReconciler(object):
154
"""Reconciler that reconciles a repository.
156
The goal of repository reconciliation is to make any derived data
157
consistent with the core data committed by a user. This can involve
158
reindexing, or removing unreferenced data if that can interfere with
159
queries in a given repository.
161
Currently this consists of an inventory reweave with revision cross-checks.
164
def __init__(self, repo, other=None, thorough=False):
165
"""Construct a RepoReconciler.
167
:param thorough: perform a thorough check which may take longer but
168
will correct non-data loss issues such as incorrect
171
self.garbage_inventories = 0
172
self.inconsistent_parents = 0
175
self.thorough = thorough
178
"""Perform reconciliation.
180
After reconciliation the following attributes document found issues:
181
inconsistent_parents: The number of revisions in the repository whose
182
ancestry was being reported incorrectly.
183
garbage_inventories: The number of inventory objects without revisions
184
that were garbage collected.
186
self.repo.lock_write()
188
self.pb = ui.ui_factory.nested_progress_bar()
190
self._reconcile_steps()
196
def _reconcile_steps(self):
197
"""Perform the steps to reconcile this repository."""
198
self._reweave_inventory()
200
def _reweave_inventory(self):
201
"""Regenerate the inventory weave for the repository from scratch.
203
This is a smart function: it will only do the reweave if doing it
204
will correct data issues. The self.thorough flag controls whether
205
only data-loss causing issues (!self.thorough) or all issues
206
(self.thorough) are treated as requiring the reweave.
208
# local because needing to know about WeaveFile is a wart we want to hide
209
from bzrlib.weave import WeaveFile, Weave
210
transaction = self.repo.get_transaction()
211
self.pb.update('Reading inventory data.')
212
self.inventory = self.repo.get_inventory_weave()
213
# the total set of revisions to process
214
self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
216
# mapping from revision_id to parents
218
# errors that we detect
219
self.inconsistent_parents = 0
220
# we need the revision id of each revision and its available parents list
221
self._setup_steps(len(self.pending))
222
for rev_id in self.pending:
223
# put a revision into the graph.
224
self._graph_revision(rev_id)
225
self._check_garbage_inventories()
226
# if there are no inconsistent_parents and
227
# (no garbage inventories or we are not doing a thorough check)
228
if (not self.inconsistent_parents and
229
(not self.garbage_inventories or not self.thorough)):
230
self.pb.note('Inventory ok.')
232
self.pb.update('Backing up inventory...', 0, 0)
233
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
234
self.pb.note('Backup Inventory created.')
235
# asking for '' should never return a non-empty weave
236
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
237
self.repo.get_transaction())
239
# we have topological order of revisions and non ghost parents ready.
240
self._setup_steps(len(self._rev_graph))
241
for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
242
parents = self._rev_graph[rev_id]
243
# double check this really is in topological order.
244
unavailable = [p for p in parents if p not in new_inventory_vf]
245
assert len(unavailable) == 0
246
# this entry has all the non ghost parents in the inventory
248
self._reweave_step('adding inventories')
249
if isinstance(new_inventory_vf, WeaveFile):
250
# It's really a WeaveFile, but we call straight into the
251
# Weave's add method to disable the auto-write-out behaviour.
252
# This is done to avoid a revision_count * time-to-write additional overhead on
254
new_inventory_vf._check_write_ok()
255
Weave._add_lines(new_inventory_vf, rev_id, parents,
256
self.inventory.get_lines(rev_id), None, None, None, False, True)
258
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
260
if isinstance(new_inventory_vf, WeaveFile):
261
new_inventory_vf._save()
262
# if this worked, the set of new_inventory_vf.names should equal
264
assert set(new_inventory_vf.versions()) == self.pending
265
self.pb.update('Writing weave')
266
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
267
self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
268
self.inventory = None
269
self.pb.note('Inventory regenerated.')
271
def _setup_steps(self, new_total):
272
"""Setup the markers we need to control the progress bar."""
273
self.total = new_total
276
def _graph_revision(self, rev_id):
277
"""Load a revision into the revision graph."""
278
# pick a random revision
279
# analyse revision id rev_id and put it in the stack.
280
self._reweave_step('loading revisions')
281
rev = self.repo.get_revision_reconcile(rev_id)
282
assert rev.revision_id == rev_id
284
for parent in rev.parent_ids:
285
if self._parent_is_available(parent):
286
parents.append(parent)
288
mutter('found ghost %s', parent)
289
self._rev_graph[rev_id] = parents
290
if self._parents_are_inconsistent(rev_id, parents):
291
self.inconsistent_parents += 1
292
mutter('Inconsistent inventory parents: id {%s} '
293
'inventory claims %r, '
294
'available parents are %r, '
295
'unavailable parents are %r',
297
set(self.inventory.get_parent_map([rev_id])[rev_id]),
299
set(rev.parent_ids).difference(set(parents)))
301
def _parents_are_inconsistent(self, rev_id, parents):
302
"""Return True if the parents list of rev_id does not match the weave.
304
This detects inconsistencies based on the self.thorough value:
305
if thorough is on, the first parent value is checked as well as ghost
307
Otherwise only the ghost differences are evaluated.
309
weave_parents = self.inventory.get_parent_map([rev_id])[rev_id]
310
weave_missing_old_ghosts = set(weave_parents) != set(parents)
311
first_parent_is_wrong = (
312
len(weave_parents) and len(parents) and
313
parents[0] != weave_parents[0])
315
return weave_missing_old_ghosts or first_parent_is_wrong
317
return weave_missing_old_ghosts
319
def _check_garbage_inventories(self):
320
"""Check for garbage inventories which we cannot trust
322
We cant trust them because their pre-requisite file data may not
323
be present - all we know is that their revision was not installed.
325
if not self.thorough:
327
inventories = set(self.inventory.versions())
328
revisions = set(self._rev_graph.keys())
329
garbage = inventories.difference(revisions)
330
self.garbage_inventories = len(garbage)
331
for revision_id in garbage:
332
mutter('Garbage inventory {%s} found.', revision_id)
334
def _parent_is_available(self, parent):
335
"""True if parent is a fully available revision
337
A fully available revision has a inventory and a revision object in the
340
return (parent in self._rev_graph or
341
(parent in self.inventory and self.repo.has_revision(parent)))
343
def _reweave_step(self, message):
344
"""Mark a single step of regeneration complete."""
345
self.pb.update(message, self.count, self.total)
349
class KnitReconciler(RepoReconciler):
350
"""Reconciler that reconciles a knit format repository.
352
This will detect garbage inventories and remove them in thorough mode.
355
def _reconcile_steps(self):
356
"""Perform the steps to reconcile this repository."""
360
except errors.BzrCheckError:
363
# knits never suffer this
365
self._fix_text_parents()
367
def _load_indexes(self):
368
"""Load indexes for the reconciliation."""
369
self.transaction = self.repo.get_transaction()
370
self.pb.update('Reading indexes.', 0, 2)
371
self.inventory = self.repo.get_inventory_weave()
372
self.pb.update('Reading indexes.', 1, 2)
373
self.repo._check_for_inconsistent_revision_parents()
374
self.revisions = self.repo._revision_store.get_revision_file(self.transaction)
375
self.pb.update('Reading indexes.', 2, 2)
377
def _gc_inventory(self):
378
"""Remove inventories that are not referenced from the revision store."""
379
self.pb.update('Checking unused inventories.', 0, 1)
380
self._check_garbage_inventories()
381
self.pb.update('Checking unused inventories.', 1, 3)
382
if not self.garbage_inventories:
383
self.pb.note('Inventory ok.')
385
self.pb.update('Backing up inventory...', 0, 0)
386
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
387
self.pb.note('Backup Inventory created.')
388
# asking for '' should never return a non-empty weave
389
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
392
# we have topological order of revisions and non ghost parents ready.
393
self._setup_steps(len(self.revisions))
394
revision_ids = self.revisions.versions()
395
graph = self.revisions.get_parent_map(revision_ids)
396
for rev_id in TopoSorter(graph.items()).iter_topo_order():
397
parents = graph[rev_id]
398
# double check this really is in topological order, ignoring existing ghosts.
399
unavailable = [p for p in parents if p not in new_inventory_vf and
401
assert len(unavailable) == 0
402
# this entry has all the non ghost parents in the inventory
404
self._reweave_step('adding inventories')
405
# ugly but needed, weaves are just way tooooo slow else.
406
new_inventory_vf.add_lines_with_ghosts(rev_id, parents,
407
self.inventory.get_lines(rev_id))
409
# if this worked, the set of new_inventory_vf.names should equal
411
assert set(new_inventory_vf.versions()) == set(self.revisions.versions())
412
self.pb.update('Writing weave')
413
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
414
self.repo.control_weaves.delete('inventory.new', self.transaction)
415
self.inventory = None
416
self.pb.note('Inventory regenerated.')
418
def _check_garbage_inventories(self):
419
"""Check for garbage inventories which we cannot trust
421
We cant trust them because their pre-requisite file data may not
422
be present - all we know is that their revision was not installed.
424
inventories = set(self.inventory.versions())
425
revisions = set(self.revisions.versions())
426
garbage = inventories.difference(revisions)
427
self.garbage_inventories = len(garbage)
428
for revision_id in garbage:
429
mutter('Garbage inventory {%s} found.', revision_id)
431
def _fix_text_parents(self):
432
"""Fix bad versionedfile parent entries.
434
It is possible for the parents entry in a versionedfile entry to be
435
inconsistent with the values in the revision and inventory.
437
This method finds entries with such inconsistencies, corrects their
438
parent lists, and replaces the versionedfile with a corrected version.
440
transaction = self.repo.get_transaction()
441
versions = self.revisions.versions()
442
mutter('Prepopulating revision text cache with %d revisions',
444
vf_checker = self.repo._get_versioned_file_checker()
445
# List all weaves before altering, to avoid race conditions when we
446
# delete unused weaves.
447
weaves = list(enumerate(self.repo.weave_store))
448
for num, file_id in weaves:
449
self.pb.update('Fixing text parents', num,
450
len(self.repo.weave_store))
451
vf = self.repo.weave_store.get_weave(file_id, transaction)
452
versions_with_bad_parents, unused_versions = \
453
vf_checker.check_file_version_parents(vf, file_id)
454
if (len(versions_with_bad_parents) == 0 and
455
len(unused_versions) == 0):
457
full_text_versions = set()
458
self._fix_text_parent(file_id, vf, versions_with_bad_parents,
459
full_text_versions, unused_versions)
461
def _fix_text_parent(self, file_id, vf, versions_with_bad_parents,
462
full_text_versions, unused_versions):
463
"""Fix bad versionedfile entries in a single versioned file."""
464
mutter('fixing text parent: %r (%d versions)', file_id,
465
len(versions_with_bad_parents))
466
mutter('(%d need to be full texts, %d are unused)',
467
len(full_text_versions), len(unused_versions))
468
new_vf = self.repo.weave_store.get_empty('temp:%s' % file_id,
471
for version in vf.versions():
472
if version in unused_versions:
474
elif version in versions_with_bad_parents:
475
parents = versions_with_bad_parents[version][1]
477
parents = vf.get_parent_map([version])[version]
478
new_parents[version] = parents
479
if not len(new_parents):
480
# No used versions, remove the VF.
481
self.repo.weave_store.delete(file_id, self.transaction)
483
for version in TopoSorter(new_parents.items()).iter_topo_order():
484
lines = vf.get_lines(version)
485
parents = new_parents[version]
486
if parents and (parents[0] in full_text_versions):
487
# Force this record to be a fulltext, not a delta.
488
new_vf._add(version, lines, parents, False,
489
None, None, None, False)
491
new_vf.add_lines(version, parents, lines)
492
self.repo.weave_store.copy(new_vf, file_id, self.transaction)
493
self.repo.weave_store.delete('temp:%s' % file_id, self.transaction)
496
class PackReconciler(RepoReconciler):
497
"""Reconciler that reconciles a pack based repository.
499
Garbage inventories do not affect ancestry queries, and removal is
500
considerably more expensive as there is no separate versioned file for
501
them, so they are not cleaned. In short it is currently a no-op.
503
In future this may be a good place to hook in annotation cache checking,
504
index recreation etc.
507
# XXX: The index corruption that _fix_text_parents performs is needed for
508
# packs, but not yet implemented. The basic approach is to:
509
# - lock the names list
510
# - perform a customised pack() that regenerates data as needed
511
# - unlock the names list
512
# https://bugs.edge.launchpad.net/bzr/+bug/154173
514
def _reconcile_steps(self):
515
"""Perform the steps to reconcile this repository."""
516
if not self.thorough:
518
collection = self.repo._pack_collection
519
collection.ensure_loaded()
520
collection.lock_names()
522
packs = collection.all_packs()
523
all_revisions = self.repo.all_revision_ids()
524
total_inventories = len(list(
525
collection.inventory_index.combined_index.iter_all_entries()))
526
if len(all_revisions):
527
self._packer = repofmt.pack_repo.ReconcilePacker(
528
collection, packs, ".reconcile", all_revisions)
529
new_pack = self._packer.pack(pb=self.pb)
530
if new_pack is not None:
531
self._discard_and_save(packs)
533
# only make a new pack when there is data to copy.
534
self._discard_and_save(packs)
535
self.garbage_inventories = total_inventories - len(list(
536
collection.inventory_index.combined_index.iter_all_entries()))
538
collection._unlock_names()
540
def _discard_and_save(self, packs):
541
"""Discard some packs from the repository.
543
This removes them from the memory index, saves the in-memory index
544
which makes the newly reconciled pack visible and hides the packs to be
545
discarded, and finally renames the packs being discarded into the
546
obsolete packs directory.
548
:param packs: The packs to discard.
551
self.repo._pack_collection._remove_pack_from_memory(pack)
552
self.repo._pack_collection._save_pack_names()
553
self.repo._pack_collection._obsolete_packs(packs)