1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Reconcilers are able to fix some potential data errors in a branch."""
35
from bzrlib.trace import mutter, note
36
from bzrlib.tsort import TopoSorter
39
def reconcile(dir, other=None):
40
"""Reconcile the data in dir.
42
Currently this is limited to a inventory 'reweave'.
44
This is a convenience method, for using a Reconciler object.
46
Directly using Reconciler is recommended for library users that
47
desire fine grained control or analysis of the found issues.
49
:param other: another bzrdir to reconcile against.
51
reconciler = Reconciler(dir, other=other)
52
reconciler.reconcile()
55
class Reconciler(object):
56
"""Reconcilers are used to reconcile existing data."""
58
def __init__(self, dir, other=None):
59
"""Create a Reconciler."""
63
"""Perform reconciliation.
65
After reconciliation the following attributes document found issues:
66
inconsistent_parents: The number of revisions in the repository whose
67
ancestry was being reported incorrectly.
68
garbage_inventories: The number of inventory objects without revisions
69
that were garbage collected.
71
self.pb = ui.ui_factory.nested_progress_bar()
78
"""Helper function for performing reconciliation."""
79
self.repo = self.bzrdir.find_repository()
80
self.pb.note('Reconciling repository %s',
81
self.repo.bzrdir.root_transport.base)
82
self.pb.update("Reconciling repository", 0, 1)
83
repo_reconciler = self.repo.reconcile(thorough=True)
84
self.inconsistent_parents = repo_reconciler.inconsistent_parents
85
self.garbage_inventories = repo_reconciler.garbage_inventories
86
if repo_reconciler.aborted:
88
'Reconcile aborted: revision index has inconsistent parents.')
90
'Run "bzr check" for more details.')
92
self.pb.note('Reconciliation complete.')
95
class RepoReconciler(object):
96
"""Reconciler that reconciles a repository.
98
The goal of repository reconciliation is to make any derived data
99
consistent with the core data committed by a user. This can involve
100
reindexing, or removing unreferenced data if that can interfere with
101
queries in a given repository.
103
Currently this consists of an inventory reweave with revision cross-checks.
106
def __init__(self, repo, other=None, thorough=False):
107
"""Construct a RepoReconciler.
109
:param thorough: perform a thorough check which may take longer but
110
will correct non-data loss issues such as incorrect
113
self.garbage_inventories = 0
114
self.inconsistent_parents = 0
117
self.thorough = thorough
120
"""Perform reconciliation.
122
After reconciliation the following attributes document found issues:
123
inconsistent_parents: The number of revisions in the repository whose
124
ancestry was being reported incorrectly.
125
garbage_inventories: The number of inventory objects without revisions
126
that were garbage collected.
128
self.repo.lock_write()
130
self.pb = ui.ui_factory.nested_progress_bar()
132
self._reconcile_steps()
138
def _reconcile_steps(self):
139
"""Perform the steps to reconcile this repository."""
140
self._reweave_inventory()
142
def _reweave_inventory(self):
143
"""Regenerate the inventory weave for the repository from scratch.
145
This is a smart function: it will only do the reweave if doing it
146
will correct data issues. The self.thorough flag controls whether
147
only data-loss causing issues (!self.thorough) or all issues
148
(self.thorough) are treated as requiring the reweave.
150
# local because needing to know about WeaveFile is a wart we want to hide
151
from bzrlib.weave import WeaveFile, Weave
152
transaction = self.repo.get_transaction()
153
self.pb.update('Reading inventory data.')
154
self.inventory = self.repo.get_inventory_weave()
155
# the total set of revisions to process
156
self.pending = set([rev_id for rev_id in self.repo._revision_store.all_revision_ids(transaction)])
158
# mapping from revision_id to parents
160
# errors that we detect
161
self.inconsistent_parents = 0
162
# we need the revision id of each revision and its available parents list
163
self._setup_steps(len(self.pending))
164
for rev_id in self.pending:
165
# put a revision into the graph.
166
self._graph_revision(rev_id)
167
self._check_garbage_inventories()
168
# if there are no inconsistent_parents and
169
# (no garbage inventories or we are not doing a thorough check)
170
if (not self.inconsistent_parents and
171
(not self.garbage_inventories or not self.thorough)):
172
self.pb.note('Inventory ok.')
174
self.pb.update('Backing up inventory...', 0, 0)
175
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.repo.get_transaction())
176
self.pb.note('Backup Inventory created.')
177
# asking for '' should never return a non-empty weave
178
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
179
self.repo.get_transaction())
181
# we have topological order of revisions and non ghost parents ready.
182
self._setup_steps(len(self._rev_graph))
183
for rev_id in TopoSorter(self._rev_graph.items()).iter_topo_order():
184
parents = self._rev_graph[rev_id]
185
# double check this really is in topological order.
186
unavailable = [p for p in parents if p not in new_inventory_vf]
188
raise AssertionError('unavailable parents: %r'
190
# this entry has all the non ghost parents in the inventory
192
self._reweave_step('adding inventories')
193
if isinstance(new_inventory_vf, WeaveFile):
194
# It's really a WeaveFile, but we call straight into the
195
# Weave's add method to disable the auto-write-out behaviour.
196
# This is done to avoid a revision_count * time-to-write additional overhead on
198
new_inventory_vf._check_write_ok()
199
Weave._add_lines(new_inventory_vf, rev_id, parents,
200
self.inventory.get_lines(rev_id), None, None, None, False, True)
202
new_inventory_vf.add_lines(rev_id, parents, self.inventory.get_lines(rev_id))
204
if isinstance(new_inventory_vf, WeaveFile):
205
new_inventory_vf._save()
206
# if this worked, the set of new_inventory_vf.names should equal
208
if not (set(new_inventory_vf.versions()) == self.pending):
209
raise AssertionError()
210
self.pb.update('Writing weave')
211
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.repo.get_transaction())
212
self.repo.control_weaves.delete('inventory.new', self.repo.get_transaction())
213
self.inventory = None
214
self.pb.note('Inventory regenerated.')
216
def _setup_steps(self, new_total):
217
"""Setup the markers we need to control the progress bar."""
218
self.total = new_total
221
def _graph_revision(self, rev_id):
222
"""Load a revision into the revision graph."""
223
# pick a random revision
224
# analyse revision id rev_id and put it in the stack.
225
self._reweave_step('loading revisions')
226
rev = self.repo.get_revision_reconcile(rev_id)
228
for parent in rev.parent_ids:
229
if self._parent_is_available(parent):
230
parents.append(parent)
232
mutter('found ghost %s', parent)
233
self._rev_graph[rev_id] = parents
234
if self._parents_are_inconsistent(rev_id, parents):
235
self.inconsistent_parents += 1
236
mutter('Inconsistent inventory parents: id {%s} '
237
'inventory claims %r, '
238
'available parents are %r, '
239
'unavailable parents are %r',
241
set(self.inventory.get_parent_map([rev_id])[rev_id]),
243
set(rev.parent_ids).difference(set(parents)))
245
def _parents_are_inconsistent(self, rev_id, parents):
246
"""Return True if the parents list of rev_id does not match the weave.
248
This detects inconsistencies based on the self.thorough value:
249
if thorough is on, the first parent value is checked as well as ghost
251
Otherwise only the ghost differences are evaluated.
253
weave_parents = self.inventory.get_parent_map([rev_id])[rev_id]
254
weave_missing_old_ghosts = set(weave_parents) != set(parents)
255
first_parent_is_wrong = (
256
len(weave_parents) and len(parents) and
257
parents[0] != weave_parents[0])
259
return weave_missing_old_ghosts or first_parent_is_wrong
261
return weave_missing_old_ghosts
263
def _check_garbage_inventories(self):
264
"""Check for garbage inventories which we cannot trust
266
We cant trust them because their pre-requisite file data may not
267
be present - all we know is that their revision was not installed.
269
if not self.thorough:
271
inventories = set(self.inventory.versions())
272
revisions = set(self._rev_graph.keys())
273
garbage = inventories.difference(revisions)
274
self.garbage_inventories = len(garbage)
275
for revision_id in garbage:
276
mutter('Garbage inventory {%s} found.', revision_id)
278
def _parent_is_available(self, parent):
279
"""True if parent is a fully available revision
281
A fully available revision has a inventory and a revision object in the
284
return (parent in self._rev_graph or
285
(parent in self.inventory and self.repo.has_revision(parent)))
287
def _reweave_step(self, message):
288
"""Mark a single step of regeneration complete."""
289
self.pb.update(message, self.count, self.total)
293
class KnitReconciler(RepoReconciler):
294
"""Reconciler that reconciles a knit format repository.
296
This will detect garbage inventories and remove them in thorough mode.
299
def _reconcile_steps(self):
300
"""Perform the steps to reconcile this repository."""
304
except errors.BzrCheckError:
307
# knits never suffer this
309
self._fix_text_parents()
311
def _load_indexes(self):
312
"""Load indexes for the reconciliation."""
313
self.transaction = self.repo.get_transaction()
314
self.pb.update('Reading indexes.', 0, 2)
315
self.inventory = self.repo.get_inventory_weave()
316
self.pb.update('Reading indexes.', 1, 2)
317
self.repo._check_for_inconsistent_revision_parents()
318
self.revisions = self.repo._revision_store.get_revision_file(self.transaction)
319
self.pb.update('Reading indexes.', 2, 2)
321
def _gc_inventory(self):
322
"""Remove inventories that are not referenced from the revision store."""
323
self.pb.update('Checking unused inventories.', 0, 1)
324
self._check_garbage_inventories()
325
self.pb.update('Checking unused inventories.', 1, 3)
326
if not self.garbage_inventories:
327
self.pb.note('Inventory ok.')
329
self.pb.update('Backing up inventory...', 0, 0)
330
self.repo.control_weaves.copy(self.inventory, 'inventory.backup', self.transaction)
331
self.pb.note('Backup Inventory created.')
332
# asking for '' should never return a non-empty weave
333
new_inventory_vf = self.repo.control_weaves.get_empty('inventory.new',
336
# we have topological order of revisions and non ghost parents ready.
337
self._setup_steps(len(self.revisions))
338
revision_ids = self.revisions.versions()
339
graph = self.revisions.get_parent_map(revision_ids)
340
for rev_id in TopoSorter(graph.items()).iter_topo_order():
341
parents = graph[rev_id]
342
# double check this really is in topological order, ignoring existing ghosts.
343
unavailable = [p for p in parents if p not in new_inventory_vf and
346
raise AssertionError(
347
'unavailable parents: %r' % (unavailable,))
348
# this entry has all the non ghost parents in the inventory
350
self._reweave_step('adding inventories')
351
# ugly but needed, weaves are just way tooooo slow else.
352
new_inventory_vf.add_lines_with_ghosts(rev_id, parents,
353
self.inventory.get_lines(rev_id))
355
# if this worked, the set of new_inventory_vf.names should equal
357
if not(set(new_inventory_vf.versions()) == set(self.revisions.versions())):
358
raise AssertionError()
359
self.pb.update('Writing weave')
360
self.repo.control_weaves.copy(new_inventory_vf, 'inventory', self.transaction)
361
self.repo.control_weaves.delete('inventory.new', self.transaction)
362
self.inventory = None
363
self.pb.note('Inventory regenerated.')
365
def _check_garbage_inventories(self):
366
"""Check for garbage inventories which we cannot trust
368
We cant trust them because their pre-requisite file data may not
369
be present - all we know is that their revision was not installed.
371
inventories = set(self.inventory.versions())
372
revisions = set(self.revisions.versions())
373
garbage = inventories.difference(revisions)
374
self.garbage_inventories = len(garbage)
375
for revision_id in garbage:
376
mutter('Garbage inventory {%s} found.', revision_id)
378
def _fix_text_parents(self):
379
"""Fix bad versionedfile parent entries.
381
It is possible for the parents entry in a versionedfile entry to be
382
inconsistent with the values in the revision and inventory.
384
This method finds entries with such inconsistencies, corrects their
385
parent lists, and replaces the versionedfile with a corrected version.
387
transaction = self.repo.get_transaction()
388
versions = self.revisions.versions()
389
mutter('Prepopulating revision text cache with %d revisions',
391
vf_checker = self.repo._get_versioned_file_checker()
392
# List all weaves before altering, to avoid race conditions when we
393
# delete unused weaves.
394
weaves = list(enumerate(self.repo.weave_store))
395
for num, file_id in weaves:
396
self.pb.update('Fixing text parents', num,
397
len(self.repo.weave_store))
398
vf = self.repo.weave_store.get_weave(file_id, transaction)
399
versions_with_bad_parents, unused_versions = \
400
vf_checker.check_file_version_parents(vf, file_id)
401
if (len(versions_with_bad_parents) == 0 and
402
len(unused_versions) == 0):
404
full_text_versions = set()
405
self._fix_text_parent(file_id, vf, versions_with_bad_parents,
406
full_text_versions, unused_versions)
408
def _fix_text_parent(self, file_id, vf, versions_with_bad_parents,
409
full_text_versions, unused_versions):
410
"""Fix bad versionedfile entries in a single versioned file."""
411
mutter('fixing text parent: %r (%d versions)', file_id,
412
len(versions_with_bad_parents))
413
mutter('(%d need to be full texts, %d are unused)',
414
len(full_text_versions), len(unused_versions))
415
new_vf = self.repo.weave_store.get_empty('temp:%s' % file_id,
418
for version in vf.versions():
419
if version in unused_versions:
421
elif version in versions_with_bad_parents:
422
parents = versions_with_bad_parents[version][1]
424
parents = vf.get_parent_map([version])[version]
425
new_parents[version] = parents
426
if not len(new_parents):
427
# No used versions, remove the VF.
428
self.repo.weave_store.delete(file_id, self.transaction)
430
for version in TopoSorter(new_parents.items()).iter_topo_order():
431
lines = vf.get_lines(version)
432
parents = new_parents[version]
433
if parents and (parents[0] in full_text_versions):
434
# Force this record to be a fulltext, not a delta.
435
new_vf._add(version, lines, parents, False,
436
None, None, None, False)
438
new_vf.add_lines(version, parents, lines)
439
self.repo.weave_store.copy(new_vf, file_id, self.transaction)
440
self.repo.weave_store.delete('temp:%s' % file_id, self.transaction)
443
class PackReconciler(RepoReconciler):
444
"""Reconciler that reconciles a pack based repository.
446
Garbage inventories do not affect ancestry queries, and removal is
447
considerably more expensive as there is no separate versioned file for
448
them, so they are not cleaned. In short it is currently a no-op.
450
In future this may be a good place to hook in annotation cache checking,
451
index recreation etc.
454
# XXX: The index corruption that _fix_text_parents performs is needed for
455
# packs, but not yet implemented. The basic approach is to:
456
# - lock the names list
457
# - perform a customised pack() that regenerates data as needed
458
# - unlock the names list
459
# https://bugs.edge.launchpad.net/bzr/+bug/154173
461
def _reconcile_steps(self):
462
"""Perform the steps to reconcile this repository."""
463
if not self.thorough:
465
collection = self.repo._pack_collection
466
collection.ensure_loaded()
467
collection.lock_names()
469
packs = collection.all_packs()
470
all_revisions = self.repo.all_revision_ids()
471
total_inventories = len(list(
472
collection.inventory_index.combined_index.iter_all_entries()))
473
if len(all_revisions):
474
self._packer = repofmt.pack_repo.ReconcilePacker(
475
collection, packs, ".reconcile", all_revisions)
476
new_pack = self._packer.pack(pb=self.pb)
477
if new_pack is not None:
478
self._discard_and_save(packs)
480
# only make a new pack when there is data to copy.
481
self._discard_and_save(packs)
482
self.garbage_inventories = total_inventories - len(list(
483
collection.inventory_index.combined_index.iter_all_entries()))
485
collection._unlock_names()
487
def _discard_and_save(self, packs):
488
"""Discard some packs from the repository.
490
This removes them from the memory index, saves the in-memory index
491
which makes the newly reconciled pack visible and hides the packs to be
492
discarded, and finally renames the packs being discarded into the
493
obsolete packs directory.
495
:param packs: The packs to discard.
498
self.repo._pack_collection._remove_pack_from_memory(pack)
499
self.repo._pack_collection._save_pack_names()
500
self.repo._pack_collection._obsolete_packs(packs)