# Copyright (C) 2006-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Reconcilers are able to fix some potential data errors in a branch."""

__all__ = [
    'BranchReconciler',
    'KnitReconciler',
    'PackReconciler',
    'VersionedFileRepoReconciler',
    ]

from .. import (
    errors,
    revision as _mod_revision,
    ui,
    )
from ..reconcile import ReconcileResult
from ..i18n import gettext
from ..trace import mutter
from ..tsort import topo_sort
from .versionedfile import AdapterFactory, ChunkedContentFactory


class VersionedFileRepoReconciler(object):
    """Reconciler that reconciles a repository.

    The goal of repository reconciliation is to make any derived data
    consistent with the core data committed by a user. This can involve
    reindexing, or removing unreferenced data if that can interfere with
    queries in a given repository.

    Currently this consists of an inventory reweave with revision cross-checks.
    """

    def __init__(self, repo, other=None, thorough=False):
        """Construct a RepoReconciler.

        :param thorough: perform a thorough check which may take longer but
                         will correct non-data loss issues such as incorrect
                         cached data.
        """
        self.garbage_inventories = 0
        self.inconsistent_parents = 0
        self.aborted = False
        self.repo = repo
        self.thorough = thorough

    def reconcile(self):
        """Perform reconciliation.

        After reconciliation the following attributes document found issues:

        * `inconsistent_parents`: The number of revisions in the repository
          whose ancestry was being reported incorrectly.
        * `garbage_inventories`: The number of inventory objects without
          revisions that were garbage collected.
        """
        with self.repo.lock_write(), \
                ui.ui_factory.nested_progress_bar() as self.pb:
            self._reconcile_steps()
            ret = ReconcileResult()
            ret.aborted = self.aborted
            ret.garbage_inventories = self.garbage_inventories
            ret.inconsistent_parents = self.inconsistent_parents
            return ret

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        self._reweave_inventory()

    def _reweave_inventory(self):
        """Regenerate the inventory weave for the repository from scratch.

        This is a smart function: it will only do the reweave if doing it
        will correct data issues. The self.thorough flag controls whether
        only data-loss causing issues (!self.thorough) or all issues
        (self.thorough) are treated as requiring the reweave.
        """
        transaction = self.repo.get_transaction()
        self.pb.update(gettext('Reading inventory data'))
        self.inventory = self.repo.inventories
        self.revisions = self.repo.revisions
        # the total set of revisions to process
        self.pending = {key[-1] for key in self.revisions.keys()}

        # mapping from revision_id to parents
        self._rev_graph = {}
        # errors that we detect
        self.inconsistent_parents = 0
        # we need the revision id of each revision and its available parents list
        self._setup_steps(len(self.pending))
        for rev_id in self.pending:
            # put a revision into the graph.
            self._graph_revision(rev_id)
        self._check_garbage_inventories()
        # if there are no inconsistent_parents and
        # (no garbage inventories or we are not doing a thorough check)
        if (not self.inconsistent_parents
                and (not self.garbage_inventories or not self.thorough)):
            ui.ui_factory.note(gettext('Inventory ok.'))
            return
        self.pb.update(gettext('Backing up inventory'), 0, 0)
        self.repo._backup_inventory()
        ui.ui_factory.note(gettext('Backup inventory created.'))
        new_inventories = self.repo._temp_inventories()

        # we have topological order of revisions and non ghost parents ready.
        self._setup_steps(len(self._rev_graph))
        revision_keys = [(rev_id,) for rev_id in topo_sort(self._rev_graph)]
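        # Rewrite every inventory record so that its parent pointers match the
        # ghost-filtered revision graph computed above.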
        stream = self._change_inv_parents(
            self.inventory.get_record_stream(revision_keys, 'unordered', True),
            self._new_inv_parents,
            set(revision_keys))
        new_inventories.insert_record_stream(stream)
        # if this worked, the set of new_inventories.keys should equal
        # self.pending
        if not (set(new_inventories.keys())
                == {(revid,) for revid in self.pending}):
            raise AssertionError()
        self.pb.update(gettext('Writing weave'))
        self.repo._activate_new_inventory()
        self.inventory = None
        ui.ui_factory.note(gettext('Inventory regenerated.'))

    def _new_inv_parents(self, revision_key):
        """Lookup ghost-filtered parents for revision_key."""
        # Use the filtered ghostless parents list:
        return tuple([(revid,) for revid in self._rev_graph[revision_key[-1]]])

    def _change_inv_parents(self, stream, get_parents, all_revision_keys):
        """Adapt a record stream to reconcile the parents."""
        for record in stream:
            wanted_parents = get_parents(record.key)
            if wanted_parents and wanted_parents[0] not in all_revision_keys:
                # The check for the left most parent only handles knit
                # compressors, but this code only applies to knit and weave
                # repositories anyway.
                chunks = record.get_bytes_as('chunked')
                yield ChunkedContentFactory(record.key, wanted_parents, record.sha1, chunks)
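            # Otherwise the stored compression parent is still usable, so wrap
            # the original record and only substitute the corrected parent list.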
            else:
                adapted_record = AdapterFactory(
                    record.key, wanted_parents, record)
                yield adapted_record
            self._reweave_step('adding inventories')

    def _setup_steps(self, new_total):
        """Setup the markers we need to control the progress bar."""
        self.total = new_total
        self.count = 0

    def _graph_revision(self, rev_id):
        """Load a revision into the revision graph."""
        # pick a random revision
        # analyse revision id rev_id and put it in the stack.
        self._reweave_step('loading revisions')
        rev = self.repo.get_revision_reconcile(rev_id)
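        # Keep only parents that are actually present in the repository;
        # ghosts are dropped from the rebuilt graph.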
        parents = []
        for parent in rev.parent_ids:
            if self._parent_is_available(parent):
                parents.append(parent)
            else:
                mutter('found ghost %s', parent)
        self._rev_graph[rev_id] = parents

    def _check_garbage_inventories(self):
        """Check for garbage inventories which we cannot trust

        We cant trust them because their pre-requisite file data may not
        be present - all we know is that their revision was not installed.
        """
        if not self.thorough:
            return
        inventories = set(self.inventory.keys())
        revisions = set(self.revisions.keys())
        garbage = inventories.difference(revisions)
        self.garbage_inventories = len(garbage)
        for revision_key in garbage:
            mutter('Garbage inventory {%s} found.', revision_key[-1])

    def _parent_is_available(self, parent):
        """True if parent is a fully available revision

        A fully available revision has a inventory and a revision object in the
        repository.
        """
        if parent in self._rev_graph:
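            # Already in the graph, so we know it is a non-ghost revision.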
202
inv_present = (1 == len(self.inventory.get_parent_map([(parent,)])))
203
return (inv_present and self.repo.has_revision(parent))
205
def _reweave_step(self, message):
206
"""Mark a single step of regeneration complete."""
207
self.pb.update(message, self.count, self.total)


class KnitReconciler(VersionedFileRepoReconciler):
    """Reconciler that reconciles a knit format repository.

    This will detect garbage inventories and remove them in thorough mode.
    """

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        if self.thorough:
            try:
                self._load_indexes()
            except errors.BzrCheckError:
                self.aborted = True
                return
            # knits never suffer this
            self._gc_inventory()
            self._fix_text_parents()

    def _load_indexes(self):
        """Load indexes for the reconciliation."""
        self.transaction = self.repo.get_transaction()
        self.pb.update(gettext('Reading indexes'), 0, 2)
        self.inventory = self.repo.inventories
        self.pb.update(gettext('Reading indexes'), 1, 2)
        self.repo._check_for_inconsistent_revision_parents()
        self.revisions = self.repo.revisions
        self.pb.update(gettext('Reading indexes'), 2, 2)

    def _gc_inventory(self):
        """Remove inventories that are not referenced from the revision store."""
        self.pb.update(gettext('Checking unused inventories'), 0, 1)
        self._check_garbage_inventories()
        self.pb.update(gettext('Checking unused inventories'), 1, 3)
        if not self.garbage_inventories:
            ui.ui_factory.note(gettext('Inventory ok.'))
            return
        self.pb.update(gettext('Backing up inventory'), 0, 0)
        self.repo._backup_inventory()
        ui.ui_factory.note(gettext('Backup Inventory created'))
        # asking for '' should never return a non-empty weave
        new_inventories = self.repo._temp_inventories()
        # we have topological order of revisions and non ghost parents ready.
        graph = self.revisions.get_parent_map(self.revisions.keys())
        revision_keys = topo_sort(graph)
        revision_ids = [key[-1] for key in revision_keys]
        self._setup_steps(len(revision_keys))
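        # The corrected parent list for each inventory comes straight from the
        # revision graph computed above.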
        stream = self._change_inv_parents(
            self.inventory.get_record_stream(revision_keys, 'unordered', True),
            graph.__getitem__,
            set(revision_keys))
        new_inventories.insert_record_stream(stream)
        # if this worked, the set of new_inventories.keys should equal
        # the revision_keys list
        if not (set(new_inventories.keys()) == set(revision_keys)):
            raise AssertionError()
        self.pb.update(gettext('Writing weave'))
        self.repo._activate_new_inventory()
        self.inventory = None
        ui.ui_factory.note(gettext('Inventory regenerated.'))

    def _fix_text_parents(self):
        """Fix bad versionedfile parent entries.

        It is possible for the parents entry in a versionedfile entry to be
        inconsistent with the values in the revision and inventory.

        This method finds entries with such inconsistencies, corrects their
        parent lists, and replaces the versionedfile with a corrected version.
        """
        transaction = self.repo.get_transaction()
        versions = [key[-1] for key in self.revisions.keys()]
        mutter('Prepopulating revision text cache with %d revisions',
               len(versions))
        vf_checker = self.repo._get_versioned_file_checker()
        bad_parents, unused_versions = vf_checker.check_file_version_parents(
            self.repo.texts, self.pb)
        text_index = vf_checker.text_index
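        # bad_parents maps (file_id, revision_id) to a pair of (stored parents,
        # correct parents); text_index covers the text keys referenced by the
        # inventories.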
        per_id_bad_parents = {}
        for key in unused_versions:
            # Ensure that every file with unused versions gets rewritten.
            # NB: This is really not needed, reconcile != pack.
            per_id_bad_parents[key[0]] = {}
        # Generate per-knit/weave data.
        for key, details in bad_parents.items():
            file_id = key[0]
            rev_id = key[1]
            knit_parents = tuple([parent[-1] for parent in details[0]])
            correct_parents = tuple([parent[-1] for parent in details[1]])
            file_details = per_id_bad_parents.setdefault(file_id, {})
            file_details[rev_id] = (knit_parents, correct_parents)
        file_id_versions = {}
        for text_key in text_index:
            versions_list = file_id_versions.setdefault(text_key[0], [])
            versions_list.append(text_key[1])
        # Do the reconcile of individual weaves.
        for num, file_id in enumerate(per_id_bad_parents):
            self.pb.update(gettext('Fixing text parents'), num,
                           len(per_id_bad_parents))
            versions_with_bad_parents = per_id_bad_parents[file_id]
            id_unused_versions = set(key[-1] for key in unused_versions
                                     if key[0] == file_id)
            if file_id in file_id_versions:
                file_versions = file_id_versions[file_id]
            else:
                # This id was present in the disk store but is not referenced
                # by any revision at all.
                file_versions = []
            self._fix_text_parent(file_id, versions_with_bad_parents,
                                  id_unused_versions, file_versions)

    def _fix_text_parent(self, file_id, versions_with_bad_parents,
                         unused_versions, all_versions):
        """Fix bad versionedfile entries in a single versioned file."""
        mutter('fixing text parent: %r (%d versions)', file_id,
               len(versions_with_bad_parents))
        mutter('(%d are unused)', len(unused_versions))
        new_file_id = b'temp:%s' % file_id
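        # The surviving texts are copied under a temporary file id with their
        # corrected parent lists, then swapped into place further down.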
        new_parents = {}
        needed_keys = set()
        for version in all_versions:
            if version in unused_versions:
                continue
            elif version in versions_with_bad_parents:
                parents = versions_with_bad_parents[version][1]
            else:
                pmap = self.repo.texts.get_parent_map([(file_id, version)])
                parents = [key[-1] for key in pmap[(file_id, version)]]
            new_parents[(new_file_id, version)] = [
                (new_file_id, parent) for parent in parents]
            needed_keys.add((file_id, version))

        def fix_parents(stream):
            for record in stream:
                chunks = record.get_bytes_as('chunked')
                new_key = (new_file_id, record.key[-1])
                parents = new_parents[new_key]
                yield ChunkedContentFactory(new_key, parents, record.sha1, chunks)
        stream = self.repo.texts.get_record_stream(
            needed_keys, 'topological', True)
        self.repo._remove_file_id(new_file_id)
        self.repo.texts.insert_record_stream(fix_parents(stream))
        self.repo._remove_file_id(file_id)
        if len(new_parents):
            self.repo._move_file_id(new_file_id, file_id)


class PackReconciler(VersionedFileRepoReconciler):
    """Reconciler that reconciles a pack based repository.

    Garbage inventories do not affect ancestry queries, and removal is
    considerably more expensive as there is no separate versioned file for
    them, so they are not cleaned. In short it is currently a no-op.

    In future this may be a good place to hook in annotation cache checking,
    index recreation etc.
    """

    # XXX: The index corruption that _fix_text_parents performs is needed for
    # packs, but not yet implemented. The basic approach is to:
    #  - lock the names list
    #  - perform a customised pack() that regenerates data as needed
    #  - unlock the names list
    # https://bugs.launchpad.net/bzr/+bug/154173

    def __init__(self, repo, other=None, thorough=False,
                 canonicalize_chks=False):
        super(PackReconciler, self).__init__(repo, other=other,
                                             thorough=thorough)
        self.canonicalize_chks = canonicalize_chks

    def _reconcile_steps(self):
        """Perform the steps to reconcile this repository."""
        if not self.thorough:
            return
        collection = self.repo._pack_collection
        collection.ensure_loaded()
        collection.lock_names()
        try:
            packs = collection.all_packs()
            all_revisions = self.repo.all_revision_ids()
            total_inventories = len(list(
                collection.inventory_index.combined_index.iter_all_entries()))
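            # The inventory count is measured again after the repack below;
            # the difference is reported as garbage_inventories.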
            if len(all_revisions):
                if self.canonicalize_chks:
                    reconcile_meth = self.repo._canonicalize_chks_pack
                else:
                    reconcile_meth = self.repo._reconcile_pack
                new_pack = reconcile_meth(collection, packs, ".reconcile",
                                          all_revisions, self.pb)
                if new_pack is not None:
                    self._discard_and_save(packs)
            else:
                # only make a new pack when there is data to copy.
                self._discard_and_save(packs)
            self.garbage_inventories = total_inventories - len(list(
                collection.inventory_index.combined_index.iter_all_entries()))
        finally:
            collection._unlock_names()

    def _discard_and_save(self, packs):
        """Discard some packs from the repository.

        This removes them from the memory index, saves the in-memory index
        which makes the newly reconciled pack visible and hides the packs to be
        discarded, and finally renames the packs being discarded into the
        obsolete packs directory.

        :param packs: The packs to discard.
        """
        for pack in packs:
            self.repo._pack_collection._remove_pack_from_memory(pack)
        self.repo._pack_collection._save_pack_names()
        self.repo._pack_collection._obsolete_packs(packs)


class BranchReconciler(object):
    """Reconciler that works on a branch."""

    def __init__(self, a_branch, thorough=False):
        self.fixed_history = None
        self.thorough = thorough
        self.branch = a_branch

    def reconcile(self):
        with self.branch.lock_write(), \
                ui.ui_factory.nested_progress_bar() as self.pb:
            ret = ReconcileResult()
            ret.fixed_history = self._reconcile_steps()
            return ret

    def _reconcile_steps(self):
        return self._reconcile_revision_history()

    def _reconcile_revision_history(self):
        last_revno, last_revision_id = self.branch.last_revision_info()
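        # Walk the left-hand ancestry to count the real history length and
        # compare it with the revno the branch has cached.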
447
graph = self.branch.repository.get_graph()
449
for revid in graph.iter_lefthand_ancestry(
450
last_revision_id, (_mod_revision.NULL_REVISION,)):
451
real_history.append(revid)
452
except errors.RevisionNotPresent:
453
pass # Hit a ghost left hand parent
454
real_history.reverse()
        if last_revno != len(real_history):
            # Technically for Branch5 formats, it is more efficient to use
            # set_revision_history, as this will regenerate it again.
            # Not really worth a whole BranchReconciler class just for this,
            # though.
            ui.ui_factory.note(gettext('Fixing last revision info {0} '
                                       ' => {1}').format(
                last_revno, len(real_history)))
            self.branch.set_last_revision_info(len(real_history),
                                               last_revision_id)
            return True
        else:
            ui.ui_factory.note(gettext('revision_history ok.'))
            return False