1
# (C) 2005, 2006 Canonical Limited.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Plugin to fix some potential data errors in a branch.
19
This makes sure that the inventory weave's DAG of ancestry is correct so that
20
attempts to fetch the branch over http, or certain merge operations cope
23
This is most likely needed if you have used fetch-ghosts from bzrlib to
24
resolve ghosts after a baz (or otherwise) import and you get bizarre behaviour
25
when either exporting over http or when merging from other translated branches.
33
import bzrlib.progress
34
from bzrlib.trace import mutter
35
import bzrlib.ui as ui
36
from bzrlib.weavefile import write_weave_v5 as w5
41
"""Reconcile the data in dir.
43
Currently this is limited to a inventory 'reweave'.
45
This is a convenience method, and the public api, for using a
48
reconciler = Reconciler(dir)
49
reconciler.reconcile()
52
class Reconciler(object):
53
"""Reconcilers are used to reconcile existing data.
55
Currently this is limited to a single repository, and consists
56
of an inventory reweave with revision cross-checks.
59
def __init__(self, dir):
63
"""Actually perform the reconciliation."""
64
self.pb = ui.ui_factory.progress_bar()
65
self.repo = self.bzrdir.open_repository()
66
self.repo.lock_write()
68
self.pb.note('Reconciling repository %s',
69
self.repo.bzrdir.root_transport.base)
70
self._reweave_inventory()
73
self.pb.note('Reconciliation complete.')
75
def _reweave_inventory(self):
76
"""Regenerate the inventory weave for the repository from scratch."""
77
self.pb.update('Reading inventory data.')
78
self.inventory = self.repo.get_inventory_weave()
79
self.repo.control_weaves.put_weave('inventory.backup',
81
self.repo.get_transaction())
82
self.pb.note('Backup Inventory created.')
83
# asking for '' should never return a non-empty weave
84
new_inventory = self.repo.control_weaves.get_weave_or_empty('',
85
self.repo.get_transaction())
87
# the total set of revisions to process
88
self.pending = set([file_id for file_id in self.repo.revision_store])
91
# the current line of history being processed.
92
# once we add in per inventory root ids this will involve a single
93
# pass within the revision knit at that point.
94
# these contain respectively the revision id and the not_ghost_parents.
96
self._parent_stack = []
97
self._pending_stack = []
98
# actual queue to reinsert. This is a single pass queue we can use to
99
# regenerate the inventory versioned file, and holds
100
# (rev_id, non_ghost_parents) entries
101
self._work_queue = []
102
# this is a set for fast lookups of queued revisions
103
self._work_set = set()
104
# total steps = 1 read per revision + one insert into the inventory
105
self.total = len(self.pending) * 2
108
# mapping from revision_id to parents
110
# we need the revision id of each revision and its available parents list
111
# we could do this on demand, but its easy to just build a graph.
112
for rev_id in self.pending:
113
# put a revision into the to-process queue
114
self._graph_revision(rev_id)
116
# now we do a depth first search of the revision graph until its empty.
117
# this gives us a topological order across all revisions in
118
# self._all_revisions.
119
# This is flattened to avoid deep recursion:
120
# At each step in the call graph we need to know which parent we are on.
121
# we do this by having three variables for each stack frame:
122
# revision_id being descended into (self._rev_stack)
123
# parents list for using when regenerating that revision_id's inventory
124
# (self._parent_stack)
125
# current queue of parents to descend into
126
# (self._pending_stack)
127
# putting all the parents into a pending list, left to
128
# right and processing the right most one each time. The same parent
129
# may occur at multiple places in the stack - thats fine. We check its
130
# not in the output before processing. However revisions cannot
132
# the algorithm is roughly:
133
# for revision, parents in _rev_graph.items():
134
# if revision in self._all_revisions:
136
# add_revision_parents(revision, parents)
137
# def add_revision_parents(revision, parents)
138
# fpr parent in parents:
139
# add_revision_parents(parent, parent.parents)
140
# self._all_revisions.append(revision)
141
# self._all_revision_parents.appent(parents)
142
# however we tune this to visit fragmens of the graph
143
# and not double-visit entries.
144
# nevertheless the output is a
145
# self._work_queue of revisions, nonghost parents in
146
# topological order and
147
# a self._work_set which is a complete set of revisions.
148
while self._rev_graph:
149
rev_id, parents = self._rev_graph.iteritems().next()
151
self._stack_revision(rev_id, parents)
152
while self._rev_stack:
153
# loop until this call completes.
154
parents_to_visit = self._pending_stack[-1]
155
# if all parents are done, the revision is done
156
if not parents_to_visit:
157
# append the revision to the topo sorted list
158
self._unstack_revision()
160
while self._pending_stack[-1]:
161
# recurse depth first into a single parent
162
next_rev_id = self._pending_stack[-1].pop()
163
if next_rev_id in self._work_set:
164
# this parent was completed by a child on the
165
# call stack. skip it.
167
# push it into the call stack
168
self._stack_revision(next_rev_id, self._rev_graph[next_rev_id])
169
# and do not continue processing parents until this 'call'
173
# we are now recursing down a call stack.
174
# its a width first search which
176
### Useful if fiddling with this code.
178
### for index in range(len(self._work_queue)):
179
### rev = self._work_queue[index][0]
180
### for left_index in range(index):
181
### if rev in self._work_queue[left_index][1]:
182
### print "revision in parent list of earlier revision"
183
### import pdb;pdb.set_trace()
185
# we have topological order of revisions and non ghost parents ready.
186
while self._work_queue:
187
rev_id, parents = self._work_queue.pop(0)
188
# double check this really is in topological order.
189
unavailable = [p for p in parents if p not in new_inventory]
190
assert len(unavailable) == 0
191
# this entry has all the non ghost parents in the inventory
193
self._reweave_step('adding inventories')
194
new_inventory.add(rev_id, parents, self.inventory.get(rev_id))
196
# if this worked, the set of new_inventory.names should equal
198
assert set(new_inventory.names()) == self.pending
199
self.pb.update('Writing weave')
200
self.repo.control_weaves.put_weave('inventory',
202
self.repo.get_transaction())
203
self.inventory = None
204
self.pb.note('Inventory regenerated.')
206
def _graph_revision(self, rev_id):
207
"""Load a revision into the revision graph."""
208
# pick a random revision
209
# analyse revision id rev_id and put it in the stack.
210
self._reweave_step('loading revisions')
211
rev = self.repo.get_revision(rev_id)
212
assert rev.revision_id == rev_id
214
for parent in rev.parent_ids:
215
if parent in self.inventory:
216
parents.append(parent)
218
mutter('found ghost %s', parent)
219
self._rev_graph[rev_id] = parents
221
def _stack_revision(self, rev_id, parents):
222
"""Add rev_id to the pending revision stack."""
223
self._rev_stack.append(rev_id)
224
self._parent_stack.append(parents)
225
self._pending_stack.append(list(parents))
227
def _unstack_revision(self):
228
"""A revision has been completed.
230
The revision is added to the work queue, and the data for
231
it popped from the call stack.
233
rev_id = self._rev_stack.pop()
234
parents = self._parent_stack.pop()
235
self._pending_stack.pop()
236
self._work_queue.append((rev_id, parents))
237
self._work_set.add(rev_id)
238
# and remove it from the rev graph as its now complete
239
self._rev_graph.pop(rev_id)
241
def _reweave_step(self, message):
242
"""Mark a single step of regeneration complete."""
243
self.pb.update(message, self.count, self.total)