bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
1 |
# (C) 2005, 2006 Canonical Limited.
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Plugin to fix some potential data errors in a branch.
|
|
18 |
||
19 |
This makes sure that the inventory weave's DAG of ancestry is correct so that
|
|
20 |
attempts to fetch the branch over http, or certain merge operations cope
|
|
21 |
correctly.
|
|
22 |
||
23 |
This is most likely needed if you have used fetch-ghosts from bzrlib to
|
|
24 |
resolve ghosts after a baz (or otherwise) import and you get bizarre behaviour
|
|
25 |
when either exporting over http or when merging from other translated branches.
|
|
26 |
"""
|
|
27 |
||
28 |
||
29 |
import os |
|
30 |
||
31 |
||
32 |
import bzrlib.branch |
|
33 |
import bzrlib.progress |
|
34 |
from bzrlib.trace import mutter |
|
35 |
import bzrlib.ui as ui |
|
36 |
from bzrlib.weavefile import write_weave_v5 as w5 |
|
37 |
||
38 |
||
39 |
||
40 |
def reconcile(dir): |
|
41 |
"""Reconcile the data in dir. |
|
42 |
||
43 |
Currently this is limited to a inventory 'reweave'.
|
|
44 |
||
45 |
This is a convenience method, and the public api, for using a
|
|
46 |
Reconciler object.
|
|
47 |
"""
|
|
48 |
reconciler = Reconciler(dir) |
|
49 |
reconciler.reconcile() |
|
50 |
||
51 |
||
52 |
class Reconciler(object): |
|
53 |
"""Reconcilers are used to reconcile existing data. |
|
54 |
||
55 |
Currently this is limited to a single repository, and consists
|
|
56 |
of an inventory reweave with revision cross-checks.
|
|
57 |
"""
|
|
58 |
||
59 |
def __init__(self, dir): |
|
60 |
self.bzrdir = dir |
|
61 |
||
62 |
def reconcile(self): |
|
63 |
"""Actually perform the reconciliation.""" |
|
64 |
self.pb = ui.ui_factory.progress_bar() |
|
65 |
self.repo = self.bzrdir.open_repository() |
|
66 |
self.repo.lock_write() |
|
67 |
try: |
|
68 |
self.pb.note('Reconciling repository %s', |
|
69 |
self.repo.bzrdir.root_transport.base) |
|
70 |
self._reweave_inventory() |
|
71 |
finally: |
|
72 |
self.repo.unlock() |
|
73 |
self.pb.note('Reconciliation complete.') |
|
74 |
||
75 |
def _reweave_inventory(self): |
|
76 |
"""Regenerate the inventory weave for the repository from scratch.""" |
|
77 |
self.pb.update('Reading inventory data.') |
|
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
78 |
self.inventory = self.repo.get_inventory_weave() |
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
79 |
self.repo.control_weaves.put_weave('inventory.backup', |
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
80 |
self.inventory, |
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
81 |
self.repo.get_transaction()) |
82 |
self.pb.note('Backup Inventory created.') |
|
83 |
# asking for '' should never return a non-empty weave
|
|
84 |
new_inventory = self.repo.control_weaves.get_weave_or_empty('', |
|
85 |
self.repo.get_transaction()) |
|
86 |
||
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
87 |
# the total set of revisions to process
|
88 |
self.pending = set([file_id for file_id in self.repo.revision_store]) |
|
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
89 |
|
90 |
||
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
91 |
# the current line of history being processed.
|
92 |
# once we add in per inventory root ids this will involve a single
|
|
93 |
# pass within the revision knit at that point.
|
|
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
94 |
# these contain respectively the revision id and the not_ghost_parents.
|
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
95 |
self._rev_stack = [] |
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
96 |
self._parent_stack = [] |
97 |
self._pending_stack = [] |
|
98 |
# actual queue to reinsert. This is a single pass queue we can use to
|
|
99 |
# regenerate the inventory versioned file, and holds
|
|
100 |
# (rev_id, non_ghost_parents) entries
|
|
101 |
self._work_queue = [] |
|
102 |
# this is a set for fast lookups of queued revisions
|
|
103 |
self._work_set = set() |
|
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
104 |
# total steps = 1 read per revision + one insert into the inventory
|
105 |
self.total = len(self.pending) * 2 |
|
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
106 |
self.count = 0 |
107 |
||
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
108 |
# mapping from revision_id to parents
|
109 |
self._rev_graph = {} |
|
110 |
# we need the revision id of each revision and its available parents list
|
|
111 |
# we could do this on demand, but its easy to just build a graph.
|
|
112 |
for rev_id in self.pending: |
|
113 |
# put a revision into the to-process queue
|
|
114 |
self._graph_revision(rev_id) |
|
115 |
||
116 |
# now we do a depth first search of the revision graph until its empty.
|
|
117 |
# this gives us a topological order across all revisions in
|
|
118 |
# self._all_revisions.
|
|
119 |
# This is flattened to avoid deep recursion:
|
|
120 |
# At each step in the call graph we need to know which parent we are on.
|
|
121 |
# we do this by having three variables for each stack frame:
|
|
122 |
# revision_id being descended into (self._rev_stack)
|
|
123 |
# parents list for using when regenerating that revision_id's inventory
|
|
124 |
# (self._parent_stack)
|
|
125 |
# current queue of parents to descend into
|
|
126 |
# (self._pending_stack)
|
|
127 |
# putting all the parents into a pending list, left to
|
|
128 |
# right and processing the right most one each time. The same parent
|
|
129 |
# may occur at multiple places in the stack - thats fine. We check its
|
|
130 |
# not in the output before processing. However revisions cannot
|
|
131 |
# appear twice.
|
|
132 |
# the algorithm is roughly:
|
|
133 |
# for revision, parents in _rev_graph.items():
|
|
134 |
# if revision in self._all_revisions:
|
|
135 |
# continue
|
|
136 |
# add_revision_parents(revision, parents)
|
|
137 |
# def add_revision_parents(revision, parents)
|
|
138 |
# fpr parent in parents:
|
|
139 |
# add_revision_parents(parent, parent.parents)
|
|
140 |
# self._all_revisions.append(revision)
|
|
141 |
# self._all_revision_parents.appent(parents)
|
|
142 |
# however we tune this to visit fragmens of the graph
|
|
143 |
# and not double-visit entries.
|
|
144 |
# nevertheless the output is a
|
|
145 |
# self._work_queue of revisions, nonghost parents in
|
|
146 |
# topological order and
|
|
147 |
# a self._work_set which is a complete set of revisions.
|
|
148 |
while self._rev_graph: |
|
149 |
rev_id, parents = self._rev_graph.iteritems().next() |
|
150 |
# rev_id
|
|
151 |
self._stack_revision(rev_id, parents) |
|
152 |
while self._rev_stack: |
|
153 |
# loop until this call completes.
|
|
154 |
parents_to_visit = self._pending_stack[-1] |
|
155 |
# if all parents are done, the revision is done
|
|
156 |
if not parents_to_visit: |
|
157 |
# append the revision to the topo sorted list
|
|
158 |
self._unstack_revision() |
|
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
159 |
else: |
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
160 |
while self._pending_stack[-1]: |
161 |
# recurse depth first into a single parent
|
|
162 |
next_rev_id = self._pending_stack[-1].pop() |
|
163 |
if next_rev_id in self._work_set: |
|
164 |
# this parent was completed by a child on the
|
|
165 |
# call stack. skip it.
|
|
166 |
continue
|
|
167 |
# push it into the call stack
|
|
168 |
self._stack_revision(next_rev_id, self._rev_graph[next_rev_id]) |
|
169 |
# and do not continue processing parents until this 'call'
|
|
170 |
# has recursed.
|
|
171 |
break
|
|
172 |
||
173 |
# we are now recursing down a call stack.
|
|
174 |
# its a width first search which
|
|
175 |
||
176 |
### Useful if fiddling with this code.
|
|
177 |
### # cross check
|
|
178 |
### for index in range(len(self._work_queue)):
|
|
179 |
### rev = self._work_queue[index][0]
|
|
180 |
### for left_index in range(index):
|
|
181 |
### if rev in self._work_queue[left_index][1]:
|
|
182 |
### print "revision in parent list of earlier revision"
|
|
183 |
### import pdb;pdb.set_trace()
|
|
184 |
||
185 |
# we have topological order of revisions and non ghost parents ready.
|
|
186 |
while self._work_queue: |
|
187 |
rev_id, parents = self._work_queue.pop(0) |
|
188 |
# double check this really is in topological order.
|
|
189 |
unavailable = [p for p in parents if p not in new_inventory] |
|
190 |
assert len(unavailable) == 0 |
|
191 |
# this entry has all the non ghost parents in the inventory
|
|
192 |
# file already.
|
|
193 |
self._reweave_step('adding inventories') |
|
194 |
new_inventory.add(rev_id, parents, self.inventory.get(rev_id)) |
|
195 |
||
196 |
# if this worked, the set of new_inventory.names should equal
|
|
197 |
# self.pending
|
|
198 |
assert set(new_inventory.names()) == self.pending |
|
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
199 |
self.pb.update('Writing weave') |
200 |
self.repo.control_weaves.put_weave('inventory', |
|
201 |
new_inventory, |
|
202 |
self.repo.get_transaction()) |
|
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
203 |
self.inventory = None |
|
1570.1.2
by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.' |
204 |
self.pb.note('Inventory regenerated.') |
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
205 |
|
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
206 |
def _graph_revision(self, rev_id): |
207 |
"""Load a revision into the revision graph.""" |
|
208 |
# pick a random revision
|
|
209 |
# analyse revision id rev_id and put it in the stack.
|
|
210 |
self._reweave_step('loading revisions') |
|
|
1570.1.3
by Robert Collins
Optimise reconcilation to only hit each revision once. |
211 |
rev = self.repo.get_revision(rev_id) |
212 |
assert rev.revision_id == rev_id |
|
213 |
parents = [] |
|
214 |
for parent in rev.parent_ids: |
|
215 |
if parent in self.inventory: |
|
216 |
parents.append(parent) |
|
217 |
else: |
|
218 |
mutter('found ghost %s', parent) |
|
|
1570.1.4
by Robert Collins
Somewhat optimised version of reconciler. |
219 |
self._rev_graph[rev_id] = parents |
220 |
||
221 |
def _stack_revision(self, rev_id, parents): |
|
222 |
"""Add rev_id to the pending revision stack.""" |
|
223 |
self._rev_stack.append(rev_id) |
|
224 |
self._parent_stack.append(parents) |
|
225 |
self._pending_stack.append(list(parents)) |
|
226 |
||
227 |
def _unstack_revision(self): |
|
228 |
"""A revision has been completed. |
|
229 |
||
230 |
The revision is added to the work queue, and the data for
|
|
231 |
it popped from the call stack.
|
|
232 |
"""
|
|
233 |
rev_id = self._rev_stack.pop() |
|
234 |
parents = self._parent_stack.pop() |
|
235 |
self._pending_stack.pop() |
|
236 |
self._work_queue.append((rev_id, parents)) |
|
237 |
self._work_set.add(rev_id) |
|
238 |
# and remove it from the rev graph as its now complete
|
|
239 |
self._rev_graph.pop(rev_id) |
|
240 |
||
241 |
def _reweave_step(self, message): |
|
242 |
"""Mark a single step of regeneration complete.""" |
|
243 |
self.pb.update(message, self.count, self.total) |
|
244 |
self.count += 1 |