/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
1
# (C) 2005, 2006 Canonical Limited.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Plugin to fix some potential data errors in a branch.
18
19
This makes sure that the inventory weave's DAG of ancestry is correct so that
20
attempts to fetch the branch over http, or certain merge operations cope
21
correctly.
22
23
This is most likely needed if you have used fetch-ghosts from bzrlib to
24
resolve ghosts after a baz (or otherwise) import and you get bizarre behaviour
25
when either exporting over http or when merging from other translated branches.
26
"""
27
28
29
import os
30
31
32
import bzrlib.branch
33
import bzrlib.progress
34
from bzrlib.trace import mutter
35
import bzrlib.ui as ui
36
from bzrlib.weavefile import write_weave_v5 as w5
37
38
39
40
def reconcile(dir):
41
    """Reconcile the data in dir.
42
43
    Currently this is limited to a inventory 'reweave'.
44
45
    This is a convenience method, and the public api, for using a 
46
    Reconciler object.
47
    """
48
    reconciler = Reconciler(dir)
49
    reconciler.reconcile()
50
51
52
class Reconciler(object):
53
    """Reconcilers are used to reconcile existing data.
54
55
    Currently this is limited to a single repository, and consists
56
    of an inventory reweave with revision cross-checks.
57
    """
58
59
    def __init__(self, dir):
60
        self.bzrdir = dir
61
62
    def reconcile(self):
63
        """Actually perform the reconciliation."""
64
        self.pb = ui.ui_factory.progress_bar()
65
        self.repo = self.bzrdir.open_repository()
66
        self.repo.lock_write()
67
        try:
68
            self.pb.note('Reconciling repository %s',
69
                         self.repo.bzrdir.root_transport.base)
70
            self._reweave_inventory()
71
        finally:
72
            self.repo.unlock()
73
        self.pb.note('Reconciliation complete.')
74
75
    def _reweave_inventory(self):
76
        """Regenerate the inventory weave for the repository from scratch."""
77
        self.pb.update('Reading inventory data.')
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
78
        self.inventory = self.repo.get_inventory_weave()
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
79
        self.repo.control_weaves.put_weave('inventory.backup',
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
80
                                           self.inventory,
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
81
                                           self.repo.get_transaction())
82
        self.pb.note('Backup Inventory created.')
83
        # asking for '' should never return a non-empty weave
84
        new_inventory = self.repo.control_weaves.get_weave_or_empty('',
85
            self.repo.get_transaction())
86
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
87
        # the total set of revisions to process
88
        self.pending = set([file_id for file_id in self.repo.revision_store])
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
89
90
        
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
91
        # the current line of history being processed.
92
        # once we add in per inventory root ids this will involve a single
93
        # pass within the revision knit at that point.
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
94
        # these contain respectively the revision id and the not_ghost_parents.
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
95
        self._rev_stack = []
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
96
        self._parent_stack = []
97
        self._pending_stack = []
98
        # actual queue to reinsert. This is a single pass queue we can use to 
99
        # regenerate the inventory versioned file, and holds
100
        # (rev_id, non_ghost_parents) entries
101
        self._work_queue = []
102
        # this is a set for fast lookups of queued revisions
103
        self._work_set = set()
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
104
        # total steps = 1 read per revision + one insert into the inventory
105
        self.total = len(self.pending) * 2
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
106
        self.count = 0
107
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
108
        # mapping from revision_id to parents
109
        self._rev_graph = {}
110
        # we need the revision id of each revision and its available parents list
111
        # we could do this on demand, but its easy to just build a graph.
112
        for rev_id in self.pending:
113
            # put a revision into the to-process queue
114
            self._graph_revision(rev_id)
115
116
        # now we do a depth first search of the revision graph until its empty.
117
        # this gives us a topological order across all revisions in 
118
        # self._all_revisions.
119
        # This is flattened to avoid deep recursion:
120
        # At each step in the call graph we need to know which parent we are on.
121
        # we do this by having three variables for each stack frame:
122
        # revision_id being descended into (self._rev_stack)
123
        # parents list for using when regenerating that revision_id's inventory
124
        #   (self._parent_stack)
125
        # current queue of parents to descend into 
126
        #   (self._pending_stack)
127
        # putting all the parents into a pending list, left to 
128
        # right and processing the right most one each time. The same parent
129
        # may occur at multiple places in the stack - thats fine. We check its
130
        # not in the output before processing. However revisions cannot
131
        # appear twice.
132
        # the algorithm is roughly:
133
        # for revision, parents in _rev_graph.items():
134
        #   if revision in self._all_revisions:
135
        #     continue
136
        #   add_revision_parents(revision, parents)
137
        #  def add_revision_parents(revision, parents)
138
        #    fpr parent in parents:
139
        #       add_revision_parents(parent, parent.parents)
140
        #   self._all_revisions.append(revision)
141
        #   self._all_revision_parents.appent(parents)
142
        # however we tune this to visit fragmens of the graph
143
        # and not double-visit entries.
144
        # nevertheless the output is a 
145
        # self._work_queue of revisions, nonghost parents in 
146
        # topological order and
147
        # a self._work_set which is a complete set of revisions.
148
        while self._rev_graph:
149
            rev_id, parents = self._rev_graph.iteritems().next()
150
            # rev_id
151
            self._stack_revision(rev_id, parents)
152
            while self._rev_stack:
153
                # loop until this call completes.
154
                parents_to_visit = self._pending_stack[-1]
155
                # if all parents are done, the revision is done
156
                if not parents_to_visit:
157
                    # append the revision to the topo sorted list
158
                    self._unstack_revision()
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
159
                else:
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
160
                    while self._pending_stack[-1]:
161
                        # recurse depth first into a single parent 
162
                        next_rev_id = self._pending_stack[-1].pop()
163
                        if next_rev_id in self._work_set:
164
                            # this parent was completed by a child on the
165
                            # call stack. skip it.
166
                            continue
167
                        # push it into the call stack
168
                        self._stack_revision(next_rev_id, self._rev_graph[next_rev_id])
169
                        # and do not continue processing parents until this 'call' 
170
                        # has recursed.
171
                        break
172
            
173
                # we are now recursing down a call stack.
174
                # its a width first search which 
175
176
###         Useful if fiddling with this code.
177
###        # cross check
178
###        for index in range(len(self._work_queue)):
179
###            rev = self._work_queue[index][0]
180
###            for left_index in range(index):
181
###                if rev in self._work_queue[left_index][1]:
182
###                    print "revision in parent list of earlier revision"
183
###                    import pdb;pdb.set_trace()
184
185
        # we have topological order of revisions and non ghost parents ready.
186
        while self._work_queue:
187
            rev_id, parents = self._work_queue.pop(0)
188
            # double check this really is in topological order.
189
            unavailable = [p for p in parents if p not in new_inventory]
190
            assert len(unavailable) == 0
191
            # this entry has all the non ghost parents in the inventory
192
            # file already.
193
            self._reweave_step('adding inventories')
194
            new_inventory.add(rev_id, parents, self.inventory.get(rev_id))
195
196
        # if this worked, the set of new_inventory.names should equal
197
        # self.pending
198
        assert set(new_inventory.names()) == self.pending
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
199
        self.pb.update('Writing weave')
200
        self.repo.control_weaves.put_weave('inventory',
201
                                           new_inventory,
202
                                           self.repo.get_transaction())
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
203
        self.inventory = None
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
204
        self.pb.note('Inventory regenerated.')
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
205
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
206
    def _graph_revision(self, rev_id):
207
        """Load a revision into the revision graph."""
208
        # pick a random revision
209
        # analyse revision id rev_id and put it in the stack.
210
        self._reweave_step('loading revisions')
1570.1.3 by Robert Collins
Optimise reconcilation to only hit each revision once.
211
        rev = self.repo.get_revision(rev_id)
212
        assert rev.revision_id == rev_id
213
        parents = []
214
        for parent in rev.parent_ids:
215
            if parent in self.inventory:
216
                parents.append(parent)
217
            else:
218
                mutter('found ghost %s', parent)
1570.1.4 by Robert Collins
Somewhat optimised version of reconciler.
219
        self._rev_graph[rev_id] = parents   
220
221
    def _stack_revision(self, rev_id, parents):
222
        """Add rev_id to the pending revision stack."""
223
        self._rev_stack.append(rev_id)
224
        self._parent_stack.append(parents)
225
        self._pending_stack.append(list(parents))
226
227
    def _unstack_revision(self):
228
        """A revision has been completed.
229
230
        The revision is added to the work queue, and the data for
231
        it popped from the call stack.
232
        """
233
        rev_id = self._rev_stack.pop()
234
        parents = self._parent_stack.pop()
235
        self._pending_stack.pop()
236
        self._work_queue.append((rev_id, parents))
237
        self._work_set.add(rev_id)
238
        # and remove it from the rev graph as its now complete
239
        self._rev_graph.pop(rev_id)
240
241
    def _reweave_step(self, message):
242
        """Mark a single step of regeneration complete."""
243
        self.pb.update(message, self.count, self.total)
244
        self.count += 1