/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
1616.1.5 by Martin Pool
Cleanup and document some check code
1
# Copyright (C) 2005, 2006 by Canonical Ltd
1 by mbp at sourcefrog
import from baz patch-364
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1335 by Martin Pool
doc
17
# TODO: Check ancestries are correct for every revision: includes
18
# every revision committed so far, and in a reasonable order.
19
1347 by Martin Pool
- refactor check code into method object
20
# TODO: Also check non-mainline revisions mentioned as parents.
21
22
# TODO: Check for extra files in the control directory.
23
1348 by Martin Pool
- more refactoring of check code
24
# TODO: Check revision, inventory and entry objects have all 
25
# required fields.
26
1185.16.101 by mbp at sourcefrog
todo
27
# TODO: Get every revision in the revision-store even if they're not
28
# referenced by history and make sure they're all valid.
1347 by Martin Pool
- refactor check code into method object
29
1616.1.5 by Martin Pool
Cleanup and document some check code
30
# TODO: Perhaps have a way to record errors other than by raising exceptions;
31
# would perhaps be enough to accumulate exception objects in a list without
32
# raising them.  If there's more than one exception it'd be good to see them
33
# all.
34
1732.2.4 by Martin Pool
Split check into Branch.check and Repository.check
35
from bzrlib.errors import BzrCheckError, NoSuchRevision
36
from bzrlib.symbol_versioning import *
37
from bzrlib.trace import mutter, note, warning
1104 by Martin Pool
- Add a simple UIFactory
38
import bzrlib.ui
1 by mbp at sourcefrog
import from baz patch-364
39
1104 by Martin Pool
- Add a simple UIFactory
40
1347 by Martin Pool
- refactor check code into method object
41
class Check(object):
    """Check a repository.

    Typical use is to construct the object with a repository, call
    check() to walk every stored revision, and then call
    report_results() to log a summary of what was found.

    Hard consistency errors are raised as BzrCheckError; softer findings
    (ghosts, missing parent links, missing inventory sha1 values) are
    accumulated on the instance and only reported.
    """

    # The Check object interacts with InventoryEntry.check, etc.

    def __init__(self, repository):
        """Create a checker for repository; no I/O is done here."""
        self.repository = repository
        # Counters that are not updated in this class are presumably
        # maintained by the InventoryEntry.check implementations, which
        # receive this object as their first argument.
        self.checked_text_cnt = 0
        self.checked_rev_cnt = 0
        # ids of revisions named as a parent but absent from the repository
        self.ghosts = []
        self.repeated_text_cnt = 0
        # maps parent-id -> list of revision ids that named it as a parent
        # while it was not among the planned revisions
        self.missing_parent_links = {}
        self.missing_inventory_sha_cnt = 0
        self.missing_revision_cnt = 0
        # maps (file-id, version) -> sha1; used by InventoryFile._check
        self.checked_texts = {}
        # maps weave-id -> True for every weave that passed its check
        self.checked_weaves = {}

    def check(self):
        """Check every planned revision and every reachable weave.

        The repository is read-locked for the duration.  The lock and the
        progress bar are each released in their own finally clause, so a
        failure while creating or finishing the progress bar cannot leave
        the repository locked.

        :raise BzrCheckError: if there's a consistency error.
        """
        self.repository.lock_read()
        try:
            self.progress = bzrlib.ui.ui_factory.nested_progress_bar()
            try:
                self.progress.update('retrieving inventory', 0, 0)
                # do not put in init, as it should be done with progress,
                # and inside the lock.
                self.inventory_weave = self.repository.get_inventory_weave()
                self.plan_revisions()
                self.check_weaves()
                # check_one_rev may append to planned_revisions, so the
                # length has to be re-read on every pass.
                revno = 0
                while revno < len(self.planned_revisions):
                    rev_id = self.planned_revisions[revno]
                    self.progress.update('checking revision', revno,
                                         len(self.planned_revisions))
                    revno += 1
                    self.check_one_rev(rev_id)
            finally:
                self.progress.finished()
        finally:
            self.repository.unlock()

    def plan_revisions(self):
        """Build self.planned_revisions from the repository's revision ids.

        Requires self.progress and self.inventory_weave to be set, which
        check() does before calling this.

        :raise BzrCheckError: if a stored revision has no version in the
            inventory weave.
        """
        repository = self.repository
        self.planned_revisions = set(repository.all_revision_ids())
        self.progress.clear()
        inventoried = set(self.inventory_weave.versions())
        awol = self.planned_revisions - inventoried
        if awol:
            # sorted so that the message does not depend on set ordering
            raise BzrCheckError('Stored revisions missing from inventory'
                '{%s}' % ','.join(sorted(awol)))
        # A list from here on: check() indexes it and check_one_rev
        # appends to it.
        self.planned_revisions = list(self.planned_revisions)

    def report_results(self, verbose):
        """Log a summary of the check through bzrlib.trace.note.

        :param verbose: if true, also list each ghost and each missing
            parent link individually rather than only their counts.
        """
        note('checked repository %s format %s',
             self.repository.bzrdir.root_transport,
             self.repository._format)
        note('%6d revisions', self.checked_rev_cnt)
        note('%6d unique file texts', self.checked_text_cnt)
        note('%6d repeated file texts', self.repeated_text_cnt)
        note('%6d weaves', len(self.checked_weaves))
        if self.missing_inventory_sha_cnt:
            note('%6d revisions are missing inventory_sha1',
                 self.missing_inventory_sha_cnt)
        if self.missing_revision_cnt:
            note('%6d revisions are mentioned but not present',
                 self.missing_revision_cnt)
        if self.ghosts:
            note('%6d ghost revisions', len(self.ghosts))
            if verbose:
                for ghost in self.ghosts:
                    note('      %s', ghost)
        if self.missing_parent_links:
            note('%6d revisions missing parents in ancestry',
                 len(self.missing_parent_links))
            if verbose:
                for link, linkers in self.missing_parent_links.items():
                    note('      %s should be in the ancestry for:', link)
                    for linker in linkers:
                        note('       * %s', linker)

    def check_one_rev(self, rev_id):
        """Check one revision.

        rev_id - the one to check

        Parents that are not yet planned are recorded in
        missing_parent_links.  If such a parent is present in the
        repository its ancestry is queued for checking; otherwise the
        parent itself is recorded (once) as a ghost.

        :raise BzrCheckError: if the stored revision carries a different
            revision id, or its inventory sha1 does not match.
        """
        rev = self.repository.get_revision(rev_id)

        if rev.revision_id != rev_id:
            raise BzrCheckError('wrong internal revision id in revision {%s}'
                                % rev_id)

        for parent in rev.parent_ids:
            # list based so somewhat slow,
            # TODO have a planned_revisions list and set.
            if parent in self.planned_revisions:
                continue
            self.missing_parent_links.setdefault(parent, []).append(rev_id)
            if self.repository.has_revision(parent):
                for missing in self.repository.get_ancestry(parent):
                    if (missing is not None
                        and missing not in self.planned_revisions):
                        self.planned_revisions.append(missing)
            elif parent not in self.ghosts:
                # The ghost is the absent parent, not the revision that
                # refers to it; record each ghost only once.
                self.ghosts.append(parent)

        if rev.inventory_sha1:
            inv_sha1 = self.repository.get_inventory_sha1(rev_id)
            if inv_sha1 != rev.inventory_sha1:
                raise BzrCheckError('Inventory sha1 hash doesn\'t match'
                    ' value in revision {%s}' % rev_id)
        else:
            # A missing sha1 is counted and logged, not treated as fatal.
            self.missing_inventory_sha_cnt += 1
            mutter("no inventory_sha1 on revision {%s}", rev_id)
        self._check_revision_tree(rev_id)
        self.checked_rev_cnt += 1

    def check_weaves(self):
        """Check all the weaves we can get our hands on.

        The inventory weave is always checked.  The per-file weaves are
        checked only when the weave store reports itself as listable.
        """
        n_weaves = 1
        weave_ids = []
        if self.repository.weave_store.listable():
            weave_ids = list(self.repository.weave_store)
            n_weaves = len(weave_ids)
        self.progress.update('checking weave', 0, n_weaves)
        self.inventory_weave.check(progress_bar=self.progress)
        for i, weave_id in enumerate(weave_ids):
            self.progress.update('checking weave', i, n_weaves)
            w = self.repository.weave_store.get_weave(weave_id,
                    self.repository.get_transaction())
            # No progress here, because it looks ugly.
            w.check()
            self.checked_weaves[weave_id] = True

    def _check_revision_tree(self, rev_id):
        """Check the inventory of the tree for rev_id.

        Verifies that no file id and no path occurs twice, and asks each
        inventory entry to check itself.

        :raise BzrCheckError: on a duplicated file id or path.
        """
        tree = self.repository.revision_tree(rev_id)
        inv = tree.inventory
        # All file ids are screened for duplicates before any entry is
        # asked to check itself.
        seen_ids = set()
        for file_id in inv:
            if file_id in seen_ids:
                raise BzrCheckError('duplicated file_id {%s} '
                                    'in inventory for revision {%s}'
                                    % (file_id, rev_id))
            seen_ids.add(file_id)
        for file_id in inv:
            ie = inv[file_id]
            ie.check(self, rev_id, inv, tree)
        seen_names = set()
        for path, ie in inv.iter_entries():
            if path in seen_names:
                raise BzrCheckError('duplicated path %s '
                                    'in inventory for revision {%s}'
                                    % (path, rev_id))
            seen_names.add(path)
1349 by Martin Pool
- more refactoring of check code
195
1347 by Martin Pool
- refactor check code into method object
196
1449 by Robert Collins
teach check about ghosts
197
def check(branch, verbose):
    """Run consistency checks on a branch.

    The branch is read-locked while branch.check() and the check of its
    repository run, and is unlocked again even if one of them raises.
    Both results are reported only after the lock has been released.

    Results are reported through logging.

    :param branch: the branch to check; its repository is checked too,
        and is passed a list holding the branch's last revision.
    :param verbose: passed through to report_results of both results.
    :raise BzrCheckError: if there's a consistency error.
    """
    branch.lock_read()
    try:
        branch_result = branch.check()
        repo_result = branch.repository.check([branch.last_revision()])
    finally:
        branch.unlock()
    branch_result.report_results(verbose)
    repo_result.report_results(verbose)