/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
26
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
27
    processor,
28
    )
29
30
31
# Maximum number of parents for a merge commit
32
# NOTE: used as an *exclusive* bound by InfoProcessor: the parent-count
# buckets that get pre-initialised to zero are 0 .. _MAX_PARENTS - 1.
_MAX_PARENTS = 16
33
34
35
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic that the handlers accumulate."""
        # One counter per known command / file command name
        self.cmd_counts = dict((cmd, 0) for cmd in commands.COMMAND_NAMES)
        self.file_cmd_counts = dict(
            (fc, 0) for fc in commands.FILE_COMMAND_NAMES)
        # Number of commits seen, keyed by how many parents they have.
        # Buckets 0 .. _MAX_PARENTS - 1 are created up front; larger
        # buckets are added on demand by commit_handler().
        self.parent_counts = dict((i, 0) for i in range(_MAX_PARENTS))
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: each blob id lives in at most one of the
        # 'new', 'used' and 'multi' sets at a time (see _track_blob)
        self.blobs = dict(
            (usage, set())
            for usage in ['new', 'used', 'multi', 'unknown', 'unmarked'])
        # Head tracking: map of commit mark to ref
        self.heads = {}
        # Map of ref to the id of the last commit seen on that ref
        self.last_ids = {}

    def post_process(self):
        """Dump the statistics gathered while processing the stream.

        The collected statistics are not modified, so this method can
        safely be called more than once.
        """
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            # Only report the parent counts that actually occurred. Walking
            # the dictionary (instead of a fixed range) also picks up the
            # buckets added for commits with _MAX_PARENTS or more parents.
            seen = [n for n in sorted(self.parent_counts)
                    if self.parent_counts[n] > 0]
            p_names = ["parents-%d" % n for n in seen]
            p_values = [self.parent_counts[n] for n in seen]
            # A list of pairs (not a dict) so the output order is stable
            flags = [
                ('separate authors found', self.separate_authors_found),
                ('executables', self.executables_found),
                ('symlinks', self.symlinks_found),
                ('blobs referenced by SHA', self.sha_blob_references),
                ]
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis",
                [flag[0] for flag in flags],
                [flag[1] for flag in flags], _found)
            # NOTE(review): invert_dict presumably maps each ref to the
            # collection of commit ids that are heads of it - confirm
            # against helpers.
            heads = helpers.invert_dict(self.heads)
            head_names = sorted(heads)
            self._dump_stats_group("Head analysis", head_names,
                [heads[name] for name in head_names],
                None, _iterable_as_config_list)
            # NOTE: self.committers is collected but currently not reported.

        # Blob stats
        if self.cmd_counts['blob']:
            usages = sorted(self.blobs)
            # In verbose mode, don't list every blob used. The set is
            # skipped rather than deleted so the statistics stay intact.
            if self.verbose:
                usages = [usage for usage in usages if usage != 'used']
            self._dump_stats_group("Blob usage tracking", usages,
                [self.blobs[usage] for usage in usages],
                len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = [
                ('lightweight tags', self.lightweight_tags),
                ('other resets', self.named_branches),
                ]
            self._dump_stats_group("Reset analysis",
                [stat[0] for stat in reset_stats],
                [stat[1] for stat in reset_stats])

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading of the group (the section name in
          verbose mode)
        :param names: the names of the statistics
        :param values: the values of the statistics, in the same order
          as names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # Each print() call is given a single string so that it behaves the
        # same whether print is a statement or a function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Spaces are replaced so that the name is a single token
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        # Use get() so that a commit with more parents than the
        # pre-initialised buckets allow for is counted, not a KeyError.
        parent_count = len(cmd.parents)
        self.parent_counts[parent_count] = (
            self.parent_counts.get(parent_count, 0) + 1)
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading colon denotes a mark; anything else is
                    # counted as a reference by SHA
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
        # Track the heads
        if cmd.parents:
            parents = cmd.parents
        else:
            # No explicit parents: the parent is implied to be the last
            # commit seen on the same ref, if there was one
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        for parent in parents:
            # it's ok if the parent isn't there - another
            # commit may have already removed it
            self.heads.pop(parent, None)
        self.heads[cmd.id] = cmd.ref
        self.last_ids[cmd.ref] = cmd.id

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; later references change nothing.
        A mark not yet seen in any of those sets is recorded as 'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
234
235
def _found(b):
236
    """Format a found boolean as a string."""
237
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
238
0.64.30 by Ian Clatworthy
add heads analysis to info processor
239
def _iterable_as_config_list(s):
240
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
241
    
242
    To match what ConfigObj expects, a single item list has a trailing comma.
243
    """
244
    items = sorted(s)
245
    if len(items) == 1:
246
        return "%s," % (items[0],)
247
    else:
248
        return ", ".join(items)