/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dumps stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
26
    processor,
27
    )
28
29
30
# Number of parent-count buckets pre-initialised for commits (0 parents up to
# _MAX_PARENTS - 1 parents)
31
_MAX_PARENTS = 16
32
33
34
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    # Blob usage categories, in the order they are reported.
    _BLOB_USAGES = ('new', 'used', 'multi', 'unknown')

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        All arguments are passed straight through to
        ``processor.ImportProcessor.__init__``.

        :param target: may be left as None.
        :param params: custom parameters for the processor.
        :param verbose: selects the config-file style output used by
            ``post_process``.
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic to its empty/zero state."""
        # Per-command and per-file-command counters, one slot per known name.
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Number of commits seen, keyed by how many parents they have.
        self.parent_counts = dict.fromkeys(range(0, _MAX_PARENTS), 0)
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: each category holds a set of blob marks.
        self.blobs = {}
        for usage in self._BLOB_USAGES:
            self.blobs[usage] = set()

    def post_process(self):
        """Dump the statistics gathered by the handlers.

        The collected statistics themselves are left unmodified.
        """
        if self.verbose:
            note('# Configuration file generated by bzr fast-import --info')
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Walk the keys actually present (in numeric order) so that
            # commits with more parents than were pre-initialised are
            # reported too.
            for parent_total in sorted(self.parent_counts):
                count = self.parent_counts[parent_total]
                if count > 0:
                    p_names.append("parents-%d" % parent_total)
                    p_values.append(count)
            # Ordered pairs rather than a dict so the output order is stable.
            flags = [
                ('separate authors found', self.separate_authors_found),
                ('executables', self.executables_found),
                ('symlinks', self.symlinks_found),
                ('blobs referenced by SHA', self.sha_blob_references),
                ]
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis",
                [name for name, value in flags],
                [value for name, value in flags], _found)
            # self.committers is collected but not reported yet.

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used. Filter the
            # category out instead of deleting it from self.blobs so the
            # gathered data stays intact.
            usages = [usage for usage in self._BLOB_USAGES
                      if not (self.verbose and usage == 'used')]
            self._dump_stats_group("Blob usage tracking", usages,
                [self.blobs[usage] for usage in usages], len)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = [
                ('lightweight tags', self.lightweight_tags),
                ('other resets', self.named_branches),
                ]
            self._dump_stats_group("Reset analysis",
                [name for name, value in reset_stats],
                [value for name, value in reset_stats])

    def _dump_stats_group(self, title, names, values, formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: heading printed for the group.
        :param names: names of the statistics; paired with ``values``
            by position.
        :param values: the value of each statistic.
        :param formatter: optional callable applied to each value before
            printing in non-verbose mode. Verbose mode prints the raw
            value. If None, values are printed as they are.
        """
        # Single-argument print(...) behaves the same as a print statement
        # on Python 2 and is also valid on Python 3.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if formatter is not None:
                    value = formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        The blob's mark starts out in the 'new' usage category.
        """
        self.cmd_counts[cmd.name] += 1
        self.blobs['new'].add(cmd.mark)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Counts the commit, its number of parents and each of its file
        commands, records the committer, and notes whether a separate
        author, executables, symlinks or SHA blob references were seen.
        """
        self.cmd_counts[cmd.name] += 1
        # Use get() so a commit with more parents than were pre-initialised
        # is counted rather than raising KeyError.
        parent_total = len(cmd.parents)
        self.parent_counts[parent_total] = \
            self.parent_counts.get(parent_total, 0) + 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if not isinstance(fc, commands.FileModifyCommand):
                continue
            if fc.is_executable:
                self.executables_found = True
            if fc.kind == commands.SYMLINK_KIND:
                self.symlinks_found = True
            if fc.dataref is not None:
                if fc.dataref[0] == ':':
                    # A leading ':' means the reference is a mark.
                    self._track_blob(fc.dataref[1:])
                else:
                    self.sha_blob_references = True

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Refs under ``refs/tags/`` are counted as lightweight tags; any
        other ref is appended to ``named_branches``.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one reference to the blob identified by ``mark``.

        A mark moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; further references leave it in
        'multi'. A mark in none of those categories is added to 'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
195
196
def _found(b):
197
    """Format a found boolean as a string."""
198
    return ['no', 'found'][b]