/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
26
    processor,
27
    )
28
29
30
# Maximum number of parents for a merge commit
31
# InfoProcessor pre-seeds its parent-count table with the keys
# 0 .. _MAX_PARENTS - 1 (the upper bound is exclusive).
_MAX_PARENTS = 16
32
33
34
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.

    The statistics are reset by pre_process(), accumulated by the
    various *_handler() methods and printed by post_process().
    """

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        :param target: passed through to ImportProcessor; may be None
        :param params: passed through to ImportProcessor
        :param verbose: passed through to ImportProcessor; this class
          reads it back as self.verbose to select the output format
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Reset every statistic to its initial (empty/zero) value."""
        # Init statistics
        self.cmd_counts = {}
        for cmd in commands.COMMAND_NAMES:
            self.cmd_counts[cmd] = 0
        self.file_cmd_counts = {}
        for fc in commands.FILE_COMMAND_NAMES:
            self.file_cmd_counts[fc] = 0
        # Pre-seed the common parent counts; commit_handler() adds
        # further keys on demand for commits with more parents.
        self.parent_counts = {}
        for i in range(_MAX_PARENTS):
            self.parent_counts[i] = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: each mark lives in at most one of
        # 'new', 'used' and 'multi' (see _track_blob)
        self.blobs = {}
        for usage in ['new', 'used', 'multi', 'unknown']:
            self.blobs[usage] = set()

    def post_process(self):
        """Print the statistics gathered since pre_process().

        The collected statistics themselves are left untouched.
        """
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Walk the keys actually present (in numeric order) rather
            # than a fixed range, so that counts recorded for commits
            # with _MAX_PARENTS or more parents are reported too.
            for i in sorted(self.parent_counts):
                count = self.parent_counts[i]
                if count > 0:
                    p_names.append("parents-%d" % i)
                    p_values.append(count)
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used. The category
            # is filtered out of the report instead of being deleted
            # from self.blobs, so _track_blob() keeps working afterwards.
            usages = [u for u in self.blobs.keys()
                if not (self.verbose and u == 'used')]
            self._dump_stats_group("Blob usage tracking", usages,
                [self.blobs[u] for u in usages], len, _set_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading of the group (the section name in
          verbose mode)
        :param names: the names of the statistics; spaces are replaced
          by dashes in verbose mode
        :param values: the values, in the same order as names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # Every print below takes exactly one parenthesised argument, so
        # it emits the same text whether print is a statement or a function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        # NOTE(review): the fast-import format makes a blob's mark
        # optional; presumably cmd.mark is None in that case - confirm
        # against the parser. Such a blob cannot be referenced by mark,
        # so it is counted above but not tracked.
        if cmd.mark is not None:
            self.blobs['new'].add(":" + cmd.mark)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        # Use get() so that a commit with more parents than were
        # pre-seeded is counted rather than raising KeyError.
        parent_count = len(cmd.parents)
        self.parent_counts[parent_count] = (
            self.parent_counts.get(parent_count, 0) + 1)
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading colon marks a reference by mark; anything
                    # else is treated as a reference by SHA. startswith()
                    # is used so an empty dataref cannot raise IndexError.
                    if fc.dataref.startswith(':'):
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob known by mark.

        A mark moves from 'new' to 'used' on its first reference and
        from 'used' to 'multi' on its second; further references leave
        it in 'multi'. A mark in none of those sets is added to 'unknown'.

        :param mark: the blob reference, including its leading colon
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
200
201
def _found(b):
202
    """Format a found boolean as a string."""
203
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
204
205
def _set_as_config_list(s):
206
    """Format a set as a sequence of comma-separated strings.
207
    
208
    To match what ConfigObj expects, a single item list has a trailing comma.
209
    """
210
    items = sorted(s)
211
    if len(items) == 1:
212
        return "%s," % (items[0],)
213
    else:
214
        return ", ".join(items)