/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
26
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
27
    processor,
28
    )
29
30
31
# Maximum number of parents for a merge commit
32
# InfoProcessor.pre_process seeds parent_counts with a zeroed entry for
# each parent count in 0 .. _MAX_PARENTS - 1.
_MAX_PARENTS = 16
33
34
35
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.

    Statistics are reset in pre_process(), accumulated by the
    ``*_handler`` methods and printed to stdout by post_process().
    """

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        All arguments are handed to ImportProcessor.__init__ unchanged.

        :param target: defaults to None so that the processor can be
          created without a target
        :param params: custom parameters for the processor
        :param verbose: presumably stored as self.verbose by the base
          class (post_process reads it) - TODO confirm
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic the handlers update."""
        # Per-command and per-file-command counters, all starting at zero
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Number of commits seen for each parent count
        self.parent_counts = dict.fromkeys(range(_MAX_PARENTS), 0)
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: each usage category holds a set of marks
        self.blobs = {}
        for usage in ['new', 'used', 'multi', 'unknown']:
            self.blobs[usage] = set()
        # Head tracking: map of commit mark to ref
        self.heads = {}

    def post_process(self):
        """Print the statistics gathered since pre_process().

        Groups are only printed when the matching command was seen
        (commit, blob, reset).  Reporting does not modify the collected
        statistics, so it is safe to call this more than once.
        """
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Walk the keys actually present (in numeric order) so that
            # counts recorded for _MAX_PARENTS or more parents show up too.
            for i in sorted(self.parent_counts):
                count = self.parent_counts[i]
                if count > 0:
                    p_names.append("parents-%d" % i)
                    p_values.append(count)
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dict(self.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # NOTE: self.committers is collected but not reported yet.

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used.  The category is
            # filtered out of the report instead of being deleted from
            # self.blobs, which would break _track_blob and any later call
            # to this method.
            blob_names = [usage for usage in self.blobs
                if not (self.verbose and usage == 'used')]
            blob_values = [self.blobs[usage] for usage in blob_names]
            self._dump_stats_group("Blob usage tracking", blob_names,
                blob_values, len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: heading of the group
        :param names: iterable of statistic names
        :param values: iterable of values, paired with names by position
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # print is called with one parenthesised argument throughout so the
        # code parses, and prints the same text, on Python 2 and Python 3.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        # A blob without a mark cannot be referenced by mark later, so there
        # is nothing to track (assumes the mark is None when the stream
        # omits it - TODO confirm against the parser).
        if cmd.mark is not None:
            self.blobs['new'].add(":" + cmd.mark)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        # Use get() so a commit with _MAX_PARENTS or more parents is
        # counted instead of raising KeyError.
        parent_count = len(cmd.parents)
        self.parent_counts[parent_count] = (
            self.parent_counts.get(parent_count, 0) + 1)
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading ':' means the data is referenced by mark;
                    # anything else is treated as a SHA reference
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
        # Track the heads
        for parent in cmd.parents:
            # A parent that is not a current head (for example because an
            # earlier commit already named it as parent) is discarded
            # quietly: printing here would be interleaved with the report
            # that post_process writes to stdout.
            self.heads.pop(parent, None)
        # A commit without a mark has no key to be recorded under (assumes
        # the mark is None when the stream omits it - TODO confirm).
        if cmd.mark is not None:
            self.heads[":" + cmd.mark] = cmd.ref

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Move a referenced blob mark to its next usage category.

        Each reference moves the mark new -> used -> multi; a mark in none
        of those categories is recorded as 'unknown'.

        :param mark: the blob mark, including its leading ':'
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
214
215
def _found(b):
216
    """Format a found boolean as a string."""
217
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
218
0.64.30 by Ian Clatworthy
add heads analysis to info processor
219
def _iterable_as_config_list(s):
220
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
221
    
222
    To match what ConfigObj expects, a single item list has a trailing comma.
223
    """
224
    items = sorted(s)
225
    if len(items) == 1:
226
        return "%s," % (items[0],)
227
    else:
228
        return ", ".join(items)