/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
26
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
27
    processor,
28
    )
29
30
31
# Exclusive upper bound on the parent counts (0 .. _MAX_PARENTS - 1) that
# InfoProcessor.pre_process pre-initialises to zero
32
_MAX_PARENTS = 16
33
34
35
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic that the handlers update."""
        # Per-command and per-file-command counters, all starting at zero
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Map of "number of parents" to "number of commits". The common
        # counts are seeded here; commit_handler adds larger ones on demand.
        self.parent_counts = dict.fromkeys(range(_MAX_PARENTS), 0)
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: each usage category maps to a set of blob ids
        self.blobs = dict((usage, set()) for usage in
            ['new', 'used', 'multi', 'unknown', 'unmarked'])
        # Head tracking: map of commit mark to ref
        self.heads = {}
        # Map of ref to the id of the last commit seen on that ref
        self.last_ids = {}

    def post_process(self):
        """Dump the statistics gathered by the handlers.

        Groups that have nothing to report (no commit, blob or reset
        commands seen) are skipped.
        """
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Walk the recorded keys (not a fixed range) so that counts
            # added on demand by commit_handler are reported as well.
            for i in sorted(self.parent_counts):
                count = self.parent_counts[i]
                if count > 0:
                    p_names.append("parents-%d" % i)
                    p_values.append(count)
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            # NOTE(review): presumably invert_dict maps each ref to the
            # commit ids that head it - confirm against helpers.invert_dict
            heads = helpers.invert_dict(self.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # self.committers is collected but not reported yet.

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used. The category is
            # filtered out of the report rather than deleted so that the
            # gathered data stays intact and post_process can be repeated.
            usages = [usage for usage in self.blobs
                if not (self.verbose and usage == 'used')]
            self._dump_stats_group("Blob usage tracking", usages,
                [self.blobs[usage] for usage in usages], len,
                _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading printed for the group
        :param names: the names of the statistics, paired with values
        :param values: the values of the statistics, in the order of names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # Each print below takes one pre-formatted string, so the output is
        # the same whether print is a statement or a function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Spaces in a name are replaced by dashes in the key
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        # Blobs without a mark are recorded separately from the 'new' ones
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        # Count on demand so that a commit with more parents than were
        # pre-initialised is recorded instead of raising KeyError.
        parent_count = len(cmd.parents)
        self.parent_counts[parent_count] = (
            self.parent_counts.get(parent_count, 0) + 1)
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A dataref starting with ':' is tracked as a blob
                    # reference; anything else is flagged as a SHA reference.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
        # Track the heads
        if cmd.mark is None:
            # Without a mark, use the last commit seen on the same ref
            # (if there was one) as the only parent.
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        else:
            parents = cmd.parents
        # A commit's parents stop being heads; the commit becomes one
        for parent in parents:
            try:
                del self.heads[parent]
            except KeyError:
                note("hmm - didn't find parent %s" % parent)
        self.heads[cmd.id] = cmd.ref
        self.last_ids[cmd.ref] = cmd.id

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        # Resets of refs under refs/tags/ are counted as lightweight tags;
        # any other ref is remembered by name.
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; later references leave it in
        'multi'. A mark found in none of those sets is added to 'unknown'.

        :param mark: the blob reference taken from a file command's dataref
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
225
226
def _found(b):
227
    """Format a found boolean as a string."""
228
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
229
0.64.30 by Ian Clatworthy
add heads analysis to info processor
230
def _iterable_as_config_list(s):
231
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
232
    
233
    To match what ConfigObj expects, a single item list has a trailing comma.
234
    """
235
    items = sorted(s)
236
    if len(items) == 1:
237
        return "%s," % (items[0],)
238
    else:
239
        return ", ".join(items)