/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
26
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
27
    processor,
28
    )
29
30
31
# Number of parent-count buckets pre-allocated by InfoProcessor.pre_process (for 0 .. _MAX_PARENTS - 1 parents)
32
_MAX_PARENTS = 16
33
34
35
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic to its empty/zero state."""
        # Per-command and per-file-command counters, keyed by command name
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Histogram: number of parents -> number of commits.  The first
        # _MAX_PARENTS buckets are pre-allocated; commit_handler adds
        # further buckets on demand.
        self.parent_counts = dict.fromkeys(range(_MAX_PARENTS), 0)
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: usage category -> set of blob ids/marks
        self.blobs = {}
        for usage in ['new', 'used', 'multi', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        # Head tracking: map of commit mark to ref
        self.heads = {}
        # Map of ref to the id of the last commit seen on that ref
        self.last_ids = {}

    def post_process(self):
        """Print the statistics collected by the handlers."""
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Walk every bucket actually recorded, not just the
            # pre-allocated ones, so that commits with _MAX_PARENTS or
            # more parents are reported as well.
            for i in sorted(self.parent_counts):
                count = self.parent_counts[i]
                if count > 0:
                    p_names.append("parents-%d" % i)
                    p_values.append(count)
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dict(self.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used.  The category
            # is skipped in a local list rather than deleted from
            # self.blobs, so the collected data stays intact and this
            # method can safely be called more than once.
            usages = [u for u in self.blobs
                if not (self.verbose and u == 'used')]
            usage_sets = [self.blobs[u] for u in usages]
            self._dump_stats_group("Blob usage tracking", usages,
                usage_sets, len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: heading printed for the group
        :param names: the statistic names, parallel to values
        :param values: the statistic values, parallel to names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # Note: print is always called with a single parenthesised
        # argument so the output is the same whether print is a statement
        # (Python 2) or a function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Spaces are replaced so the name is usable as a config key
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading ':' marks a reference to a blob mark;
                    # anything else is counted as a SHA reference.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
        # Track the heads
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            # No explicit from clause: the parent is the previous commit
            # seen on the same ref, if any.
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        parents.extend(cmd.merges)
        for parent in parents:
            # it's ok if the parent isn't there - another
            # commit may have already removed it
            self.heads.pop(parent, None)
        self.heads[cmd.id] = cmd.ref
        self.last_ids[cmd.ref] = cmd.id
        # Buckets beyond the pre-allocated range are created on demand so
        # a commit with _MAX_PARENTS or more parents cannot raise KeyError.
        parent_total = len(parents)
        self.parent_counts[parent_total] = (
            self.parent_counts.get(parent_total, 0) + 1)

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; a mark found in none of those
        sets is recorded as 'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
235
236
def _found(b):
237
    """Format a found boolean as a string."""
238
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
239
0.64.30 by Ian Clatworthy
add heads analysis to info processor
240
def _iterable_as_config_list(s):
241
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
242
    
243
    To match what ConfigObj expects, a single item list has a trailing comma.
244
    """
245
    items = sorted(s)
246
    if len(items) == 1:
247
        return "%s," % (items[0],)
248
    else:
249
        return ", ".join(items)