/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
26
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
27
    processor,
28
    )
29
30
31
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.

    Statistics are initialised in pre_process(), accumulated by the
    ``*_handler`` methods (one per command type) and printed by
    post_process().
    """

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        :param target: passed through to ImportProcessor; may be None
          because this processor never writes anywhere
        :param params: passed through to ImportProcessor
        :param verbose: passed through to ImportProcessor
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic gathered by the handlers."""
        # Per-command and per-file-command counters, keyed by command name
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Map of "number of parents" -> number of commits with that many
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: usage category -> set of blob ids
        self.blobs = dict(
            (usage, set())
            for usage in ['new', 'used', 'multi', 'unknown', 'unmarked'])
        # Head tracking: map of commit mark to ref
        self.heads = {}
        # Map of ref -> id of the last commit seen on that ref
        self.last_ids = {}

    def post_process(self):
        """Print the statistics gathered while processing the stream."""
        # Command and file command counts
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            # Only parent counts that actually occurred are reported,
            # in ascending order.
            seen_counts = sorted(self.parent_counts)
            p_names = ["parents-%d" % i for i in seen_counts]
            p_values = [self.parent_counts[i] for i in seen_counts]
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dict(self.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # NOTE: self.committers is collected by commit_handler but the
            # number of unique committers is not currently reported.

        # Blob stats
        if self.cmd_counts['blob']:
            if self.verbose:
                # In verbose mode, don't list every blob used. Dump a
                # filtered copy rather than deleting from self.blobs so
                # the tracking state stays intact afterwards.
                blob_groups = dict(
                    (usage, ids) for usage, ids in self.blobs.items()
                    if usage != 'used')
            else:
                blob_groups = self.blobs
            self._dump_stats_group("Blob usage tracking", blob_groups.keys(),
                blob_groups.values(), len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading of the group
        :param names: the names of the statistics, in display order
        :param values: the values, in the same order as names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # print(...) with a single pre-formatted string behaves the same
        # under the Python 2 print statement and the Python 3 function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Updates the command and file command counts, the commit analysis
        flags, the blob usage sets, the head map and the parent counts.
        """
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading ':' denotes a mark; anything else is
                    # counted as a reference by SHA.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True

        # Track the heads. Without an explicit from clause, the parent is
        # the last commit seen on the same ref (if any).
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        # Tolerate a missing (None) merge list as well as an empty one
        parents.extend(cmd.merges or ())
        for parent in parents:
            # it's ok if the parent isn't there - another
            # commit may have already removed it
            self.heads.pop(parent, None)
        self.heads[cmd.id] = cmd.ref
        self.last_ids[cmd.ref] = cmd.id

        # Track the parent counts
        parent_count = len(parents)
        if parent_count in self.parent_counts:
            self.parent_counts[parent_count] += 1
        else:
            self.parent_counts[parent_count] = 1
            if parent_count > self.max_parent_count:
                self.max_parent_count = parent_count

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        # Resets under refs/tags/ are counted as lightweight tags; any
        # other ref is remembered by name.
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; further references leave it in
        'multi'. A mark not seen in any of those sets is recorded as
        'unknown'.
        """
        blobs = self.blobs
        if mark in blobs['multi']:
            pass
        elif mark in blobs['used']:
            blobs['multi'].add(mark)
            blobs['used'].remove(mark)
        elif mark in blobs['new']:
            blobs['used'].add(mark)
            blobs['new'].remove(mark)
        else:
            blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
236
237
def _found(b):
238
    """Format a found boolean as a string."""
239
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
240
0.64.30 by Ian Clatworthy
add heads analysis to info processor
241
def _iterable_as_config_list(s):
242
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
243
    
244
    To match what ConfigObj expects, a single item list has a trailing comma.
245
    """
246
    items = sorted(s)
247
    if len(items) == 1:
248
        return "%s," % (items[0],)
249
    else:
250
        return ", ".join(items)