/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
0.83.2 by Ian Clatworthy
update fast-import-info to use same head-tracking code as fast-import
25
    cache_manager,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
26
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
27
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
28
    processor,
29
    )
30
31
32
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        All arguments are passed straight through to
        ImportProcessor.__init__.

        :param target: passed to the base class; defaults to None so the
          processor can be created without a target
        :param params: passed to the base class
        :param verbose: passed to the base class
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise all the statistics that the handlers accumulate."""
        # Per-command and per-file-command counters, all starting at zero
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Map from number-of-parents to the number of commits having it
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        # Blob usage tracking: one set of blob ids/marks per usage category
        self.blobs = {}
        for usage in ['new', 'used', 'multi', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        # Head tracking - delegate to the cache manager
        self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Dump the statistics gathered by the handlers to standard output.

        The commit, blob and reset sections are only printed when at least
        one command of the corresponding kind was counted.
        """
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            # Only parent counts that actually occurred are reported,
            # in increasing order.
            p_names = []
            p_values = []
            for i in sorted(self.parent_counts):
                p_names.append("parents-%d" % i)
                p_values.append(self.parent_counts[i])
            p_names.append('total revisions merged')
            p_values.append(len(self.merges))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dictset(self.cache_mgr.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
            self._dump_stats_group("Merges", self.merges.keys(),
                self.merges.values(), None)
            self._dump_stats_group("Rename old paths",
                self.rename_old_paths.keys(),
                self.rename_old_paths.values(), len,
                _iterable_as_config_list)
            self._dump_stats_group("Copy source paths",
                self.copy_source_paths.keys(),
                self.copy_source_paths.values(), len,
                _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode every member of each set is listed, so the
            # 'used' set is left out of the report. It is skipped rather
            # than deleted so that self.blobs stays intact afterwards.
            usages = [u for u in self.blobs
                if not (self.verbose and u == 'used')]
            self._dump_stats_group("Blob usage tracking", usages,
                [self.blobs[u] for u in usages], len,
                _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading of the group
        :param names: the names of the statistics; in verbose mode spaces
          in a name are replaced by dashes
        :param values: the values, in the same order as names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # print is called with a single parenthesised argument so that the
        # output is the same under the Python 2 statement and the
        # Python 3 function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        Blobs without a mark are recorded as 'unmarked'; the others
        are recorded as 'new' until a commit refers to them.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Updates the command and file command counts, the commit analysis
        flags, blob usage, rename/copy source paths, head tracking,
        parent counts and merge counts.
        """
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading ':' marks a reference to a blob by mark;
                    # anything else is counted as a reference by SHA.
                    # startswith is used so that an empty dataref does
                    # not raise IndexError.
                    if fc.dataref.startswith(':'):
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(
                    fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(
                    fc.src_path)

        # Track the heads
        parents = self.cache_mgr.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        self.parent_counts[parent_count] = \
            self.parent_counts.get(parent_count, 0) + 1
        if parent_count > self.max_parent_count:
            self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
            for merge in cmd.merges:
                self.merges[merge] = self.merges.get(merge, 0) + 1

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        A reset of a ref under refs/tags/ is counted as a lightweight tag.
        Any other reset that names a from_ revision is passed to the cache
        manager's head tracking.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        elif cmd.from_ is not None:
            self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; further references leave it in
        'multi'. A mark that is in none of those sets is recorded as
        'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
248
249
def _found(b):
250
    """Format a found boolean as a string."""
251
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
252
0.64.30 by Ian Clatworthy
add heads analysis to info processor
253
def _iterable_as_config_list(s):
254
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
255
    
256
    To match what ConfigObj expects, a single item list has a trailing comma.
257
    """
258
    items = sorted(s)
259
    if len(items) == 1:
260
        return "%s," % (items[0],)
261
    else:
262
        return ", ".join(items)