/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
0.83.2 by Ian Clatworthy
update fast-import-info to use same head-tracking code as fast-import
25
    cache_manager,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
26
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
27
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
28
    processor,
29
    )
30
31
32
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=0):
        """Create the processor.

        All arguments are passed straight through to
        ``processor.ImportProcessor.__init__``.
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic that the handlers accumulate."""
        # Init statistics: one zeroed counter per known command name
        self.cmd_counts = {}
        for cmd in commands.COMMAND_NAMES:
            self.cmd_counts[cmd] = 0
        self.file_cmd_counts = {}
        for fc in commands.FILE_COMMAND_NAMES:
            self.file_cmd_counts[fc] = 0
        # Map from number-of-parents to the number of commits having it
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        # Blob usage tracking:
        #   new      - marked blobs seen but not referenced yet
        #   used     - blobs referenced exactly once so far
        #   unknown  - references to marks in none of the other states
        #   unmarked - blobs that carry no mark
        self.blobs = {}
        for usage in ['new', 'used', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        # Map from mark to reference count, for blobs referenced 2+ times
        self.blob_ref_counts = {}
        # Head tracking - delegate to the cache manager
        self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Dump the statistics gathered by the handlers to stdout.

        The tracking state is only read, never modified, so calling this
        method more than once produces the same report each time.
        """
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # The keys are the parent counts seen by commit_handler;
            # report them in ascending order.
            for i in sorted(self.parent_counts):
                p_names.append("parents-%d" % i)
                p_values.append(self.parent_counts[i])
            p_names.append('total revisions merged')
            p_values.append(len(self.merges))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dictset(self.cache_mgr.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
            self._dump_stats_group("Merges", self.merges.keys(),
                self.merges.values(), None)
            # We only show the rename old path and copy source paths when -vv
            # (verbose=2) is specified. The output here for mysql's data can't
            # be parsed currently so this bit of code needs more work anyhow ..
            if self.verbose >= 2:
                self._dump_stats_group("Rename old paths",
                    self.rename_old_paths.keys(),
                    self.rename_old_paths.values(), len,
                    _iterable_as_config_list)
                self._dump_stats_group("Copy source paths",
                    self.copy_source_paths.keys(),
                    self.copy_source_paths.values(), len,
                    _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used. The entry is
            # filtered out of the report rather than deleted from
            # self.blobs, so the tracking state stays intact.
            usages = [usage for usage in self.blobs.keys()
                      if not (self.verbose and usage == 'used')]
            self._dump_stats_group("Blob usage tracking", usages,
                [self.blobs[usage] for usage in usages], len,
                _iterable_as_config_list)
        if self.blob_ref_counts:
            blobs_by_count = helpers.invert_dict(self.blob_ref_counts)
            self._dump_stats_group("Blob reference counts",
                blobs_by_count.keys(),
                blobs_by_count.values(), len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: heading of the group
        :param names: the statistic names, paired positionally with values
        :param values: the statistic values
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # Every print below takes one parenthesised argument, so it emits
        # the same text as a Python 2 print statement and as a Python 3
        # print() call.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                if isinstance(name, str):
                    # Spaces in string names are replaced with dashes
                    name = name.replace(' ', '-')
                print("%s = %s" % (name, value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Any count already recorded in blob_ref_counts is left alone.
            # discard() is a no-op when the id is absent.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A dataref starting with ':' is tracked as a mark;
                    # any other dataref is counted as a SHA reference.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)

        # Track the heads
        parents = self.cache_mgr.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        self.parent_counts[parent_count] = \
            self.parent_counts.get(parent_count, 0) + 1
        if parent_count > self.max_parent_count:
            self.max_parent_count = parent_count

        # Remember the merges: count how often each mark is merged
        if cmd.merges:
            for merge in cmd.merges:
                self.merges[merge] = self.merges.get(merge, 0) + 1

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        elif cmd.from_ is not None:
            self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' into blob_ref_counts (starting at 2) on its second; later
        references increment that count. A mark in none of those states
        is added to 'unknown'.
        """
        if mark in self.blob_ref_counts:
            self.blob_ref_counts[mark] += 1
        elif mark in self.blobs['used']:
            self.blob_ref_counts[mark] = 2
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
265
266
def _found(b):
267
    """Format a found boolean as a string."""
268
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
269
0.64.30 by Ian Clatworthy
add heads analysis to info processor
270
def _iterable_as_config_list(s):
271
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
272
    
273
    To match what ConfigObj expects, a single item list has a trailing comma.
274
    """
275
    items = sorted(s)
276
    if len(items) == 1:
277
        return "%s," % (items[0],)
278
    else:
279
        return ", ".join(items)