/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dump stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
0.83.2 by Ian Clatworthy
update fast-import-info to use same head-tracking code as fast-import
25
    cache_manager,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
26
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
27
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
28
    processor,
29
    )
30
31
32
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic that the handlers update."""
        # Per-command and per-file-command counters, all starting at zero
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Map from number-of-parents to the number of commits having it
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: one set of blob ids per usage category
        self.blobs = {}
        for usage in ['new', 'used', 'multi', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        # Head tracking - delegate to the cache manager
        self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Dump the collected statistics, one titled group at a time.

        Commit, blob and reset groups are only dumped when at least one
        command of the matching kind was counted.
        """
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Report the parent counts in ascending order of parent number
            for parent_number in sorted(self.parent_counts):
                p_names.append("parents-%d" % parent_number)
                p_values.append(self.parent_counts[parent_number])
            p_names.append('total revisions merged')
            p_values.append(len(self.merges))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dictset(self.cache_mgr.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # NOTE: self.committers is collected by commit_handler but is
            # not currently part of the report.
            self._dump_stats_group("Merges", self.merges.keys(),
                self.merges.values(), None)
            self._dump_stats_group("Rename old paths",
                self.rename_old_paths.keys(),
                self.rename_old_paths.values(), len,
                _iterable_as_config_list)
            self._dump_stats_group("Copy source paths",
                self.copy_source_paths.keys(),
                self.copy_source_paths.values(), len,
                _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used. The category is
            # only hidden from the report; the tracking data itself is left
            # intact so the handlers keep working after a dump.
            usages = [usage for usage in self.blobs
                if not (self.verbose and usage == 'used')]
            blob_sets = [self.blobs[usage] for usage in usages]
            self._dump_stats_group("Blob usage tracking", usages,
                blob_sets, len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading printed for the group
        :param names: the names of the statistics, paired positionally
          with values
        :param values: the values of the statistics
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # print(...) with a single argument behaves identically as a
        # Python 2 print statement and as a Python 3 function call.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Spaces in names are replaced by dashes in the config output
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        Blobs without a mark are recorded as 'unmarked'; marked blobs are
        recorded as 'new' until a commit references them.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Updates the command and file-command counters, the commit flags
        (separate authors, executables, symlinks, SHA blob references),
        blob usage, rename/copy source paths, heads, parent counts and
        merge counts.
        """
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A dataref starting with ':' is treated as a mark;
                    # anything else is counted as a reference by SHA.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)

        # Track the heads
        parents = self.cache_mgr.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        self.parent_counts[parent_count] = \
            self.parent_counts.get(parent_count, 0) + 1
        if parent_count > self.max_parent_count:
            self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            for merge in cmd.merges:
                self.merges[merge] = self.merges.get(merge, 0) + 1

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Refs under 'refs/tags/' are counted as lightweight tags; any other
        ref is recorded as a named branch and, when the command has a
        from_ value, passed to the cache manager for head tracking.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)
            if cmd.from_ is not None:
                self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; further references leave it in
        'multi'. A mark not seen in any of those sets is added to
        'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
251
252
def _found(b):
253
    """Format a found boolean as a string."""
254
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
255
0.64.30 by Ian Clatworthy
add heads analysis to info processor
256
def _iterable_as_config_list(s):
257
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
258
    
259
    To match what ConfigObj expects, a single item list has a trailing comma.
260
    """
261
    items = sorted(s)
262
    if len(items) == 1:
263
        return "%s," % (items[0],)
264
    else:
265
        return ", ".join(items)