# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Import processor that dumps stats about the input (and doesn't import)."""

import stat

from bzrlib.plugins.fastimport import (
    reftracker,
    )
from fastimport import (
    commands,
    processor,
    )
from fastimport.helpers import (
    invert_dict,
    invert_dictset,
    )


class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, params=None, verbose=0, outf=None):
        processor.ImportProcessor.__init__(self, params, verbose,
            outf=outf)

    def pre_process(self):
        # Init statistics
        self.cmd_counts = {}
        for cmd in commands.COMMAND_NAMES:
            self.cmd_counts[cmd] = 0
        self.file_cmd_counts = {}
        for fc in commands.FILE_COMMAND_NAMES:
            self.file_cmd_counts[fc] = 0
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        # Blob usage tracking
        self.blobs = {}
        for usage in ['new', 'used', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        self.blob_ref_counts = {}
        # Head tracking
        self.reftracker = reftracker.RefTracker()
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        self._dump_stats_group("Command counts",
            [(c, self.cmd_counts[c]) for c in cmd_names], str)
        self._dump_stats_group("File command counts",
            [(c, self.file_cmd_counts[c]) for c in fc_names], str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_items = []
            for i in xrange(0, self.max_parent_count + 1):
                if i in self.parent_counts:
                    count = self.parent_counts[i]
                    p_items.append(("parents-%d" % i, count))
            merges_count = len(self.merges)
            p_items.append(('total revisions merged', merges_count))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_items, str)
            self._dump_stats_group("Commit analysis", flags.iteritems(), _found)
            heads = invert_dictset(self.reftracker.heads)
            self._dump_stats_group("Head analysis", heads.iteritems(), None,
                                    _iterable_as_config_list)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
            self._dump_stats_group("Merges", self.merges.iteritems(), None)
            # Only show the rename old paths and copy source paths when -vv
            # (verbose=2) is specified. The output here for mysql's data
            # can't currently be parsed, so this bit of code needs more work
            # anyhow.
            if self.verbose >= 2:
                self._dump_stats_group("Rename old paths",
                    self.rename_old_paths.iteritems(), len,
                    _iterable_as_config_list)
                self._dump_stats_group("Copy source paths",
                    self.copy_source_paths.iteritems(), len,
                    _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used
            if self.verbose:
                del self.blobs['used']
            self._dump_stats_group("Blob usage tracking",
                self.blobs.iteritems(), len, _iterable_as_config_list)
        if self.blob_ref_counts:
            blobs_by_count = invert_dict(self.blob_ref_counts)
            blob_items = sorted(blobs_by_count.items())
            self._dump_stats_group("Blob reference counts",
                blob_items, len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                }
            self._dump_stats_group("Reset analysis", reset_stats.iteritems())

    def _dump_stats_group(self, title, items, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so that other processors
        can load the information if they want to.

        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        if self.verbose:
            self.outf.write("[%s]\n" % (title,))
            for name, value in items:
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                if isinstance(name, str):
                    name = name.replace(' ', '-')
                self.outf.write("%s = %s\n" % (name, value))
            self.outf.write("\n")
        else:
            self.outf.write("%s:\n" % (title,))
            for name, value in items:
                if normal_formatter is not None:
                    value = normal_formatter(value)
                self.outf.write("\t%s\t%s\n" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from
            # blob_ref_counts if it is already tracked there.
            try:
                self.blobs['used'].remove(cmd.id)
            except KeyError:
                pass

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.iter_files():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.mode & 0111:
                    self.executables_found = True
                if stat.S_ISLNK(fc.mode):
                    self.symlinks_found = True
                if fc.dataref is not None:
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)

        # Track the heads
        parents = self.reftracker.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        if parent_count in self.parent_counts:
            self.parent_counts[parent_count] += 1
        else:
            self.parent_counts[parent_count] = 1
            if parent_count > self.max_parent_count:
                self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
            for merge in cmd.merges:
                if merge in self.merges:
                    self.merges[merge] += 1
                else:
                    self.merges[merge] = 1

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            if cmd.from_ is not None:
                self.reftracker.track_heads_for_ref(
                    cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        self.cmd_counts[cmd.name] += 1
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                % (feature,))

    def _track_blob(self, mark):
        """Record a reference to the blob identified by mark."""
        if mark in self.blob_ref_counts:
            self.blob_ref_counts[mark] += 1
        elif mark in self.blobs['used']:
            self.blob_ref_counts[mark] = 2
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)


def _found(b):
    """Format a found boolean as a string."""
    return ['no', 'found'][b]


def _iterable_as_config_list(s):
    """Format an iterable as a sequence of comma-separated strings.

    To match what ConfigObj expects, a single item list has a trailing comma.
    """
    items = sorted(s)
    if len(items) == 1:
        return "%s," % (items[0],)
    else:
        return ", ".join(items)