/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
16
"""Import processor that dumps stats about the input (and doesn't import)."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
19
20
from .. import (
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
21
    reftracker,
22
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
23
from ..helpers import (
0.139.1 by Jelmer Vernooij
Import helper functions that have been removed from python-fastimport.
24
    invert_dict,
25
    invert_dictset,
26
    )
0.64.349 by Jelmer Vernooij
Reimport some modules removed from python-fastimport 0.9.2.
27
from fastimport import (
28
    commands,
29
    processor,
30
    )
31
import stat
32
33
34
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, params=None, verbose=0, outf=None):
        """Create the processor.

        All arguments are handed straight to ImportProcessor.__init__.

        :param params: processor parameters, passed through unchanged
        :param verbose: verbosity level; any true value switches the report
          to the config-file layout and >= 2 adds the rename/copy path groups
        :param outf: the stream the report is written to
        """
        processor.ImportProcessor.__init__(self, params, verbose,
            outf=outf)

    def pre_process(self):
        """Initialise every statistic collected while handling commands."""
        # Command counters are pre-seeded with zero so that post_process can
        # report every known command name, even those never seen.
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Map from "number of parents" to the number of commits having it.
        self.parent_counts = {}
        self.max_parent_count = 0
        # Collected by commit_handler; not currently part of the report.
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        # Blob usage tracking: each blob id lives in at most one of these
        # sets, or in blob_ref_counts once it is referenced more than once.
        self.blobs = {
            'new': set(),
            'used': set(),
            'unknown': set(),
            'unmarked': set(),
            }
        self.blob_ref_counts = {}
        # Head tracking
        self.reftracker = reftracker.RefTracker()
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Write the collected statistics to self.outf, group by group."""
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        self._dump_stats_group("Command counts",
            [(c, self.cmd_counts[c]) for c in cmd_names], str)
        self._dump_stats_group("File command counts",
            [(c, self.file_cmd_counts[c]) for c in fc_names], str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_items = [
                ("parents-%d" % i, self.parent_counts[i])
                for i in range(self.max_parent_count + 1)
                if i in self.parent_counts]
            p_items.append(('total revisions merged', len(self.merges)))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_items, str)
            self._dump_stats_group("Commit analysis", flags.items(), _found)
            heads = invert_dictset(self.reftracker.heads)
            self._dump_stats_group("Head analysis", heads.items(), None,
                _iterable_as_config_list)
            self._dump_stats_group("Merges", self.merges.items(), None)
            # We only show the rename old path and copy source paths when -vv
            # (verbose=2) is specified. The output here for mysql's data can't
            # be parsed currently so this bit of code needs more work anyhow ..
            if self.verbose >= 2:
                self._dump_stats_group("Rename old paths",
                    self.rename_old_paths.items(), len,
                    _iterable_as_config_list)
                self._dump_stats_group("Copy source paths",
                    self.copy_source_paths.items(), len,
                    _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            blob_usage = self.blobs.items()
            if self.verbose:
                # In verbose mode, don't list every blob used. The entry is
                # filtered out of the report rather than deleted from
                # self.blobs, so the collected state stays intact and this
                # method can safely run more than once.
                blob_usage = [(usage, ids) for usage, ids in blob_usage
                    if usage != 'used']
            self._dump_stats_group("Blob usage tracking",
                blob_usage, len, _iterable_as_config_list)
        if self.blob_ref_counts:
            blobs_by_count = invert_dict(self.blob_ref_counts)
            self._dump_stats_group("Blob reference counts",
                sorted(blobs_by_count.items()), len,
                _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                }
            self._dump_stats_group("Reset analysis", reset_stats.items())

    def _dump_stats_group(self, title, items, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading of the group
        :param items: an iterable of (name, value) pairs
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        write = self.outf.write
        if self.verbose:
            write("[%s]\n" % (title,))
            for name, value in items:
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Names double as config keys, so spaces become dashes.
                if isinstance(name, str):
                    name = name.replace(' ', '-')
                write("%s = %s\n" % (name, value))
            write("\n")
        else:
            write("%s:\n" % (title,))
            for name, value in items:
                if normal_formatter is not None:
                    value = normal_formatter(value)
                write("\t%s\t%s\n" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: blob_ref_counts (blobs referenced more than once) is
            # deliberately left untouched.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.iter_files():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                # Any of the user/group/other execute bits counts.
                if fc.mode & 0o111:
                    self.executables_found = True
                if stat.S_ISLNK(fc.mode):
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A leading ':' marks a reference to a blob mark;
                    # anything else is counted as a reference by SHA.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)

        # Track the heads
        parents = self.reftracker.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        self.parent_counts[parent_count] = (
            self.parent_counts.get(parent_count, 0) + 1)
        if parent_count > self.max_parent_count:
            self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            for merge in cmd.merges:
                self.merges[merge] = self.merges.get(merge, 0) + 1

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        elif cmd.from_ is not None:
            self.reftracker.track_heads_for_ref(
                cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        self.cmd_counts[cmd.name] += 1
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                % (feature,))

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' into blob_ref_counts (starting at 2) on its second; later
        references only bump that count. A mark that was never announced
        by a blob command is filed under 'unknown'.
        """
        if mark in self.blob_ref_counts:
            self.blob_ref_counts[mark] += 1
        elif mark in self.blobs['used']:
            self.blob_ref_counts[mark] = 2
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
268
269
def _found(b):
270
    """Format a found boolean as a string."""
271
    return ['no', 'found'][b]
272
273
def _iterable_as_config_list(s):
274
    """Format an iterable as a sequence of comma-separated strings.
275
    
276
    To match what ConfigObj expects, a single item list has a trailing comma.
277
    """
278
    items = sorted(s)
279
    if len(items) == 1:
280
        return "%s," % (items[0],)
281
    else:
282
        return ", ".join(items)