/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
1
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Import processor that dumps stats about the input (and doesn't import)."""
18
19
20
from bzrlib.trace import (
21
    note,
22
    warning,
23
    )
24
from bzrlib.plugins.fastimport import (
25
    commands,
0.64.30 by Ian Clatworthy
add heads analysis to info processor
26
    helpers,
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
27
    processor,
28
    )
29
30
31
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        :param target: passed through to ImportProcessor; may be None
        :param params: passed through to ImportProcessor
        :param verbose: passed through to ImportProcessor; when
          ``self.verbose`` is true the statistics are dumped in a
          config-file style layout
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Initialise every statistic that the handlers update."""
        # Per-command and per-file-command counters, all starting at zero
        self.cmd_counts = dict.fromkeys(commands.COMMAND_NAMES, 0)
        self.file_cmd_counts = dict.fromkeys(commands.FILE_COMMAND_NAMES, 0)
        # Map from number-of-parents to number of commits with that many
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        self.named_branches = []
        # Blob usage tracking: map of usage category to a set of blob ids
        self.blobs = {}
        for usage in ['new', 'used', 'multi', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        # Head tracking: map of commit mark to ref
        # (Head tracking future - delegate to the cache manager)
        self.heads = {}
        # Map of ref to the id of the last commit seen on that ref
        self.last_ids = {}
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Dump the collected statistics, one group at a time."""
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_names = []
            p_values = []
            # Report the parent counts in ascending order of parent number
            for parent_count in sorted(self.parent_counts):
                p_names.append("parents-%d" % parent_count)
                p_values.append(self.parent_counts[parent_count])
            p_names.append('total revisions merged')
            p_values.append(len(self.merges))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", list(flags.keys()),
                list(flags.values()), _found)
            heads = helpers.invert_dict(self.heads)
            self._dump_stats_group("Head analysis", list(heads.keys()),
                list(heads.values()), None, _iterable_as_config_list)
            # NOTE: self.committers is collected but the number of unique
            # committers is not currently reported.
            self._dump_stats_group("Merges", list(self.merges.keys()),
                list(self.merges.values()), None)
            self._dump_stats_group("Rename old paths",
                list(self.rename_old_paths.keys()),
                list(self.rename_old_paths.values()), len,
                _iterable_as_config_list)
            self._dump_stats_group("Copy source paths",
                list(self.copy_source_paths.keys()),
                list(self.copy_source_paths.values()), len,
                _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            # Work on a shallow copy so that the collected statistics are
            # left intact and this method can safely be called again.
            blobs = dict(self.blobs)
            if self.verbose:
                # In verbose mode, don't list every blob used
                blobs.pop('used', None)
            self._dump_stats_group("Blob usage tracking", list(blobs.keys()),
                list(blobs.values()), len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", list(reset_stats.keys()),
                list(reset_stats.values()))

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.
        In that mode spaces in each name are replaced by hyphens.

        :param title: the heading of the group
        :param names: the names of the statistics, in display order
        :param values: the values, paired positionally with names
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # Each print call takes a single pre-formatted string so that the
        # output is the same whether print is a statement or a function.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand.

        Blobs without a mark are recorded as 'unmarked'; all others are
        recorded as 'new' until a commit references them.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Updates the command and file command counters, the commit flags,
        blob usage, rename/copy source paths, the heads map, the parent
        counts and the merge counts.
        """
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A dataref starting with ':' is a mark; anything
                    # else is treated as a reference by SHA.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)

        # Normalise once so that a missing merge list behaves like an
        # empty one everywhere below.
        merges = cmd.merges or []

        # Track the heads
        # (future: parents = self.cache_mgr.track_heads(cmd))
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            # The from clause is implied: the parent is the last commit
            # seen on the same ref, if any.
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        parents.extend(merges)
        for parent in parents:
            # it's ok if the parent isn't there - another
            # commit may have already removed it
            self.heads.pop(parent, None)
        self.heads[cmd.id] = cmd.ref
        self.last_ids[cmd.ref] = cmd.id

        # Track the parent counts
        parent_count = len(parents)
        self.parent_counts[parent_count] = \
            self.parent_counts.get(parent_count, 0) + 1
        if parent_count > self.max_parent_count:
            self.max_parent_count = parent_count

        # Remember the merges
        for merge in merges:
            self.merges[merge] = self.merges.get(merge, 0) + 1

    def reset_handler(self, cmd):
        """Process a ResetCommand.

        Refs under 'refs/tags/' are counted as lightweight tags; any other
        ref is remembered in ``named_branches``.
        """
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)
            # future ...
            #if cmd.from_ is not None:
            #    self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record that a commit references the blob with the given mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; further references leave it in
        'multi'. A mark that is in none of those sets is recorded as
        'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
0.64.1 by Ian Clatworthy
1st cut: gfi parser + --info processing method
272
273
def _found(b):
274
    """Format a found boolean as a string."""
275
    return ['no', 'found'][b]
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
276
0.64.30 by Ian Clatworthy
add heads analysis to info processor
277
def _iterable_as_config_list(s):
278
    """Format an iterable as a sequence of comma-separated strings.
0.64.24 by Ian Clatworthy
smart blob caching using analysis done by --info
279
    
280
    To match what ConfigObj expects, a single item list has a trailing comma.
281
    """
282
    items = sorted(s)
283
    if len(items) == 1:
284
        return "%s," % (items[0],)
285
    else:
286
        return ", ".join(items)