/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.26 by Jelmer Vernooij
Add copyright headers.
1
# Copyright (C) 2005-2008 Canonical Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
16
"""A Simple bzr plugin to generate statistics about the history."""
17
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
18
import re
19
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
20
from bzrlib.lazy_import import lazy_import
21
lazy_import(globals(), """
22
from bzrlib import (
23
    branch,
24
    commands,
25
    config,
26
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
27
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
28
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
29
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
30
    workingtree,
31
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
32
from bzrlib.plugins.stats.classify import classify_delta
33
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
34
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
35
36
37
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer/author strings; each one is passed
        to config.parse_username() and the first element of the result
        is used as the full name.
    :return: list of (count, fullname) tuples, sorted in descending order.
    """
    tally = {}
    for committer in lst:
        fullname = config.parse_username(committer)[0]
        tally[fullname] = tally.get(fullname, 0) + 1
    ranked = [(seen, name) for name, seen in tally.items()]
    ranked.sort(reverse=True)
    return ranked
46
47
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambiguous meaning
48
def collapse_by_person(committers):
    """Merge per-email revision lists into per-person records.

    The committers mapping is keyed by email address. Some people commit
    with a similar username, but different email address, which makes it
    hard to sort out when they have multiple entries. Email is actually
    more stable, though, since people frequently forget to set their
    name properly.

    So the full names used with each email address are ranked by
    frequency, and an email address whose names include one already
    registered for an earlier address is folded into that earlier record.

    The lists held by committers are copied, never modified.

    :param committers: dict mapping email -> list of revision objects
        (each must provide get_apparent_author()).
    :return: list of (rev_count, revs, {email: count}, {fullname: count})
        tuples, largest rev_count first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.get_apparent_author() for rev in revs)
        match = None
        for count, fullname in fullnames:
            # Empty names are never used to link two addresses together.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, so collapse into the existing record.
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # First time we see any of these names: start a new record.
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Copy the list: later matches extend it, and the caller's
            # mapping must not grow as a side effect.
            counter_to_info[counter] = [list(revs), {email: len(revs)}, fname_map]

    collapsed = [(len(person_revs), person_revs, emails, fnames)
                 for person_revs, emails, fnames in counter_to_info.values()]
    # Order on the revision count alone; comparing whole tuples would fall
    # through to comparing revision lists and dicts when counts tie.
    collapsed.sort(key=lambda entry: entry[0], reverse=True)
    return collapsed
94
95
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by committer.
96
def sort_by_committer(a_repo, revids):
    """Group the revisions named by revids by their author's email.

    :param a_repo: repository object providing get_revisions().
    :param revids: sequence of revision ids to load.
    :return: dict mapping email (the second element returned by
        config.parse_username() for the apparent author) to the list of
        revision objects attributed to it.
    """
    by_email = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, rev in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            address = config.parse_username(rev.get_apparent_author())[1]
            if address not in by_email:
                by_email[address] = []
            by_email[address].append(rev)
    finally:
        progress.finished()

    return by_email
110
111
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
112
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: repository providing lock_read(), unlock(),
        get_ancestry() and get_revisions().
    :param revision: revision id whose ancestry is analysed.
    :return: the collapse_by_person() result for that ancestry.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Lock inside the outer try so the progress bar is always
        # finished, even when taking or releasing the lock fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first entry of the ancestry is skipped -- presumably the
            # leading None placeholder (find_credits filters None out of
            # the same call); confirm against Repository.get_ancestry.
            ancestry = a_repo.get_ancestry(revision)[1:]
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
126
127
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
128
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions:
    only revisions in the ancestry of end_rev that are not in the ancestry
    of start_rev are considered.

    :param a_repo: repository providing lock_read(), unlock(),
        get_ancestry() and get_revisions().
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the collapse_by_person() result for the selected revisions.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Lock inside the outer try so the progress bar is always
        # finished, even when taking or releasing the lock fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            for count, rev in enumerate(revisions):
                pb.update('checking', count, len(ancestry))
                author = rev.get_apparent_author()
                try:
                    email = config.extract_email_address(author)
                except errors.BzrError:
                    # No address could be extracted: key the revision on
                    # the whole author string instead.
                    email = author
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
158
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambiguous meaning
159
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
160
def display_info(info, to_file, gather_class_stats=None):
    """Write out the per-person statistics.

    All output, including the section headers, goes to to_file.

    :param info: iterable of (count, revs, emails, fullnames) tuples in
        the shape returned by collapse_by_person(); emails and fullnames
        are dicts mapping the value to the number of revisions using it.
    :param to_file: object with a write() method that receives the report.
    :param gather_class_stats: optional callable taking the revision list
        and returning (classes, total), where classes maps a class name
        (or None) to a count. When given, a percentage breakdown is
        written for every person.
    """
    def class_sort_key(item):
        # Ascending by count, then by name. The "is not None" flag keeps
        # the None class first among equal counts without ever comparing
        # None with a string.
        name, amount = item
        return (amount, name is not None, name)

    for count, revs, emails, fullnames in info:
        # Most frequently used email / name first.
        sorted_emails = sorted(((uses, email)
                                for email, uses in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((uses, fullname)
                                   for fullname, uses in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for uses, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (uses,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # NOTE(review): unlike the names above, this lists every
            # address including the primary one; kept as-is.
            for uses, email in sorted_emails:
                to_file.write('     %4d ' % (uses,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, amount in sorted(classes.items(), key=class_sort_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(amount) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
197
198
0.140.14 by Jelmer Vernooij
Merge upstream.
199
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions"),
        ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Use the working tree's last revision when LOCATION has a working
        # tree; otherwise fall back to the branch itself.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # Revision specs are resolved before the read lock is taken.
        other_tip = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other_tip = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if other_tip:
                # Two revisions given: only report what differs.
                info = get_diff_info(a_branch.repository, tip, other_tip)
            else:
                info = get_info(a_branch.repository, tip)
        finally:
            a_branch.unlock()

        fetch_class_stats = None
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        display_info(info, self.outf, fetch_class_stats)


commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
243
244
245
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Use the working tree's last revision when LOCATION has a working
        # tree; otherwise fall back to the branch itself.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph in reverse. Every entry bumps the
        # running ancestor count; each depth-0 entry gets the next revno
        # and produces one "revno, ancestors" output line.
        mainline_revno = 0
        ancestors_seen = 0
        ordered = tsort.merge_sort(graph.iteritems(), tip)
        for _num, _node_name, depth, _is_end in reversed(ordered):
            ancestors_seen += 1
            if depth != 0:
                continue
            mainline_revno += 1
            self.outf.write('%4d, %4d\n' % (mainline_revno, ancestors_seen))


commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
279
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
280
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
281
def gather_class_stats(repository, revs):
    """Tally the classes of change found in the deltas of revs.

    :param repository: repository providing lock_read(), unlock() and
        get_deltas_for_revisions().
    :param revs: sequence of revision objects.
    :return: (classes, total) where classes maps each class yielded by
        classify_delta() to its number of occurrences and total is the
        sum of all occurrences.
    """
    tally = {}
    grand_total = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for klass in classify_delta(delta):
                    tally[klass] = tally.get(klass, 0) + 1
                    grand_total += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return tally, grand_total
302
303
304
def display_credits(credits, to_file=None):
    """Write the credits, one section per kind of contribution.

    Empty sections are skipped; every section written is followed by a
    blank line.

    :param credits: tuple (coders, documenters, artists, translators),
        each a list of contributor names.
    :param to_file: optional object with a write() method that receives
        the output. When omitted the text is printed to standard output,
        exactly as before this parameter existed.
    """
    (coders, documenters, artists, translators) = credits

    def emit(line):
        if to_file is None:
            # Single-argument form: behaves the same as the print
            # statement on Python 2.
            print(line)
        else:
            to_file.write(line + "\n")

    def print_section(title, people):
        if len(people) == 0:
            return
        emit("%s:" % title)
        for person in people:
            emit("%s" % person)
        emit("")

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
317
318
319
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of revid credits its
    apparent author once for each distinct class that classify_delta()
    reports for the revision's delta.

    :param repository: repository providing lock_read(), unlock(),
        get_ancestry(), get_revisions() and get_deltas_for_revisions().
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators);
        each is a list of author strings, most contributions first.
    """
    ret = {
        "documentation": {},
        "code": {},
        "art": {},
        "translation": {},
        None: {},
        }
    repository.lock_read()
    try:
        ancestry = [rev_id for rev_id in repository.get_ancestry(revid)
                    if rev_id is not None]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, (rev, delta) in enumerate(izip(revs, deltas)):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    author = rev.get_apparent_author()
                    per_author = ret[c]
                    per_author[author] = per_author.get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest count first; equal counts fall back to the author string.
        ranked = sorted(ret[name].items(),
                        key=lambda pair: (pair[1], pair[0]),
                        reverse=True)
        return [author for author, _count in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
354
355
356
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Use the working tree's last revision when LOCATION has a working
        # tree; otherwise fall back to the branch itself.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision overrides the default tip.
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            found = find_credits(a_branch.repository, tip)
            display_credits(found)
        finally:
            a_branch.unlock()


commands.register_command(cmd_credits)
386
387
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
388
def test_suite():
    """Build the test suite for this plugin.

    :return: a unittest.TestSuite holding the tests loaded from the
        plugin's test modules (currently only test_classify).
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    short_names = ['test_classify']
    # Module names are resolved relative to this package.
    qualified = ['%s.%s' % (__name__, short) for short in short_names]
    suite = TestSuite()
    suite.addTest(TestLoader().loadTestsFromModuleNames(qualified))
    return suite
396