/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.26 by Jelmer Vernooij
Add copyright headers.
1
# Copyright (C) 2005-2008 Canonical Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
16
"""A Simple bzr plugin to generate statistics about the history."""
17
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
18
import re
19
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
20
from bzrlib.lazy_import import lazy_import
21
lazy_import(globals(), """
22
from bzrlib import (
23
    branch,
24
    commands,
25
    config,
26
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
27
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
28
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
29
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
30
    workingtree,
31
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
32
from bzrlib.plugins.stats.classify import classify_delta
33
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
34
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
35
36
37
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer strings.  The full name of each one is
        the first element returned by ``config.parse_username``.
    :return: list of ``(count, fullname)`` tuples sorted in descending order.
    """
    tally = {}
    for committer in lst:
        fullname = config.parse_username(committer)[0]
        tally[fullname] = tally.get(fullname, 0) + 1
    # Put the count first so that sorting orders by frequency, then by name.
    ranked = [(seen, fullname) for fullname, seen in tally.items()]
    ranked.sort(reverse=True)
    return ranked
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
47
48
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
49
def collapse_by_person(committers):
    """The committers dict is keyed by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email address to the list of
        revisions filed under it.  Neither the dict nor its lists are
        modified.
    :return: list of ``(count, revs, emails, fullnames)`` tuples ordered by
        descending ``count``.  ``revs`` is the combined revision list,
        ``emails`` maps each address to its number of revisions and
        ``fullnames`` maps each full name to the number of times it was used.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        # NOTE(review): the names come from get_apparent_author() (singular)
        # while the rest of this file iterates get_apparent_authors(); for a
        # revision with several authors the name counted here may not belong
        # to this email address -- confirm before changing.
        fullnames = find_fullnames(rev.get_apparent_author() for rev in revs)
        match = None
        for count, fullname in fullnames:
            # Empty names are never used to link two addresses together.
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse the records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Store a copy of the revision list: it is extended when another
            # address is merged into this record, and that must not leak back
            # into the caller's dict.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    records = [(len(person_revs), person_revs, emails, fnames)
               for person_revs, emails, fnames in counter_to_info.values()]
    # Order on the count only.  Comparing whole tuples would fall back to
    # comparing revision lists and dicts whenever two counts are equal.
    records.sort(key=lambda record: record[0], reverse=True)
    return records
95
96
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
97
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by their authors.

    Each apparent author of a revision is parsed with
    ``config.parse_username``.  The revision is filed under the second
    element of the result (the email), or under the first element (the name)
    when the email is an empty string.

    :param a_repo: repository the revisions are fetched from.
    :param revids: revision ids to fetch.
    :return: dict mapping that key to the list of revisions filed under it.
    """
    by_author = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        fetched = a_repo.get_revisions(revids)
        for index, revision in enumerate(fetched):
            progress.update('checking', index, len(revids))
            for author in revision.get_apparent_authors():
                parsed = config.parse_username(author)
                key = parsed[1]
                if key == '':
                    # No email available, fall back to the name.
                    key = parsed[0]
                if key not in by_author:
                    by_author[key] = []
                by_author[key].append(revision)
    finally:
        progress.finished()

    return by_author
116
117
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
118
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: revision id whose ancestry is analysed.
    :return: the result of ``collapse_by_person`` for the ancestry.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so that the progress bar is
        # still finished when locking itself fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first entry of the ancestry is skipped -- presumably the
            # None placeholder that find_credits filters out explicitly.
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
132
133
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
134
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is analysed.
    :return: the result of ``collapse_by_person`` for the revisions that are
        in the ancestry of ``end_rev`` but not in that of ``start_rev``.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so that the progress bar is
        # still finished when locking itself fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first entry of the ancestry is skipped -- presumably the
            # None placeholder that find_credits filters out explicitly.
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            for count, rev in enumerate(revisions):
                pb.update('checking', count, len(ancestry))
                for author in rev.get_apparent_authors():
                    try:
                        email = config.extract_email_address(author)
                    except errors.BzrError:
                        # No usable address: file the revision under the
                        # complete author string instead.
                        email = author
                    committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
165
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
166
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
167
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    Everything, including the section headers, is written to ``to_file``.

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples.
        ``emails`` and ``fullnames`` are dicts mapping an address / a full
        name to a count.
    :param to_file: object with a ``write`` method receiving the report.
    :param gather_class_stats: optional callable taking ``revs`` and
        returning ``(classes, total)``, where ``classes`` maps a class name
        (or None) to a count.  When given, a "Contributions" section with the
        percentage of each class is added for every person.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and name
        sorted_emails = sorted(((num, email)
                                for email, num in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((num, fullname)
                                   for fullname, num in fullnames.items()),
                                  reverse=True)
        top_name = sorted_fullnames[0][1]
        # There is a chance sometimes with svn imports that the full name and
        # email can BOTH be blank.
        if top_name == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, top_name, sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for num, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (num,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # NOTE(review): unlike the names above, this list does not skip
            # its first entry; kept as it was.
            for num, email in sorted_emails:
                to_file.write('     %4d ' % (num,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Ascending by count, then by class name, with the None class
            # ahead of named classes that have the same count.
            ordered = sorted(
                classes.items(),
                key=lambda item: (item[1], item[0] is not None, item[0] or ''))
            for name, num in ordered:
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(num) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
210
211
0.140.14 by Jelmer Vernooij
Merge upstream.
212
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Use the working tree containing LOCATION when there is one,
        # otherwise open LOCATION as a branch.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            first_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            first_rev = tree.last_revision()

        # An explicit revision replaces the tip; a second one selects the
        # "difference between two revisions" report.
        second_rev = None
        if revision is not None:
            first_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                second_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not second_rev:
                info = get_info(a_branch.repository, first_rev)
            else:
                info = get_diff_info(a_branch.repository, first_rev,
                                     second_rev)
        finally:
            a_branch.unlock()

        fetch_class_stats = None
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        display_info(info, self.outf, fetch_class_stats)


commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
256
257
258
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Use the working tree containing LOCATION when there is one,
        # otherwise open LOCATION as a branch.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph backwards and write one
        # "revno, revisions seen so far" line per entry at depth 0.
        revno = 0
        merged = tsort.merge_sort(graph.iteritems(), tip)
        for position, (_seq, _node, depth, _end) in enumerate(reversed(merged)):
            if depth != 0:
                continue
            revno += 1
            self.outf.write('%4d, %4d\n' % (revno, position + 1))


commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
292
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
293
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
294
def gather_class_stats(repository, revs):
    """Count the classes of the changes made by ``revs``.

    The delta of every revision is passed to ``classify_delta`` and each
    class it yields is counted once.

    :param repository: repository providing the deltas; it is read-locked
        while they are processed.
    :param revs: revisions to classify.
    :return: ``(counts, total)`` where ``counts`` maps each class to the
        number of times it was yielded and ``total`` is the sum of all counts.
    """
    tally = {}
    classified = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for kind in classify_delta(delta):
                    tally[kind] = tally.get(kind, 0) + 1
                    classified += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return tally, classified
315
316
317
def display_credits(credits, to_file=None):
    """Write the credits, one titled section per kind of contribution.

    A section consists of a ``Title:`` line, one line per name and a blank
    line.  Sections without names are left out.

    :param credits: tuple ``(coders, documenters, artists, translators)``,
        each a list of names.
    :param to_file: object with a ``write`` method receiving the output.
        Defaults to ``sys.stdout``, looked up at call time.
    """
    if to_file is None:
        import sys
        to_file = sys.stdout
    (coders, documenters, artists, translators) = credits

    def write_section(title, people):
        if not people:
            return
        to_file.write("%s:\n" % title)
        for person in people:
            to_file.write("%s\n" % person)
        to_file.write("\n")

    write_section("Code", coders)
    write_section("Documentation", documenters)
    write_section("Art", artists)
    write_section("Translations", translators)
330
331
332
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` is classified with
    ``classify_delta`` and counted once per class for each of its apparent
    authors.

    :param repository: repository to read from; it is read-locked while the
        revisions are analysed.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators), each a
        list of author strings with the highest count first.
    """
    per_class = {
        "documentation": {},
        "code": {},
        "art": {},
        "translation": {},
        None: {},
    }
    repository.lock_read()
    try:
        ancestry = [r for r in repository.get_ancestry(revid)
                    if r is not None]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            pairs = izip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(pairs):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        bucket = per_class[c]
                        bucket[author] = bucket.get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest count first; equal counts are ordered by descending name.
        ranked = sorted(per_class[name].items(),
                        key=lambda item: (item[1], item[0]), reverse=True)
        return [author for author, _count in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
367
368
369
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Use the working tree containing LOCATION when there is one,
        # otherwise open LOCATION as a branch.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision replaces the tip found above.
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            display_credits(find_credits(a_branch.repository, tip))
        finally:
            a_branch.unlock()


commands.register_command(cmd_credits)
399
400
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
401
def test_suite():
    """Build the test suite of this plugin.

    :return: a ``unittest.TestSuite`` holding the tests loaded from the test
        modules of this package.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    result = TestSuite()
    loader = TestLoader()
    # Test modules are named relative to this package.
    module_names = []
    for short_name in ['test_classify']:
        module_names.append('%s.%s' % (__name__, short_name))
    result.addTest(loader.loadTestsFromModuleNames(module_names))
    return result
409