/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
# Copyright (C) 2005-2008 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A Simple bzr plugin to generate statistics about the history."""
17
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
18
import re
import sys

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import (
    branch,
    commands,
    config,
    errors,
    option,
    tsort,
    ui,
    workingtree,
    )
from bzrlib.plugins.stats.classify import classify_delta
from itertools import izip
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
35
36
37
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: Iterable of committer strings.  Each one is split with
        ``config.parse_username`` and only the first element of the result
        (the name part) is kept.
    :return: List of ``(count, fullname)`` tuples sorted in descending
        order, so the most frequently used name comes first.
    """
    counts = {}
    for committer in lst:
        fullname = config.parse_username(committer)[0]
        counts[fullname] = counts.get(fullname, 0) + 1
    return sorted(((count, name) for name, count in counts.iteritems()),
                  reverse=True)
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
47
48
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
49
def collapse_by_person(committers):
    """Merge per-email revision lists into one record per person.

    ``committers`` is keyed by email address, but some people commit with
    the same name under several addresses.  Email is actually the more
    stable key, since people frequently forget to set their name properly,
    so two addresses are only merged when a full name used with one of them
    has already been seen with an earlier one.

    :param committers: Dictionary mapping an email address to the list of
        revisions attributed to it.  Neither the dictionary nor its lists
        are modified.
    :return: List of ``(rev_count, revs, emails, fullnames)`` tuples sorted
        in descending order.  ``emails`` maps each address of the person to
        its number of revisions and ``fullnames`` maps each name to the
        number of times it was used.
    """
    # Just an indirection so that multiple names can reference the same
    # record information.
    name_to_counter = {}
    # Indirection back to the real information:
    # counter -> [full_rev_list, {email: count}, {fullname: count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.iteritems():
        authors = []
        for rev in revs:
            authors += rev.get_apparent_authors()
        fullnames = find_fullnames(authors)
        match = None
        for count, fullname in fullnames:
            # Blank names are never used as a merge key.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, so fold this address into the
            # existing record.
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # Nothing matched: start a new record for this address.
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count)
                             for count, fullname in fullnames)
            # Store a copy: the record's list is extended when another
            # address is merged in, which must not alter the caller's data.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    return sorted(((len(revs), revs, emails, fullnames)
                   for revs, emails, fullnames in counter_to_info.values()),
                  reverse=True)
98
99
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
100
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by the address of each author.

    :param a_repo: Repository the revisions are fetched from with
        ``get_revisions``.
    :param revids: Sequence of revision ids; its length is used as the
        progress total.
    :return: Dictionary mapping a key to the list of revisions carrying an
        author with that key.  The key is the email part of the author
        string, or the name part when the email part is blank.  A revision
        with several authors is listed under each of them.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        pb.note('getting revisions')
        revisions = a_repo.get_revisions(revids)
        for count, rev in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                parsed = config.parse_username(author)
                # Fall back to the name when no email address was given, so
                # that address-less revisions are not all lumped together.
                email = parsed[1]
                if email == '':
                    email = parsed[0]
                committers.setdefault(email, []).append(rev)
    finally:
        pb.finished()

    return committers
119
120
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
121
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: Repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: Revision id whose ancestry is examined.
    :return: The result of ``collapse_by_person`` for every revision in the
        ancestry of ``revision``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so the progress bar is
        # always finished, even if locking or unlocking fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # Skip the first entry of the ancestry list -- presumably the
            # None placeholder that find_credits filters out of the same
            # call's result.
            ancestry = a_repo.get_ancestry(revision)[1:]
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
135
136
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
137
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: Repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: Revision id whose ancestry is excluded.
    :param end_rev: Revision id whose ancestry is examined.
    :return: The result of ``collapse_by_person`` for the revisions that are
        in the ancestry of ``end_rev`` but not in that of ``start_rev``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so the progress bar is
        # always finished, even if locking or unlocking fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            # Group with the same helper as get_info so that both code
            # paths attribute a revision to the same key.
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
168
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
169
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
170
def display_info(info, to_file, gather_class_stats=None):
    """Write the per-person statistics to ``to_file``.

    :param info: Iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are dictionaries mapping an
        address or a name to the number of times it was used.
    :param to_file: File-like object; only its ``write`` method is used and
        all output, including the section headers, goes through it.
    :param gather_class_stats: Optional callable taking the revisions of one
        person and returning ``(classes, total)``, where ``classes`` maps a
        class name (or None) to a count.  When given, a percentage
        breakdown is written for every person.
    """
    for count, revs, emails, fullnames in info:
        # Most frequently used address and name first.
        sorted_emails = sorted(((uses, email)
                                for email, uses in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((uses, fullname)
                                   for fullname, uses in fullnames.items()),
                                  reverse=True)
        # There is a chance sometimes with svn imports that the full name and
        # email can BOTH be blank.
        if sorted_fullnames[0][1] == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for uses, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (uses,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Unlike the names above, every address is listed, including
            # the primary one already shown on the summary line.
            for uses, email in sorted_emails:
                to_file.write('     %4d ' % (uses,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Smallest share first; ties are broken by class name.
            for name, uses in sorted(classes.items(),
                                     key=lambda item: (item[1], item[0])):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(uses) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
213
214
0.140.14 by Jelmer Vernooij
Merge upstream.
215
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions"),
        ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree's last revision; fall back to the branch
        # tip when LOCATION has no working tree.
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        # Revision specs are resolved before the read lock below is taken:
        # per the history note attached to this code, this lets "branch:"
        # specs work, which need a write lock.
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if alternate_rev:
                # Two revisions given: only report what the second one adds
                # on top of the first.
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        finally:
            a_branch.unlock()
        if show_class:
            # display_info only passes the revisions, so bind the
            # repository here.
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
259
260
261
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree's last revision; fall back to the branch
        # tip when LOCATION has no working tree.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        revno = 0
        cur_parents = 0
        sorted_graph = tsort.merge_sort(graph.iteritems(), last_rev)
        # Walk the merge-sorted graph back to front, counting every node
        # seen so far and writing one "revno, node count" line per
        # depth-0 node (presumably the mainline revisions).
        for num, node_name, depth, isend in reversed(sorted_graph):
            cur_parents += 1
            if depth == 0:
                revno += 1
                self.outf.write('%4d, %4d\n' % (revno, cur_parents))


commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
295
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
296
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
297
def gather_class_stats(repository, revs):
    """Count the classes of the changes made by a list of revisions.

    :param repository: Repository holding ``revs``; it is read-locked while
        the deltas are classified.
    :param revs: List of revisions to classify; its length is used as the
        progress total.
    :return: ``(classes, total)`` where ``classes`` maps every class
        yielded by ``classify_delta`` to its number of occurrences and
        ``total`` is the sum of those counts.
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        pb.finished()
    return ret, total
318
319
320
def display_credits(credits, to_file=None):
    """Write the contributor names of every non-empty credit section.

    :param credits: Tuple ``(coders, documenters, artists, translators)``,
        each a list of names.
    :param to_file: File-like object to write to; only its ``write`` method
        is used.  Defaults to ``sys.stdout``, looked up at call time.
    """
    if to_file is None:
        to_file = sys.stdout
    (coders, documenters, artists, translators) = credits

    def write_section(title, names):
        # Sections nobody contributed to are left out entirely.
        if len(names) == 0:
            return
        to_file.write("%s:\n" % title)
        for name in names:
            to_file.write("%s\n" % name)
        # Blank line separating this section from the next.
        to_file.write("\n")

    write_section("Code", coders)
    write_section("Documentation", documenters)
    write_section("Art", artists)
    write_section("Translations", translators)
333
334
335
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` is classified
    with ``classify_delta`` and credited once per distinct class to each of
    its apparent authors.

    :param repository: Repository to read from; it is read-locked for the
        duration of the scan.
    :param revid: Revision id whose ancestry is examined.
    :return: tuple with (authors, documenters, artists, translators); each
        element is a list of author strings, the one with the most credited
        revisions first.
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    repository.lock_read()
    try:
        ancestry = [ancestor for ancestor in repository.get_ancestry(revid)
                    if ancestor is not None]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, (rev, delta) in enumerate(izip(revs, deltas)):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set(): a revision counts once per class, however many
                # times classify_delta yields that class.
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        ret[c][author] = ret[c].get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest count first; ties are broken by author string, also in
        # descending order.
        ranked = sorted(ret[name].items(),
                        key=lambda item: (item[1], item[0]),
                        reverse=True)
        return [author for author, count in ranked]

    # Revisions classified as None are counted above but not reported.
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
370
371
372
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree's last revision; fall back to the branch
        # tip when LOCATION has no working tree.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        # An explicit revision overrides the default; only the first
        # revision spec is used.
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits)
        finally:
            a_branch.unlock()


commands.register_command(cmd_credits)
402
403
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
404
def test_suite():
    """Build the test suite for this plugin.

    :return: A ``unittest.TestSuite`` holding the tests of every module
        listed in ``testmod_names``, looked up relative to this module.
    """
    # Imported here rather than at module level, so they are only loaded
    # when the suite is actually built.
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    testmod_names = ['test_classify']
    suite = TestSuite()
    loader = TestLoader()
    suite.addTest(loader.loadTestsFromModuleNames(
        ['%s.%s' % (__name__, name) for name in testmod_names]))
    return suite
412