/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.26 by Jelmer Vernooij
Add copyright headers.
1
# Copyright (C) 2005-2008 Canonical Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
16
"""A Simple bzr plugin to generate statistics about the history."""
17
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
18
import re
19
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
20
from bzrlib.lazy_import import lazy_import
21
lazy_import(globals(), """
22
from bzrlib import (
23
    branch,
24
    commands,
25
    config,
26
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
27
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
28
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
29
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
30
    workingtree,
31
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
32
from bzrlib.plugins.stats.classify import classify_delta
33
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
34
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
35
36
37
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer strings; each one is passed to
        config.parse_username() and the first element of the result is
        used as the full name.
    :return: list of (count, fullname) tuples, highest count first.
    """
    tally = {}
    for committer in lst:
        name = config.parse_username(committer)[0]
        tally[name] = tally.get(name, 0) + 1
    ranked = [(occurrences, name) for name, occurrences in tally.items()]
    ranked.sort(reverse=True)
    return ranked
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
47
48
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
49
def collapse_by_person(committers):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email address to the list of
        revisions recorded under it. Each revision must provide
        get_apparent_author(). The dict and its lists are not modified.
    :return: list of (revision_count, revisions, {email: count},
        {fullname: count}) tuples, largest revision_count first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.get_apparent_author() for rev in revs)
        match = None
        for count, fullname in fullnames:
            # Blank names never identify a person, so they cannot match.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse to records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Store a copy of the revision list: later merges extend it, and
            # that must not change the lists owned by the caller.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    collapsed = [(len(person_revs), person_revs, emails, names)
                 for person_revs, emails, names in counter_to_info.values()]
    # Order by revision count only. Comparing whole tuples would fall back to
    # comparing revision lists and dicts whenever two counts are equal.
    collapsed.sort(key=lambda entry: entry[0], reverse=True)
    return collapsed
95
96
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
97
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by their apparent author.

    :param a_repo: repository whose get_revisions() is used to load the
        revisions.
    :param revids: sequence of revision ids (its length is used for
        progress reporting).
    :return: dict mapping the author's email address to the list of that
        author's revisions. When the parsed email is blank, the parsed
        name is used as the key instead.
    """
    by_author = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        loaded = a_repo.get_revisions(revids)
        for position, revision in enumerate(loaded):
            progress.update('checking', position, len(revids))
            parsed = config.parse_username(revision.get_apparent_author())
            key = parsed[1]
            if key == '':
                # No email recorded; fall back to the name so that such
                # revisions are not all lumped under one blank key.
                key = parsed[0]
            if key not in by_author:
                by_author[key] = []
            by_author[key].append(revision)
    finally:
        progress.finished()

    return by_author
115
116
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
117
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked for the
        duration of the scan.
    :param revision: revision id whose ancestry is analysed.
    :return: the result of collapse_by_person() for the revisions in the
        ancestry.
    """
    pb = ui.ui_factory.nested_progress_bar()
    # Nest the cleanups so the progress bar is always finished, even when
    # taking or releasing the lock raises.
    try:
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped -- presumably the null
            # revision placeholder (find_credits filters None out of the
            # same call); TODO confirm.
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
131
132
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
133
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the scan.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is analysed.
    :return: the result of collapse_by_person() for the revisions that are
        in the ancestry of end_rev but not in that of start_rev.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    # Nest the cleanups so the progress bar is always finished, even when
    # taking or releasing the lock raises.
    try:
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            for count, rev in enumerate(revisions):
                pb.update('checking', count, len(ancestry))
                author = rev.get_apparent_author()
                try:
                    email = config.extract_email_address(author)
                except errors.BzrError:
                    # No usable email address: key on the whole author
                    # string instead.
                    email = author
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
163
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
164
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
165
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of (count, revs, emails, fullnames) tuples, where
        emails and fullnames are dicts mapping a string to a count (the
        shape returned by collapse_by_person).
    :param to_file: file-like object; all output, including the section
        headers, is written with its write() method.
    :param gather_class_stats: optional callable taking the revs list and
        returning (classes, total), where classes maps a class name (or
        None) to a count. When given, a "Contributions" section is written
        for every entry.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and full name first.
        sorted_emails = sorted(((n, email) for email, n in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((n, fullname)
                                   for fullname, n in fullnames.items()),
                                  reverse=True)
        # There is a chance sometimes with svn imports that the full name and
        # email can BOTH be blank.
        if sorted_fullnames[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for n, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (n,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Every address is listed with its own count, including the one
            # on the summary line (which shows the person's total instead).
            for n, email in sorted_emails:
                to_file.write('     %4d ' % (n,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Ascending by (count, name). The middle key element keeps the
            # None class ahead of named classes on equal counts without ever
            # comparing None to a string.
            ranked = sorted(classes.items(),
                            key=lambda item: (item[1], item[0] is not None,
                                              item[0] or ''))
            for name, n in ranked:
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(n) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
208
209
0.140.14 by Jelmer Vernooij
Merge upstream.
210
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree at LOCATION; fall back to opening the
        # branch directly when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # Revision specs are resolved before the read lock is taken.
        other_rev = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not other_rev:
                info = get_info(a_branch.repository, tip)
            else:
                # Two revisions given: only report what the second one
                # adds on top of the first.
                info = get_diff_info(a_branch.repository, tip, other_rev)
        finally:
            a_branch.unlock()

        class_stats = None
        if show_class:
            class_stats = lambda revs: gather_class_stats(
                a_branch.repository, revs)
        display_info(info, self.outf, class_stats)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
251
252
0.140.14 by Jelmer Vernooij
Merge upstream.
253
# Register the committer statistics command with bzrlib's command registry.
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
254
255
256
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree at LOCATION; fall back to opening the
        # branch directly when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph in reverse and emit one CSV row per
        # depth-0 entry (presumably the mainline revisions -- TODO confirm
        # against tsort.merge_sort): a running row number and the number of
        # entries visited so far.
        row = 0
        visited = 0
        ordered = tsort.merge_sort(graph.iteritems(), tip)
        for num, node_name, depth, isend in reversed(ordered):
            visited += 1
            if depth != 0:
                continue
            row += 1
            self.outf.write('%4d, %4d\n' % (row, visited))
287
288
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
289
# Register the ancestor growth command with bzrlib's command registry.
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
290
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
291
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
292
def gather_class_stats(repository, revs):
    """Tally the classes reported by classify_delta() for ``revs``.

    :param repository: repository providing get_deltas_for_revisions(); it
        is read-locked while the deltas are processed.
    :param revs: sequence of revisions (its length is used for progress
        reporting).
    :return: (tally, total) where tally maps each class yielded by
        classify_delta() to the number of times it was seen and total is
        the sum of those counts.
    """
    tally = {}
    total = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for kind in classify_delta(delta):
                    tally[kind] = tally.get(kind, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return tally, total
313
314
315
def display_credits(credits):
    """Print the contributors, grouped by kind of contribution, to stdout.

    :param credits: tuple of four lists of names in the order
        (coders, documenters, artists, translators), as returned by
        find_credits(). Empty lists produce no output at all.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(title, contributors):
        # Skip the heading entirely when nobody contributed to this section.
        if len(contributors) == 0:
            return
        # Single-argument print(...) behaves the same whether print is a
        # statement (Python 2) or a function (Python 3).
        print("%s:" % title)
        for contributor in contributors:
            print("%s" % contributor)
        print("")

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
328
329
330
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` is classified
    with classify_delta() and counted once per class for its apparent
    author.

    :return: tuple with (authors, documenters, artists, translators); each
        element is a list of author strings, most contributions first.
    """
    tallies = {}
    for kind in ("documentation", "code", "art", "translation", None):
        tallies[kind] = {}

    repository.lock_read()
    try:
        ancestry = [ancestor for ancestor in repository.get_ancestry(revid)
                    if ancestor is not None]
        revs = repository.get_revisions(ancestry)
        progress = ui.ui_factory.nested_progress_bar()
        try:
            pairs = izip(revs, repository.get_deltas_for_revisions(revs))
            for index, (rev, delta) in enumerate(pairs):
                progress.update("analysing revisions", index, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set(): a revision counts at most once per class.
                for kind in set(classify_delta(delta)):
                    author = rev.get_apparent_author()
                    per_author = tallies[kind]
                    per_author[author] = per_author.get(author, 0) + 1
        finally:
            progress.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest count first; equal counts are ordered by author string,
        # also descending.
        ranked = sorted(tallies[name].items(),
                        key=lambda item: (item[1], item[0]), reverse=True)
        return [author for author, count in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
365
366
367
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree at LOCATION; fall back to opening the
        # branch directly when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision overrides the tip found above.
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            # NOTE(review): display_credits() prints to stdout rather than
            # writing to self.outf.
            display_credits(find_credits(a_branch.repository, tip))
        finally:
            a_branch.unlock()
394
395
396
# Register the credits command with bzrlib's command registry.
commands.register_command(cmd_credits)
397
398
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
399
def test_suite():
    """Build the test suite for this plugin.

    :return: a unittest TestSuite holding the tests loaded from the test
        modules listed below, resolved relative to this module's name.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    testmod_names = ['test_classify']
    qualified_names = []
    for modname in testmod_names:
        qualified_names.append('%s.%s' % (__name__, modname))
    suite = TestSuite()
    suite.addTest(TestLoader().loadTestsFromModuleNames(qualified_names))
    return suite
407