/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.26 by Jelmer Vernooij
Add copyright headers.
1
# Copyright (C) 2005-2008 Canonical Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
16
"""A Simple bzr plugin to generate statistics about the history."""
17
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
18
from bzrlib.lazy_import import lazy_import
19
lazy_import(globals(), """
20
from bzrlib import (
21
    branch,
22
    commands,
23
    config,
24
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
25
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
26
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
27
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
28
    workingtree,
29
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
30
from bzrlib.plugins.stats.classify import classify_delta
31
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
32
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
33
34
35
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: Iterable of committer strings.  Each one is passed to
        ``config.parse_username`` and the first element of the result is
        taken as the full name.
    :return: List of ``(count, fullname)`` tuples sorted in descending
        order, i.e. the most frequently used name comes first.
    """
    counts = {}
    for committer in lst:
        fullname = config.parse_username(committer)[0]
        counts[fullname] = counts.get(fullname, 0) + 1
    # items() instead of iteritems(): the latter only exists on Python 2,
    # and the pairs are consumed immediately by sorted() either way.
    return sorted(((count, name) for name, count in counts.items()),
                  reverse=True)
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
45
46
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
47
def collapse_by_person(committers):
    """The committers dict is keyed by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So entries whose (non-empty) author full names overlap with an entry
    seen earlier are merged into that earlier entry's record.

    :param committers: Dict mapping an email (or fallback identifier) to
        the list of revisions attributed to it.  It is not modified.
    :return: List of ``(rev_count, revs, {email: count}, {fullname: count})``
        tuples ordered by ``rev_count``, largest first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        authors = []
        for rev in revs:
            authors += rev.get_apparent_authors()
        fullnames = find_fullnames(authors)
        match = None
        for count, fullname in fullnames:
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse the records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                # Empty names are never used for matching, so do not
                # register them (same rule as the branch below).
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Store a copy of revs: a later match extends this list in
            # place, which must not alter the caller's committers dict.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    # Order on the revision count only.  Without a key, ties would fall
    # through to comparing the revision lists and the dicts themselves,
    # which is arbitrary on Python 2 and a TypeError on Python 3.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda entry: entry[0], reverse=True)
96
97
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
98
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by author identity.

    Each apparent author of a revision is run through
    ``config.parse_username``.  The second element of the result is used
    as the grouping key (presumably the email address -- confirm against
    bzrlib), unless it is the empty string, in which case the first
    element is used instead.  A revision with several apparent authors is
    listed once under each of them.

    :param a_repo: Repository to fetch the revisions from.
    :param revids: Revision ids to look at.
    :return: Dict mapping the grouping key to a list of revisions.
    """
    by_identity = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        fetched = a_repo.get_revisions(revids)
        for index, rev in enumerate(fetched):
            progress.update('checking', index, len(revids))
            for author in rev.get_apparent_authors():
                parsed = config.parse_username(author)
                key = parsed[1]
                if key == '':
                    # No second component: fall back to the first one so
                    # such authors are not all lumped under ''.
                    key = parsed[0]
                if key not in by_identity:
                    by_identity[key] = []
                by_identity[key].append(rev)
    finally:
        progress.finished()

    return by_identity
117
118
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
119
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: Repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: Revision id whose ancestry is analysed.
    :return: The result of ``collapse_by_person`` for the ancestry.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is
        # finished even if locking fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped -- presumably the null
            # placeholder that get_ancestry puts first; confirm in bzrlib.
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
133
134
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
135
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: Repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: Revision id whose ancestry is excluded.
    :param end_rev: Revision id whose ancestry is analysed.
    :return: The result of ``collapse_by_person`` for the revisions that
        are in the ancestry of ``end_rev`` but not of ``start_rev``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    committers = {}
    try:
        # Take the lock inside the outer try so the progress bar is
        # finished even if locking fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            for count, rev in enumerate(revisions):
                pb.update('checking', count, len(ancestry))
                for author in rev.get_apparent_authors():
                    try:
                        email = config.extract_email_address(author)
                    except errors.BzrError:
                        # No usable address: key on the raw author string
                        # instead.
                        email = author
                    committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    info = collapse_by_person(committers)
    return info
166
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
167
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
168
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information.

    :param info: Iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are dicts mapping a string to
        the number of times it was used.
    :param to_file: File-like object; all output goes through its
        ``write`` method.
    :param gather_class_stats: Optional callable that takes ``revs`` and
        returns ``(classes, total)``, ``classes`` being a dict mapping a
        class name (or None) to a count.  When given, a percentage
        breakdown is written for every entry.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and name first
        sorted_emails = sorted(((num, email)
                                for email, num in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((num, fullname)
                                   for fullname, num in fullnames.items()),
                                  reverse=True)
        # There is a chance sometimes with svn imports that the full name and
        # email can BOTH be blank.
        if sorted_fullnames[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headings go to to_file like everything else; they used to be
            # printed to stdout, which split the report across two streams.
            to_file.write('     Other names:\n')
            for num, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (num,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Unlike the names above, this list includes the primary
            # address again (existing behaviour, kept as is).
            for num, email in sorted_emails:
                to_file.write('     %4d ' % (num,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Ascending by (count, name).  The middle key element places a
            # None name before any string, matching Python 2 ordering
            # without comparing None to str directly.
            ordered = sorted(classes.items(),
                             key=lambda item: (item[1], item[0] is not None,
                                               item[0] or ''))
            for name, num in ordered:
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(num) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
211
212
0.140.14 by Jelmer Vernooij
Merge upstream.
213
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    # Command metadata read by the bzr command framework.
    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree's idea of the last revision; fall back
        # to the branch when LOCATION has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit -r overrides the tip; a second revision spec turns
        # the report into a difference between the two.
        other = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not other:
                info = get_info(a_branch.repository, tip)
            else:
                info = get_diff_info(a_branch.repository, tip,
                                     other)
        finally:
            a_branch.unlock()

        # Only hand display_info a classifier when --show-class was given.
        class_stats = None
        if show_class:
            def class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        display_info(info, self.outf, class_stats)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
254
255
0.140.14 by Jelmer Vernooij
Merge upstream.
256
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
257
258
259
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    # Command metadata read by the bzr command framework.
    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree's idea of the last revision; fall back
        # to the branch when LOCATION has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph in reverse, counting every node,
        # and write one "revno, nodes-so-far" line per depth-0 node.
        merge_sorted = tsort.merge_sort(graph.iteritems(), tip)
        mainline_revno = 0
        nodes_seen = 0
        for _num, _node_name, depth, _isend in reversed(merge_sorted):
            nodes_seen += 1
            if depth != 0:
                continue
            mainline_revno += 1
            self.outf.write('%4d, %4d\n' % (mainline_revno, nodes_seen))
290
291
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
292
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
293
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
294
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
295
def gather_class_stats(repository, revs):
    """Tally the classes of change found in the deltas of ``revs``.

    Every delta returned by ``repository.get_deltas_for_revisions(revs)``
    is passed to ``classify_delta`` and each class it yields is counted.

    :param repository: Repository to read the deltas from; it is
        read-locked while they are processed.
    :param revs: Revisions to classify.
    :return: ``(tally, total)`` where ``tally`` maps each class to the
        number of times it was yielded and ``total`` is the sum of them.
    """
    tally = {}
    n_classified = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for kind in classify_delta(delta):
                    tally[kind] = tally.get(kind, 0) + 1
                    n_classified += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return tally, n_classified
316
317
318
def display_credits(credits, to_file=None):
    """Write the credits, one section per kind of contribution.

    Empty sections are skipped.  Each section is a ``"<title>:"`` line,
    one line per name, and a trailing blank line.

    :param credits: Tuple ``(coders, documenters, artists, translators)``,
        each a sequence of names.
    :param to_file: Optional file-like object to write to.  Defaults to
        ``sys.stdout`` (looked up at call time), which is where the
        output went before this parameter existed.
    """
    if to_file is None:
        import sys
        to_file = sys.stdout
    (coders, documenters, artists, translators) = credits

    def write_section(title, people):
        if len(people) == 0:
            return
        to_file.write("%s:\n" % title)
        for person in people:
            to_file.write("%s\n" % person)
        to_file.write("\n")

    write_section("Code", coders)
    write_section("Documentation", documenters)
    write_section("Art", artists)
    write_section("Translations", translators)
331
332
333
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` is classified
    with ``classify_delta`` and each apparent author is credited once per
    class the revision touches.

    :param repository: Repository to read from; it is read-locked while
        the ancestry is analysed.
    :param revid: Revision id whose ancestry is credited.
    :return: tuple with (coders, documenters, artists, translators), each
        a list of author strings with the largest contributors first.
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    repository.lock_read()
    try:
        ancestry = [ancestor for ancestor in repository.get_ancestry(revid)
                    if ancestor is not None]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for i, (rev, delta) in enumerate(
                    izip(revs, repository.get_deltas_for_revisions(revs))):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # NOTE(review): assumes classify_delta only yields keys
                # present in ret -- confirm against classify.py.
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        ret[c][author] = ret[c].get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Descending by (count, author).  A key function replaces the
        # cmp-style comparator and tuple-parameter lambda, neither of
        # which exists on Python 3; the resulting order is the same.
        ranked = sorted(ret[name].items(),
                        key=lambda item: (item[1], item[0]), reverse=True)
        return [author for author, count in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
368
369
370
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    # Command metadata read by the bzr command framework.
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree's idea of the last revision; fall back
        # to the branch when LOCATION has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit -r overrides the tip.
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            found = find_credits(a_branch.repository, tip)
            display_credits(found)
        finally:
            a_branch.unlock()
397
398
399
commands.register_command(cmd_credits)
400
401
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
402
def test_suite():
    """Build the test suite for this plugin.

    :return: A ``unittest.TestSuite`` holding the tests loaded from the
        plugin's test modules, which are named relative to this module.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    result = TestSuite()
    test_loader = TestLoader()
    qualified = []
    for short_name in [ 'test_classify']:
        qualified.append('%s.%s' % (__name__, short_name))
    result.addTest(test_loader.loadTestsFromModuleNames(qualified))
    return result
410