/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.147.1 by John Arbash Meinel
Improve the committer matcher tremendously.
1
# Copyright (C) 2006-2010 Canonical Ltd
0.140.26 by Jelmer Vernooij
Add copyright headers.
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
16
"""A Simple bzr plugin to generate statistics about the history."""
17
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
18
from bzrlib.lazy_import import lazy_import
19
lazy_import(globals(), """
20
from bzrlib import (
21
    branch,
22
    commands,
23
    config,
24
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
25
    option,
0.147.1 by John Arbash Meinel
Improve the committer matcher tremendously.
26
    trace,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
27
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
28
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
29
    workingtree,
30
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
31
from bzrlib.plugins.stats.classify import classify_delta
32
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
33
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
34
35
0.147.1 by John Arbash Meinel
Improve the committer matcher tremendously.
36
def collapse_by_person(revisions, canonical_committer):
    """Group revisions by canonical person and rank people by activity.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    Every apparent author of every revision is looked up in
    canonical_committer and credited to the identity found there.

    :param revisions: iterable of revision objects providing
        ``get_apparent_authors()``.
    :param canonical_committer: mapping of ``(username, email)`` to the
        canonical identity for that combination. Every combination that
        occurs in revisions must be present.
    :return: list of ``(count, revisions, emails, fullnames)`` tuples with
        the largest count first. ``emails`` and ``fullnames`` map each
        spelling to the number of times it was used. A revision is listed
        once per author credit, so ``count`` is a number of credits.
    """
    # canonical identity -> ([revisions], {email: count}, {fullname: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].get(email, 0) + 1
            info[2][username] = info[2].get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Rank on the count alone.  Comparing the whole tuple would fall through
    # to comparing revision lists and dicts whenever two people tie.
    res.sort(key=lambda entry: entry[0], reverse=True)
    return res
63
64
65
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: mapping of email address to the set of user names
        seen with that address.
    :param combo_count: mapping of ``(username, email)`` to the number of
        times that exact combination was seen. It must contain every
        combination implied by email_users.
    :return: mapping of each ``(username, email)`` combination to the most
        frequently seen combination of the identity it belongs to.

    NOTE(review): combinations with an empty user name or an empty email are
    never used for matching, but all combinations sharing one email key
    (including the empty one) still land in the same identity.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        """Fold identity old_id into new_id, whose combo set is new_combos."""
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            # `user` and `email` are the values currently being processed by
            # the loop below; that loop records them itself, so skip them.
            if old_user and old_user != user:
                assert username_to_id[old_user] in (old_id, new_id)
                username_to_id[old_user] = new_id
            if old_email and old_email != email:
                assert email_to_id[old_email] in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        if email:
            email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user or not email:
                # We don't match on empty usernames and empty emails
                continue
            user_id = username_to_id.get(user)
            if user_id is not None and user_id != cur_id:
                # This user name already belongs to a different identity
                # than the current email: merge that identity into this one.
                collapse_ids(user_id, cur_id, id_combos)
            username_to_id[user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        if not combos:
            # An email with no user names contributes nothing.
            continue
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
117
118
119
def get_revisions_and_committers(a_repo, revids):
    """Load the revisions for revids and find the best committer match.

    :param a_repo: repository providing ``get_revisions()``.
    :param revids: revision ids to load; ``len(revids)`` is used as the
        progress total.
    :return: ``(revisions, canonical)`` where ``revisions`` is what
        ``a_repo.get_revisions(revids)`` returned and ``canonical`` is the
        result of collapse_email_and_users() for the authors found.
    """
    users_by_email = {}  # user@email.com => set of User Names
    occurrences = {}  # (User Name, user@email.com) => times seen
    progress = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        revisions = a_repo.get_revisions(revids)
        for index, rev in enumerate(revisions):
            progress.update('checking', index, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                if email not in users_by_email:
                    users_by_email[email] = set()
                users_by_email[email].add(username)
                key = (username, email)
                occurrences[key] = occurrences.get(key, 0) + 1
    finally:
        progress.finished()
    return revisions, collapse_email_and_users(users_by_email, occurrences)
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
140
141
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
142
def get_info(a_repo, revision):
    """Collect per-person statistics for the ancestry of one revision.

    :param a_repo: repository to read; it is read-locked while the ancestry
        and revisions are loaded.
    :param revision: revision id whose ancestry is analysed.
    :return: the result of collapse_by_person() for that ancestry.
    """
    progress = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        trace.note('getting ancestry')
        full_ancestry = a_repo.get_ancestry(revision)
        # The first entry is skipped -- presumably the None placeholder that
        # get_ancestry() puts at the front; TODO confirm.
        wanted = full_ancestry[1:]
        loaded = get_revisions_and_committers(a_repo, wanted)
        revs, canonical_committer = loaded
    finally:
        a_repo.unlock()
        progress.finished()

    return collapse_by_person(revs, canonical_committer)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
155
156
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
157
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read; it is read-locked while the
        ancestries and revisions are loaded.
    :param start_rev: revision id whose entire ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the result of collapse_by_person() for the revisions that are
        in the ancestry of end_rev but not in the ancestry of start_rev.
    """
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        # Report through trace.note(), the same way get_info() does.
        trace.note('getting ancestry 1')
        start_ancestry = set(a_repo.get_ancestry(start_rev))
        trace.note('getting ancestry 2')
        # The first entry is skipped, exactly as get_info() does.
        ancestry = a_repo.get_ancestry(end_rev)[1:]
        ancestry = [rev for rev in ancestry if rev not in start_ancestry]
        # This used to call sort_by_committer(), which this module does not
        # define; get_revisions_and_committers() is the helper that returns
        # the (revisions, canonical committer map) pair unpacked here.
        revs, canonical_committer = get_revisions_and_committers(a_repo,
                                                                 ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
176
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
177
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
178
def _write_counted(to_file, heading, counted):
    """Write heading, then one indented '<count> <value>' line per entry."""
    to_file.write(heading)
    for n, value in counted:
        to_file.write('     %4d ' % (n,))
        if value == '':
            # Show an empty name/address explicitly rather than as a blank.
            to_file.write("''\n")
        else:
            to_file.write("%s\n" % (value,))


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` map each spelling to the number
        of times it was used (both must be non-empty).
    :param to_file: object with a ``write()`` method; all output, including
        the contribution breakdown, goes there.
    :param gather_class_stats: optional callable taking the ``revs`` of one
        entry and returning ``(classes, total)``, where ``classes`` maps a
        class name (or None) to a count. When given, a percentage
        breakdown is written for every entry.
    """
    for count, revs, emails, fullnames in info:
        # Most frequently used spelling first.
        sorted_emails = sorted(((n, email)
                                for email, n in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((n, fullname)
                                   for fullname, n in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            _write_counted(to_file, '     Other names:\n', sorted_fullnames)
        if len(sorted_emails) > 1:
            _write_counted(to_file, '     Other email addresses:\n',
                           sorted_emails)
        if gather_class_stats is not None:
            # Written to to_file like everything else; this heading used to
            # be printed to stdout.
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Smallest share first, ties broken by class name.
            ranked = sorted(classes.items(),
                            key=lambda item: (item[1], item[0]))
            for name, n in ranked:
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(n) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
219
220
0.140.14 by Jelmer Vernooij
Merge upstream.
221
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions"),
        ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree at location; fall back to a bare branch
        # when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision overrides the tip; a second one selects the
        # difference between the two.
        other = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if other:
                info = get_diff_info(a_branch.repository, tip, other)
            else:
                info = get_info(a_branch.repository, tip)
        finally:
            a_branch.unlock()

        # Only build the classification callback when it was asked for.
        fetch_class_stats = None
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        display_info(info, self.outf, fetch_class_stats)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
262
263
0.140.14 by Jelmer Vernooij
Merge upstream.
264
# Hand the committer statistics command class to bzrlib's command registry.
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
265
266
267
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree at location; fall back to a bare branch
        # when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph in reverse.  Each entry with depth 0
        # gets the next number and a line "<number>, <entries seen so far>".
        ordered = tsort.merge_sort(graph.iteritems(), tip)
        mainline_no = 0
        for seen, entry in enumerate(reversed(ordered)):
            _num, _node_name, depth, _is_end = entry
            if depth != 0:
                continue
            mainline_no += 1
            self.outf.write('%4d, %4d\n' % (mainline_no, seen + 1))
298
299
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
300
# Hand the ancestor growth command class to bzrlib's command registry.
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
301
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
302
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
303
def gather_class_stats(repository, revs):
    """Count how often each contribution class occurs in revs.

    :param repository: repository providing ``get_deltas_for_revisions()``;
        it is read-locked while the deltas are classified.
    :param revs: revisions to classify; ``len(revs)`` is used as the
        progress total.
    :return: ``(counts, total)`` where ``counts`` maps every value yielded
        by classify_delta() to its number of occurrences and ``total`` is
        the sum of those counts.
    """
    counts = {}
    total = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for kind in classify_delta(delta):
                    counts[kind] = counts.get(kind, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return counts, total
324
325
326
def display_credits(credits, to_file=None):
    """Write the credit sections, skipping the empty ones.

    Each non-empty section is written as a ``<title>:`` line, one line per
    name, and a trailing blank line.

    :param credits: tuple of four name lists, in the order
        ``(coders, documenters, artists, translators)``.
    :param to_file: object with a ``write()`` method that receives the
        output. Defaults to ``sys.stdout``, which is where this function
        always used to print. Commands can pass their own output stream
        (e.g. ``self.outf``) so that its encoding handling applies.
    """
    if to_file is None:
        import sys
        to_file = sys.stdout
    (coders, documenters, artists, translators) = credits

    def print_section(title, people):
        if len(people) == 0:
            return
        to_file.write("%s:\n" % (title,))
        for person in people:
            to_file.write("%s\n" % (person,))
        to_file.write("\n")

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
339
340
341
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of revid credits each of its
    apparent authors once for every distinct class that classify_delta()
    reports for the revision's delta.

    :param repository: repository to read; read-locked for the duration.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators), each
        a list of author strings, most credits first
    """
    tallies = {}
    for kind in ("documentation", "code", "art", "translation", None):
        tallies[kind] = {}

    repository.lock_read()
    try:
        ancestry = [ancestor for ancestor in repository.get_ancestry(revid)
                    if ancestor is not None]
        revs = repository.get_revisions(ancestry)
        progress = ui.ui_factory.nested_progress_bar()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, (rev, delta) in enumerate(izip(revs, deltas)):
                progress.update("analysing revisions", index, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for kind in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        bucket = tallies[kind]
                        bucket[author] = bucket.get(author, 0) + 1
        finally:
            progress.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest credit count first; ties ordered by author, descending.
        ranked = sorted(tallies[name].items(),
                        key=lambda item: (item[1], item[0]),
                        reverse=True)
        return [author for author, _count in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
376
377
378
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree at location; fall back to a bare branch
        # when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision overrides the tip.
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            found = find_credits(a_branch.repository, tip)
            display_credits(found)
        finally:
            a_branch.unlock()
405
406
407
# Hand the credits command class to bzrlib's command registry.
commands.register_command(cmd_credits)
408
409
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
410
def test_suite():
    """Build the test suite for this plugin.

    :return: a ``unittest.TestSuite`` holding the tests loaded from the
        test modules that live next to this module.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    result = TestSuite()
    loader = TestLoader()
    # Short module names, resolved relative to this package below.
    module_names = []
    for short_name in ['test_classify']:
        module_names.append('%s.%s' % (__name__, short_name))
    result.addTest(loader.loadTestsFromModuleNames(module_names))
    return result
418