/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
5
from bzrlib.lazy_import import lazy_import
6
lazy_import(globals(), """
7
from bzrlib import (
8
    branch,
9
    commands,
10
    config,
11
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
12
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
13
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
14
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
15
    workingtree,
16
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
17
from bzrlib.plugins.stats.classify import classify_delta
18
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
19
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
20
21
22
def find_fullnames(lst):
    """Count how often each full name occurs in an iterable of committers.

    Every entry is run through ``config.parse_username`` and only the first
    element of its result (the name part) is tallied.

    :param lst: iterable of committer/author strings.
    :return: list of ``(count, fullname)`` tuples, sorted in descending
        order.
    """
    tally = {}
    for committer in lst:
        person = config.parse_username(committer)[0]
        tally[person] = tally.get(person, 0) + 1
    ranked = [(seen, person) for person, seen in tally.items()]
    ranked.sort(reverse=True)
    return ranked
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
32
33
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
34
def collapse_by_person(committers):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email (or fallback identifier) to the
        list of revisions attributed to it. The dict and its lists are not
        modified.
    :return: list of ``(revision_count, revisions, {email: count},
        {fullname: count})`` tuples, largest revision count first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.get_apparent_author() for rev in revs)
        match = None
        for count, fullname in fullnames:
            # Blank names never identify a person, so they cannot match.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse to records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Copy the revision list: the record is extended when another
            # email collapses into it, and that must not leak back into the
            # caller's ``committers`` mapping.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    # Order on the revision count alone so that ties never fall through to
    # comparing revision lists or dicts.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda entry: entry[0], reverse=True)
80
81
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
82
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by their apparent author.

    The grouping key is the email part of ``config.parse_username`` applied
    to the revision's apparent author; when that part is the empty string
    the name part is used instead.

    :param a_repo: repository providing ``get_revisions``.
    :param revids: sized collection of revision ids to load.
    :return: dict mapping each key to the list of its revisions.
    """
    by_author = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, rev in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            parsed = config.parse_username(rev.get_apparent_author())
            # Fall back to the name so revisions without an email are not
            # all lumped under one blank key.
            key = parsed[0] if parsed[1] == '' else parsed[1]
            if key not in by_author:
                by_author[key] = []
            by_author[key].append(rev)
    finally:
        progress.finished()
    return by_author
100
101
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
102
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    Reads the ancestry of ``revision`` (skipping the first element of what
    ``get_ancestry`` returns), groups those revisions with
    ``sort_by_committer`` and collapses the groups with
    ``collapse_by_person``.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: revision id whose ancestry is analysed.
    :return: whatever ``collapse_by_person`` returns for the grouped
        revisions.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Lock inside the progress bar's try block so the bar is always
        # finished, even when locking or unlocking raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            ancestry = a_repo.get_ancestry(revision)[1:]
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
116
117
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
118
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is analysed.
    :return: whatever ``collapse_by_person`` returns for the revisions that
        are in the ancestry of ``end_rev`` but not of ``start_rev``.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Lock inside the progress bar's try block so the bar is always
        # finished, even when locking or unlocking raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            for count, rev in enumerate(revisions):
                pb.update('checking', count, len(ancestry))
                author = rev.get_apparent_author()
                try:
                    email = config.extract_email_address(author)
                except errors.BzrError:
                    # No usable address: key on the whole author string.
                    email = author
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
148
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
149
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
150
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` map a string to a count.
    :param to_file: file-like object; all output goes through its
        ``write`` method.
    :param gather_class_stats: optional callable taking the revision list
        and returning ``(classes, total)``, where ``classes`` maps a class
        name (or None) to a count. When given, a per-class percentage
        breakdown is written for every entry.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((num, email)
                                for email, num in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((num, fullname)
                                   for fullname, num in fullnames.items()),
                                  reverse=True)
        primary_name = sorted_fullnames[0][1] if sorted_fullnames else ''
        primary_email = sorted_emails[0][1] if sorted_emails else ''
        # There is a chance sometimes with svn imports that the full name and
        # email can BOTH be blank.
        if primary_name == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, primary_name, primary_email))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            _write_counted_entries(to_file, sorted_fullnames[1:])
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # NOTE(review): unlike the names above, this list repeats the
            # primary address; kept as-is, confirm whether that is intended.
            _write_counted_entries(to_file, sorted_emails)
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Ascending by (count, name); a None name sorts before any string.
            ordered = sorted(classes.items(),
                             key=lambda item: (item[1],
                                               item[0] is not None,
                                               item[0] or ''))
            for name, num in ordered:
                to_file.write("     %4.0f%% %s\n"
                              % ((float(num) / total) * 100.0,
                                 "Unknown" if name is None else name))


def _write_counted_entries(to_file, entries):
    """Write one indented ``count value`` line per ``(count, value)`` pair."""
    for num, value in entries:
        to_file.write('     %4d ' % (num,))
        if value == '':
            # Make a blank value visible instead of printing nothing.
            to_file.write("''\n")
        else:
            to_file.write("%s\n" % (value,))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
191
192
0.140.14 by Jelmer Vernooij
Merge upstream.
193
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions"),
    ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Use the working tree's branch and last revision when there is a
        # tree at (or above) location; otherwise open the branch directly.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision overrides the tip; a second one asks for only
        # the difference between the two.
        alternate_rev = None
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not alternate_rev:
                info = get_info(a_branch.repository, last_rev)
            else:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
        finally:
            a_branch.unlock()

        if show_class:
            def class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            class_stats = None
        display_info(info, self.outf, class_stats)


commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
234
235
236
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Use the working tree's branch and last revision when there is a
        # tree at (or above) location; otherwise open the branch directly.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        sorted_graph = tsort.merge_sort(graph.iteritems(), last_rev)
        mainline_revno = 0
        # Walk the merge-sorted entries in reverse; each depth-0 entry gets
        # one CSV row holding its running number and the count of entries
        # seen so far.
        for seen, entry in enumerate(reversed(sorted_graph)):
            num, node_name, depth, isend = entry
            if depth != 0:
                continue
            mainline_revno += 1
            self.outf.write('%4d, %4d\n' % (mainline_revno, seen + 1))


commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
270
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
271
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
272
def gather_class_stats(repository, revs):
    """Count the classes reported by ``classify_delta`` over ``revs``.

    :param repository: repository providing ``get_deltas_for_revisions``;
        it is read-locked while the deltas are processed.
    :param revs: sized collection of revisions to classify.
    :return: ``(counts, total)`` where ``counts`` maps each class yielded by
        ``classify_delta`` to its number of occurrences and ``total`` is the
        sum of those counts.
    """
    counts = {}
    grand_total = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for position, delta in enumerate(deltas):
                progress.update("classifying commits", position, len(revs))
                for kind in classify_delta(delta):
                    counts[kind] = counts.get(kind, 0) + 1
                    grand_total += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return counts, grand_total
293
294
295
def display_credits(credits, to_file=None):
    """Write the credits, one titled section per kind of contribution.

    :param credits: 4-tuple ``(coders, documenters, artists, translators)``,
        each a sequence of contributor names.
    :param to_file: optional file-like object to write to; defaults to
        ``sys.stdout``, which is where the output always went before this
        parameter existed.
    """
    if to_file is None:
        import sys
        to_file = sys.stdout

    def print_section(title, people):
        # Sections without contributors are left out entirely.
        if len(people) == 0:
            return
        to_file.write("%s:\n" % (title,))
        for person in people:
            to_file.write("%s\n" % (person,))
        to_file.write("\n")

    (coders, documenters, artists, translators) = credits
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
308
309
310
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Revisions with more than one parent are skipped. For the others, each
    distinct class reported by ``classify_delta`` counts once for the
    revision's apparent author.

    :return: tuple with (authors, documenters, artists, translators)
    """
    tallies = dict((kind, {}) for kind in
                   ("documentation", "code", "art", "translation", None))
    repository.lock_read()
    try:
        ancestry = [ancestor for ancestor in repository.get_ancestry(revid)
                    if ancestor is not None]
        revs = repository.get_revisions(ancestry)
        progress = ui.ui_factory.nested_progress_bar()
        try:
            pairs = izip(revs, repository.get_deltas_for_revisions(revs))
            for position, (rev, delta) in enumerate(pairs):
                progress.update("analysing revisions", position, len(revs))
                # Don't count merges
                if len(rev.parent_ids) <= 1:
                    for kind in set(classify_delta(delta)):
                        author = rev.get_apparent_author()
                        per_author = tallies[kind]
                        per_author[author] = per_author.get(author, 0) + 1
        finally:
            progress.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Largest count first; equal counts fall back to descending name.
        ranked = sorted(tallies[name].items(),
                        key=lambda pair: (pair[1], pair[0]), reverse=True)
        return [author for author, _ in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
345
346
347
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Use the working tree's branch and last revision when there is a
        # tree at (or above) location; otherwise open the branch directly.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision overrides the tip found above.
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            display_credits(find_credits(a_branch.repository, last_rev))
        finally:
            a_branch.unlock()


commands.register_command(cmd_credits)
377
378
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
379
def test_suite():
    """Build the unittest suite holding this package's test modules."""
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    suite = TestSuite()
    loader = TestLoader()
    # Test modules are named relative to this package.
    qualified_names = []
    for short_name in ['test_classify']:
        qualified_names.append('%s.%s' % (__name__, short_name))
    suite.addTest(loader.loadTestsFromModuleNames(qualified_names))
    return suite
387