/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
5
from bzrlib.lazy_import import lazy_import
6
lazy_import(globals(), """
7
from bzrlib import (
8
    branch,
9
    commands,
10
    config,
11
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
12
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
13
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
14
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
15
    workingtree,
16
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
17
from bzrlib.plugins.stats.classify import classify_delta
18
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
19
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
20
21
22
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer strings. Each one is passed to
        ``config.parse_username`` and the first element of the result is
        taken as the full name.
    :return: list of ``(count, fullname)`` tuples sorted in descending
        order, so the most frequently used name comes first.
    """
    counts = {}
    for committer in lst:
        fullname = config.parse_username(committer)[0]
        counts[fullname] = counts.get(fullname, 0) + 1
    # items() instead of iteritems(): same result, and it is available on
    # both Python 2 and Python 3 dictionaries.
    return sorted(((count, name) for name, count in counts.items()),
                  reverse=True)
31
32
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
33
def collapse_by_person(committers):
    """Combine the per-email revision lists that belong to the same person.

    Some people commit with a similar username, but different email
    address, which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So look at the names used with each email address and, whenever one
    of those names has already been seen with another address, fold both
    addresses into a single record.

    :param committers: dict mapping an email address to the list of
        revisions recorded for it; ``get_apparent_author()`` is called on
        every revision. The dict and its lists are not modified.
    :return: list of ``(rev_count, revs, {email: count}, {fullname: count})``
        tuples, largest ``rev_count`` first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.get_apparent_author() for rev in revs)
        match = None
        for count, fullname in fullnames:
            # Empty names are never used to link two addresses together.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse the records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Copy the revision list: the record is extended in place when
            # a later address matches, and that must not leak back into
            # the caller's dictionary.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    collapsed = [(len(person_revs), person_revs, emails, fnames)
                 for person_revs, emails, fnames in counter_to_info.values()]
    # Sort on the revision count only; on a tie, comparing whole tuples
    # would go on to compare revision lists and dictionaries.
    collapsed.sort(key=lambda entry: entry[0], reverse=True)
    return collapsed
79
80
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by commmitter.
81
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by their author's email.

    :param a_repo: repository object; ``get_revisions(revids)`` is called
        on it to load the revisions.
    :param revids: collection of revision ids. ``len()`` is taken of it
        for progress reporting.
    :return: dict mapping an email address (the second element returned by
        ``config.parse_username`` for the revision's apparent author) to
        the list of revisions with that address, in the order they were
        returned by ``get_revisions``.
    """
    by_email = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, revision in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            author = revision.get_apparent_author()
            address = config.parse_username(author)[1]
            if address not in by_email:
                by_email[address] = []
            by_email[address].append(revision)
    finally:
        progress.finished()

    return by_email
95
96
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
97
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: revision id whose ancestry is analysed.
    :return: the result of ``collapse_by_person`` applied to the ancestry
        grouped by ``sort_by_committer``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    # Nested try/finally so the progress bar is always finished, even when
    # taking or releasing the repository lock raises.
    try:
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first entry of the ancestry list is skipped.
            # NOTE(review): presumably a leading None placeholder
            # (find_credits filters None out of the same call) - confirm.
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
111
112
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
113
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is analysed.
    :return: the result of ``collapse_by_person`` for the revisions that
        are in the ancestry of ``end_rev`` but not of ``start_rev``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    # Nested try/finally so the progress bar is always finished, even when
    # taking or releasing the repository lock raises.
    try:
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            # A set gives constant-time membership tests in the filter below.
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first entry of the ancestry list is skipped, as in get_info.
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
133
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning
134
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
135
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information.

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples as
        returned by ``collapse_by_person``; ``emails`` and ``fullnames``
        are dicts mapping a value to the number of times it was used.
    :param to_file: object with a ``write`` method; all output, including
        the section headers, is written to it.
    :param gather_class_stats: optional callable taking the revision list
        of one person and returning ``(classes, total)``, where ``classes``
        maps a class name (or None) to a count. When given, a
        "Contributions" section is written for every person.
    """
    for count, revs, emails, fullnames in info:
        # Most frequently used email address and name come first.
        sorted_emails = sorted(((used, email)
                                for email, used in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((used, fullname)
                                   for fullname, used in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headers go through to_file like everything else, rather than
            # through print, so the report ends up in a single stream.
            to_file.write('     Other names:\n')
            for used, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (used,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # NOTE(review): unlike the names above, this list also repeats
            # the primary address; kept as is - confirm it is intended.
            for used, email in sorted_emails:
                to_file.write('     %4d ' % (used,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Ascending by count, then by name. The name may be None, which
            # is ordered before any string without comparing None to str.
            ordered = sorted(
                classes.items(),
                key=lambda item: (item[1], item[0] is not None, item[0] or ''))
            for name, used in ordered:
                to_file.write("     %4.0f%% %s\n"
                              % ((float(used) / total) * 100.0,
                                 "Unknown" if name is None else name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
170
171
0.140.14 by Jelmer Vernooij
Merge upstream.
172
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    # Command metadata: alternative names, the optional positional
    # argument, and the accepted options.
    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions"),
    ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree containing ``location``; fall back to
        # opening a branch there when no working tree exists.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision overrides the tip; a second revision spec
        # switches the report to the difference between the two.
        alternate_rev = None
        if revision is not None:
            last_rev = revision[0].as_revision_id(a_branch)
            if len(revision) > 1:
                alternate_rev = revision[1].as_revision_id(a_branch)

        a_branch.lock_read()
        try:
            repository = a_branch.repository
            if alternate_rev:
                info = get_diff_info(repository, last_rev, alternate_rev)
            else:
                info = get_info(repository, last_rev)
        finally:
            a_branch.unlock()

        # Only hand display_info a classifier when --show-class was given.
        if show_class:
            def class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            class_stats = None
        display_info(info, self.outf, class_stats)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
210
211
0.140.14 by Jelmer Vernooij
Merge upstream.
212
# Pass the cmd_committer_statistics class to commands.register_command.
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
213
214
215
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    # Command metadata: a single optional positional argument.
    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree containing ``location``; fall back to
        # opening a branch there when no working tree exists.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        merge_sorted = tsort.merge_sort(graph.iteritems(), last_rev)

        # Walk the merge-sorted entries in reverse, counting every entry,
        # and write one "revno, entries-so-far" line per depth-0 entry.
        entries_seen = 0
        depth0_count = 0
        for _num, _node_name, depth, _isend in reversed(merge_sorted):
            entries_seen += 1
            if depth != 0:
                continue
            depth0_count += 1
            self.outf.write('%4d, %4d\n' % (depth0_count, entries_seen))
246
247
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
248
# Pass the cmd_ancestor_growth class to commands.register_command.
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
249
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
250
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
251
def gather_class_stats(repository, revs):
    """Count how often each class is reported for the deltas of ``revs``.

    :param repository: repository object; it is read-locked while
        ``get_deltas_for_revisions(revs)`` is consumed.
    :param revs: collection of revisions. ``len()`` is taken of it for
        progress reporting.
    :return: ``(counts, total)`` where ``counts`` maps every value yielded
        by ``classify_delta`` to the number of times it was yielded, and
        ``total`` is the sum of those counts.
    """
    counts = {}
    total = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for kind in classify_delta(delta):
                    counts[kind] = counts.get(kind, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return counts, total
272
273
274
def display_credits(credits, to_file=None):
    """Print the credits, one titled section per kind of contribution.

    Sections are written in the order Code, Documentation, Art,
    Translations. Each consists of a ``Title:`` line, one line per name
    and a trailing blank line; empty sections are skipped entirely.

    :param credits: tuple of four name lists
        ``(coders, documenters, artists, translators)``.
    :param to_file: optional object with a ``write`` method to send the
        output to. When omitted the text is printed to standard output,
        which is what this function has always done.
    """
    (coders, documenters, artists, translators) = credits

    def emit(line):
        if to_file is None:
            # A single parenthesised argument behaves the same as a print
            # statement on Python 2 and as a function call on Python 3.
            print(line)
        else:
            to_file.write("%s\n" % (line,))

    def print_section(title, people):
        if not people:
            return
        emit("%s:" % title)
        for person in people:
            emit("%s" % person)
        emit("")

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
287
288
289
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` is credited to
    its apparent author, once for each distinct class that
    ``classify_delta`` reports for the revision's delta.

    :param repository: repository to read from; it is read-locked for the
        duration of the analysis.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (coders, documenters, artists, translators); each
        is a list of author strings, the author with the most credited
        revisions first.
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    repository.lock_read()
    try:
        ancestry = [ancestor for ancestor in repository.get_ancestry(revid)
                    if ancestor is not None]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, (rev, delta) in enumerate(izip(revs, deltas)):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # The author does not depend on the class, so look it up
                # once per revision instead of once per class.
                author = rev.get_apparent_author()
                for c in set(classify_delta(delta)):
                    ret[c][author] = ret[c].get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest count first; equal counts fall back to the author string.
        ranked = sorted(ret[name].items(),
                        key=lambda item: (item[1], item[0]),
                        reverse=True)
        return [credited for credited, _count in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
324
325
326
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    # Command metadata: the optional positional argument and the
    # accepted options.
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree containing ``location``; fall back to
        # opening a branch there when no working tree exists.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision overrides the tip found above.
        if revision is not None:
            last_rev = revision[0].as_revision_id(a_branch)

        # The branch stays read-locked while the credits are both
        # computed and displayed.
        a_branch.lock_read()
        try:
            display_credits(find_credits(a_branch.repository, last_rev))
        finally:
            a_branch.unlock()
353
354
355
# Pass the cmd_credits class to commands.register_command.
commands.register_command(cmd_credits)
356
357
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
358
def test_suite():
    """Build the test suite for this plugin.

    :return: a ``unittest.TestSuite`` holding whatever
        ``TestLoader.loadTestsFromModuleNames`` loads for the
        ``test_classify`` module located under this package's name.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader

    result = TestSuite()
    test_loader = TestLoader()
    # Module names are qualified with this package's own name.
    qualified_names = ['%s.%s' % (__name__, short_name)
                       for short_name in ['test_classify']]
    result.addTest(test_loader.loadTestsFromModuleNames(qualified_names))
    return result
366