/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
5
from bzrlib.lazy_import import lazy_import
6
lazy_import(globals(), """
7
from bzrlib import (
8
    branch,
9
    commands,
10
    config,
11
    errors,
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
12
    option,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
13
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
14
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
15
    workingtree,
16
    )
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
17
from bzrlib.plugins.stats.classify import classify_delta
18
from itertools import izip
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
19
""")
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
20
21
22
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer strings.  Each one is passed to
        ``config.parse_username`` and the first element of the result is
        taken as the full name.
    :return: list of ``(count, fullname)`` tuples sorted in descending
        order, so the most frequently used name comes first.
    """
    tally = {}
    for committer in lst:
        name = config.parse_username(committer)[0]
        tally[name] = tally.get(name, 0) + 1
    # Sorting (count, name) pairs in reverse puts the most common name first
    # and breaks ties on the name itself.
    ranked = [(seen, name) for name, seen in tally.items()]
    ranked.sort(reverse=True)
    return ranked
31
32
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambiguous meaning
33
def collapse_by_person(committers):
    """The committers dict is keyed by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping email address -> list of revisions.
        Neither the dict nor the lists it holds are modified.
    :return: list of ``(rev_count, revs, emails, fullnames)`` tuples with
        the largest ``rev_count`` first.  ``emails`` maps address -> number
        of revisions and ``fullnames`` maps full name -> number of
        revisions for that person.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.get_apparent_author() for rev in revs)
        match = None
        for count, fullname in fullnames:
            # Empty names carry no identity, so they never link two records.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, fold this address into that record
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Copy the revision list: a later match extends it in place and
            # the caller's list must not grow as a side effect.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    def sort_key(item):
        # Order by revision count; ties are broken on the email addresses
        # rather than by comparing revision objects and dicts.
        return item[0], sorted(item[2])

    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=sort_key, reverse=True)
79
80
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by committer.
81
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by author email address.

    :param a_repo: repository whose ``get_revisions`` is called with
        ``revids``.
    :param revids: sequence of revision ids; its length is used as the
        progress total.
    :return: dict mapping email address -> list of revision objects.  The
        address is the second element of ``config.parse_username`` applied
        to the revision's apparent author.
    """
    by_email = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, revision in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            author = revision.get_apparent_author()
            address = config.parse_username(author)[1]
            if address not in by_email:
                by_email[address] = []
            by_email[address].append(revision)
    finally:
        progress.finished()

    return by_email
95
96
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
97
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: revision id whose ancestry is examined.
    :return: the result of ``collapse_by_person`` for the revisions in that
        ancestry.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is always
        # finished, even if locking or unlocking raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped.
            # NOTE(review): presumably the leading None placeholder that
            # find_credits filters out of the same call - confirm.
            ancestry = a_repo.get_ancestry(revision)[1:]
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
111
112
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
113
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions:
    only revisions in the ancestry of ``end_rev`` that are not in the
    ancestry of ``start_rev`` are counted.

    :param a_repo: repository to read from; it is read-locked while the
        revisions are gathered.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the result of ``collapse_by_person`` for the new revisions.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is always
        # finished, even if locking or unlocking raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            for count, rev in enumerate(revisions):
                pb.update('checking', count, len(ancestry))
                author = rev.get_apparent_author()
                try:
                    email = config.extract_email_address(author)
                except errors.BzrError:
                    # No usable address: key on the whole author string.
                    email = author
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
143
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambiguous meaning
144
0.140.20 by Jelmer Vernooij
Add --show-class argument to stats command.
145
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information.

    All output, including the section headings, is written to ``to_file``.

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are dicts mapping a value to the
        number of revisions that used it.
    :param to_file: object with a ``write`` method that receives the report.
    :param gather_class_stats: optional callable taking ``revs`` and
        returning ``(classes, total)``, with ``classes`` mapping a class
        name (or None) to a count.  When given, a "Contributions" section
        is written for every person.
    """
    for count, revs, emails, fullnames in info:
        # Most common email address / full name first
        sorted_emails = sorted(((uses, email)
                                for email, uses in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((uses, fullname)
                                   for fullname, uses in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for uses, fname in sorted_fullnames[1:]:
                # An empty name is shown as '' so the line is not blank.
                shown = "''" if fname == '' else fname
                to_file.write('     %4d %s\n' % (uses, shown))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Every address is listed, the primary one included, because
            # the summary line only carries the combined total.
            for uses, email in sorted_emails:
                shown = "''" if email == '' else email
                to_file.write('     %4d %s\n' % (uses, shown))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            # Smallest share first; a None class name sorts as ''.
            ranked = sorted(classes.items(),
                            key=lambda item: (item[1], item[0] or ''))
            for name, uses in ranked:
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(uses) / total) * 100.0, name))
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
180
181
0.140.14 by Jelmer Vernooij
Merge upstream.
182
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions"),
    ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree's view of the branch; fall back to opening
        # the branch directly when LOCATION has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision replaces the tip; a second one selects the
        # "difference between two revisions" mode.
        alternate_rev = None
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not alternate_rev:
                info = get_info(a_branch.repository, last_rev)
            else:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
        finally:
            a_branch.unlock()

        if show_class:
            class_stats = lambda revs: gather_class_stats(
                a_branch.repository, revs)
        else:
            class_stats = None
        display_info(info, self.outf, class_stats)


commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
223
224
225
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree's view of the branch; fall back to opening
        # the branch directly when LOCATION has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph in reverse.  Every node adds to the
        # running ancestor count; one CSV row is written for each node at
        # merge depth 0.
        mainline_revno = 0
        ancestors_seen = 0
        merge_sorted = tsort.merge_sort(graph.iteritems(), last_rev)
        for _num, _node_name, depth, _is_end in reversed(merge_sorted):
            ancestors_seen += 1
            if depth != 0:
                continue
            mainline_revno += 1
            self.outf.write('%4d, %4d\n' % (mainline_revno, ancestors_seen))


commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
259
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
260
0.140.18 by Jelmer Vernooij
Add credits command, test classify code by default, add comments to classify code.
261
def gather_class_stats(repository, revs):
    """Tally the contribution classes found in the deltas of ``revs``.

    :param repository: repository providing ``get_deltas_for_revisions``;
        it is read-locked while the deltas are classified.
    :param revs: sequence of revisions whose deltas are classified.
    :return: ``(classes, total)`` where ``classes`` maps each value yielded
        by ``classify_delta`` to the number of times it was seen and
        ``total`` is the sum of those counts.
    """
    tally = {}
    grand_total = 0
    progress = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for index, delta in enumerate(deltas):
                progress.update("classifying commits", index, len(revs))
                for kind in classify_delta(delta):
                    tally[kind] = tally.get(kind, 0) + 1
                    grand_total += 1
        finally:
            repository.unlock()
    finally:
        progress.finished()
    return tally, grand_total
282
283
284
def display_credits(credits, to_file=None):
    """Write the credits, one section per kind of contribution.

    Empty sections are skipped.  Each section is a heading line, one line
    per name, and a trailing blank line.

    :param credits: 4-tuple ``(coders, documenters, artists, translators)``,
        each a sequence of names.
    :param to_file: object with a ``write`` method that receives the
        output.  Defaults to ``sys.stdout``, looked up at call time.
    """
    import sys
    if to_file is None:
        to_file = sys.stdout
    (coders, documenters, artists, translators) = credits

    def write_section(title, people):
        if not people:
            return
        to_file.write("%s:\n" % title)
        for person in people:
            to_file.write("%s\n" % person)
        to_file.write("\n")

    write_section("Code", coders)
    write_section("Documentation", documenters)
    write_section("Art", artists)
    write_section("Translations", translators)
297
298
299
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` is classified
    with ``classify_delta`` and counted once per distinct class for its
    apparent author.

    :return: tuple with (authors, documenters, artists, translators); each
        is a list of author names, those with the most contributions first.
    """
    tallies = {
        "documentation": {},
        "code": {},
        "art": {},
        "translation": {},
        None: {},
    }
    repository.lock_read()
    try:
        ancestry = [rev_id for rev_id in repository.get_ancestry(revid)
                    if rev_id is not None]
        pb = ui.ui_factory.nested_progress_bar()
        try:
            revs = repository.get_revisions(ancestry)
            deltas = repository.get_deltas_for_revisions(revs)
            for rev, delta in izip(revs, deltas):
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for kind in set(classify_delta(delta)):
                    author = rev.get_apparent_author()
                    per_author = tallies[kind]
                    per_author[author] = per_author.get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Highest count first, ties broken by author name (descending).
        ranked = sorted(tallies[name].items(),
                        key=lambda entry: (entry[1], entry[0]),
                        reverse=True)
        return [author for author, _commits in ranked]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
333
334
335
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree's view of the branch; fall back to opening
        # the branch directly when LOCATION has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision replaces the tip.
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            display_credits(find_credits(a_branch.repository, last_rev))
        finally:
            a_branch.unlock()


commands.register_command(cmd_credits)
365
366
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
367
def test_suite():
    """Build the test suite for this plugin.

    :return: a ``unittest.TestSuite`` holding the tests loaded from the
        plugin's test modules, named relative to this module.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader

    testmod_names = ['test_classify']
    qualified_names = ['%s.%s' % (__name__, mod) for mod in testmod_names]

    suite = TestSuite()
    suite.addTest(TestLoader().loadTestsFromModuleNames(qualified_names))
    return suite
375