/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
5
from bzrlib import errors, tsort
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
6
from bzrlib.branch import Branch
7
import bzrlib.commands
8
from bzrlib.config import extract_email_address
9
from bzrlib.workingtree import WorkingTree
10
11
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
12
# Matches from the start of a committer string up to the first '<'.
# The named group 'fullname' holds the text before that '<', with any
# whitespace directly in front of the '<' left out of the group.
_fullname_re = re.compile(r'(?P<fullname>.*?)\s*<')
13
14
def extract_fullname(committer):
    """Return the name portion of a committer string.

    If ``_fullname_re`` matches, the text in front of the first '<' is
    returned (it may be the empty string). Otherwise the result depends on
    ``extract_email_address``: when it raises ``errors.BzrError`` the whole
    committer string is returned unchanged; when it succeeds the string is
    only an address, so there is no name and '' is returned.
    """
    found = _fullname_re.match(committer)
    if found is not None:
        return found.group('fullname')

    try:
        extract_email_address(committer)
    except errors.BzrError:
        # Nothing address-like could be extracted: treat it all as the name.
        return committer

    # An address was extracted, but there was no name in front of it.
    return ''
27
28
29
def find_fullnames(lst):
    """Tally the full names found in an iterable of committer strings.

    Each entry is passed through ``extract_fullname``. The result is a list
    of ``(count, fullname)`` tuples sorted in descending order, so the most
    frequently seen name comes first.
    """
    tally = {}
    for entry in lst:
        name = extract_fullname(entry)
        tally[name] = tally.get(name, 0) + 1

    ranked = [(seen, name) for name, seen in tally.items()]
    ranked.sort(reverse=True)
    return ranked
38
39
40
def collapse_by_author(committers):
    """Merge per-email revision groups that share an author name.

    Some people commit with a similar username, but different email
    address, which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    For every email address the names used with it are examined, most
    common first. If one of them has already been seen for an earlier
    group, this email's revisions are folded into that group; otherwise a
    new group is started.

    :param committers: dict mapping an email address to the list of
        revisions committed with it. Each revision must provide a
        ``committer`` attribute. Neither the dict nor its lists are
        modified.
    :return: a list of ``(rev_count, revs, emails, fullnames)`` tuples,
        largest ``rev_count`` first, where ``emails`` maps each email
        address to its revision count and ``fullnames`` maps each name to
        the number of revisions using it.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)

        # Only the first already-known name decides which record to join;
        # the empty name is never used for matching.
        match = None
        for count, fullname in fullnames:
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse two records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count)
                             for count, fullname in fullnames)
            # Copy the revision list: it may be extended later when other
            # emails are merged in, and the caller's list must stay intact.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    # Order on the revision count alone. Comparing whole tuples would fall
    # through to comparing revision lists and dicts whenever counts tie.
    return sorted(((len(all_revs), all_revs, emails, fnames)
                   for all_revs, emails, fnames in counter_to_info.values()),
                  key=lambda info: info[0], reverse=True)
86
87
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
88
class cmd_statistics(bzrlib.commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats']
    takes_args = ['location?']

    # Characters the output stream cannot encode are replaced rather than
    # raising an error, so all output must go through self.outf.
    encoding_type = 'replace'

    def run(self, location='.'):
        """Print a per-author commit count for the branch at ``location``.

        A working tree containing ``location`` is tried first; if there is
        none, ``location`` is opened as a branch. Every revision in the
        ancestry of the last revision is grouped by committer email (or by
        the raw committer string when no email can be extracted), groups
        sharing an author name are merged, and one summary line per author
        is written to ``self.outf``, followed by the name and address
        variants when there is more than one.
        """
        # Imported here so bzrlib.ui is certain to be loaded; the imports
        # visible at the top of this file do not include it.
        from bzrlib import ui

        try:
            wt = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            b = Branch.open(location)
            last_rev = b.last_revision()
        else:
            b = wt.branch
            last_rev = wt.last_revision()

        committers = {}
        pb = ui.ui_factory.nested_progress_bar()
        try:
            b.lock_read()
            try:
                pb.note('getting ancestry')
                ancestry = b.repository.get_ancestry(last_rev)[1:]
                pb.note('getting revisions')
                revisions = b.repository.get_revisions(ancestry)

                total = len(ancestry)
                for index, rev in enumerate(revisions):
                    pb.update('checking', index, total)
                    try:
                        email = extract_email_address(rev.committer)
                    except errors.BzrError:
                        email = rev.committer
                    committers.setdefault(email, []).append(rev)
            finally:
                b.unlock()
        finally:
            # Always hand the nested bar back, even if reading failed.
            pb.clear()
            pb.finished()

        info = collapse_by_author(committers)
        for count, revs, emails, fullnames in info:
            # Most common email address and name come first
            sorted_emails = sorted(((n, email)
                                    for email, n in emails.items()),
                                   reverse=True)
            sorted_fullnames = sorted(((n, fullname)
                                       for fullname, n in fullnames.items()),
                                      reverse=True)
            self.outf.write('%4d %s <%s>\n'
                            % (count, sorted_fullnames[0][1],
                               sorted_emails[0][1]))
            if len(sorted_fullnames) > 1:
                self.outf.write('     Other names:\n')
                for n, fname in sorted_fullnames[1:]:
                    self.outf.write('     %4d ' % (n,))
                    if fname == '':
                        # Make an empty name visible in the listing
                        self.outf.write("''\n")
                    else:
                        self.outf.write("%s\n" % (fname,))
            if len(sorted_emails) > 1:
                self.outf.write('     Other email addresses:\n')
                # Unlike the names above, every address is listed,
                # including the one shown on the summary line.
                for n, email in sorted_emails:
                    self.outf.write('     %4d ' % (n,))
                    if email == '':
                        self.outf.write("''\n")
                    else:
                        self.outf.write("%s\n" % (email,))
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
154
155
156
# Hand the cmd_statistics class to bzrlib's command registration hook.
bzrlib.commands.register_command(cmd_statistics)
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
157
158
159
class cmd_ancestor_growth(bzrlib.commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write how the ancestry grows along the depth-0 revisions.

        The revision graph of the last revision is merge-sorted and walked
        in reverse. For every entry whose merge depth is 0 one line
        ``revno, ancestors`` is written to ``self.outf``, where ``revno``
        counts the depth-0 entries seen so far and ``ancestors`` counts all
        entries seen so far, including the current one.
        """
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: open the location as a branch instead.
            branch = Branch.open(location)
            tip = branch.last_revision()
        else:
            branch = tree.branch
            tip = tree.last_revision()

        # The lock is only needed while the graph is read.
        branch.lock_read()
        try:
            graph = branch.repository.get_revision_graph(tip)
        finally:
            branch.unlock()

        ordered = tsort.merge_sort(graph.iteritems(), tip)
        mainline_revno = 0
        walk = enumerate(reversed(ordered))
        for index, (_seq, _rev_id, depth, _end_of_merge) in walk:
            if depth != 0:
                continue
            mainline_revno += 1
            # index is zero-based, so index + 1 entries have been visited.
            self.outf.write('%4d, %4d\n' % (mainline_revno, index + 1))
190
191
192
# Hand the cmd_ancestor_growth class to bzrlib's command registration hook.
bzrlib.commands.register_command(cmd_ancestor_growth)