/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
5
from bzrlib import errors, tsort
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
6
from bzrlib.branch import Branch
7
import bzrlib.commands
8
from bzrlib.config import extract_email_address
9
from bzrlib.workingtree import WorkingTree
10
11
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
12
# Captures everything before the first '<' (lazily), dropping any whitespace
# that sits directly in front of the '<'.
_fullname_re = re.compile(r'(?P<fullname>.*?)\s*<')


def extract_fullname(committer):
    """Try to get the user's name from their committer info.

    :param committer: The committer string, e.g. 'Joe Foo <joe@foo.com>'.
    :return: The text preceding the first '<' when there is one (this may
        be the empty string, e.g. for '<joe@foo.com>'). When there is no
        '<', return '' if extract_email_address() still accepts the string
        (it is an address without a name), otherwise return the committer
        string unchanged.
    """
    m = _fullname_re.match(committer)
    if m:
        return m.group('fullname')
    try:
        # Only the success/failure of the parse matters here; the parsed
        # address itself is not needed.
        extract_email_address(committer)
    except errors.BzrError:
        # Not something we can parse as an email: treat the whole string
        # as the name.
        return committer
    # We found an email address, but not a fullname, so there is no fullname
    return ''
27
28
29
def find_fullnames(lst):
    """Count how often each fullname occurs in an iterable of committers.

    :param lst: An iterable of committer strings; it is consumed once.
    :return: A list of (count, fullname) tuples, sorted in descending
        order (highest count first).
    """
    tally = {}
    for committer in lst:
        name = extract_fullname(committer)
        tally[name] = tally.get(name, 0) + 1

    pairs = [(occurrences, name) for name, occurrences in tally.iteritems()]
    pairs.sort(reverse=True)
    return pairs
38
39
40
def collapse_by_author(committers):
    """The committers list is sorted by email, fix it up by author.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: A dict mapping an email (or raw committer string)
        to the list of revisions committed under it. The dict and its
        lists are not modified.
    :return: A list of (rev_count, revs, {email: count}, {fullname: count})
        tuples, ordered by rev_count, largest first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)

        # Look for an existing record that already knows one of these
        # names. Empty names never identify anybody, so they are skipped.
        match = None
        for count, fullname in fullnames:
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse the records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count)
                             for count, fullname in fullnames)
            # Store a copy of revs: later merges extend this list, and that
            # must not leak back into the caller's dict.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    # Order on the revision count only. Comparing whole tuples would fall
    # back to comparing revision lists and dicts whenever two counts tie.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda entry: entry[0], reverse=True)
86
87
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
88
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    Walks the ancestry of ``revision`` in ``a_repo``, groups the revisions
    by committer email and collapses the groups by author.

    :param a_repo: The repository to read from; it is read-locked for the
        duration of the scan and unlocked afterwards.
    :param revision: The revision id whose ancestry is examined.
    :return: Whatever collapse_by_author() returns for the grouped
        revisions.
    """
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        committers = {}
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first entry of the ancestry is dropped -- presumably the
            # placeholder bzrlib puts in front of the real revision ids;
            # TODO confirm against the Repository.get_ancestry docs.
            ancestry = a_repo.get_ancestry(revision)[1:]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            total = len(ancestry)
            for count, rev in enumerate(revisions):
                pb.update('checking', count, total)
                try:
                    email = extract_email_address(rev.committer)
                except errors.BzrError:
                    # No parsable address: key on the raw committer string.
                    email = rev.committer
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        # Always release the progress bar, even when locking, reading or
        # unlocking the repository fails.
        pb.finished()

    return collapse_by_author(committers)
112
113
114
def display_info(info, to_file):
    """Write out the information

    For every author record one summary line is written, using the most
    common fullname and the most common email. If the record has more than
    one fullname, the remaining names are listed; if it has more than one
    email address, all of its addresses are listed.

    :param info: An iterable of (count, revs, emails, fullnames) tuples,
        where emails and fullnames are dicts mapping a string to a count.
    :param to_file: A file-like object; all output goes through its
        write() method.
    """
    for count, revs, emails, fullnames in info:
        # Most common first
        sorted_emails = sorted(((num, email)
                                for email, num in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((num, fullname)
                                   for fullname, num in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headers go to to_file like everything else, not to stdout.
            to_file.write('     Other names:\n')
            for num, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (num,))
                if fname == '':
                    # Make an empty name visible
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Unlike the names above, this lists every address, including
            # the most common one.
            for num, email in sorted_emails:
                to_file.write('     %4d ' % (num,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
144
145
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
146
class cmd_statistics(bzrlib.commands.Command):
    """Generate statistics for LOCATION."""

    # Registered under the class-derived name and the short alias 'stats'.
    aliases = ['stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Use the working tree containing location when there is one,
        # otherwise open the branch at location directly.
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision spec overrides the tip found above. A second
        # spec is resolved as well, but its result is not used yet.
        alternate_rev = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        display_info(get_info(a_branch.repository, tip), self.outf)


bzrlib.commands.register_command(cmd_statistics)
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
175
176
177
class cmd_ancestor_growth(bzrlib.commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Use the working tree containing location when there is one,
        # otherwise open the branch at location directly.
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # The lock is only held while the revision graph is fetched.
        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        ordered = tsort.merge_sort(graph.iteritems(), tip)

        # Walk the merge-sorted entries back to front, counting every entry;
        # each entry with depth 0 (presumably a mainline revision -- confirm
        # against tsort.merge_sort) emits one "revno, total so far" line.
        mainline_seen = 0
        total_seen = 0
        for _seq, _rev_id, depth, _end_of_merge in reversed(ordered):
            total_seen += 1
            if depth != 0:
                continue
            mainline_seen += 1
            self.outf.write('%4d, %4d\n' % (mainline_seen, total_seen))


bzrlib.commands.register_command(cmd_ancestor_growth)