/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
5
from bzrlib import errors, tsort
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
6
from bzrlib.branch import Branch
7
import bzrlib.commands
8
from bzrlib.config import extract_email_address
9
from bzrlib.workingtree import WorkingTree
10
11
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
12
# Lazily captures everything before the first '<', excluding any
# whitespace that directly precedes the '<'.
_fullname_re = re.compile(r'(?P<fullname>.*?)\s*<')


def extract_fullname(committer):
    """Return the full-name portion of a committer string.

    :param committer: the committer string, e.g. 'Name <email>'.
    :return: the text before the first '<' when there is one; otherwise
        '' if extract_email_address() accepts the string, or the whole
        string unchanged if it raises BzrError.
    """
    found = _fullname_re.match(committer)
    if found is not None:
        return found.group('fullname')
    try:
        extract_email_address(committer)
    except errors.BzrError:
        # No '<' and no email could be extracted: treat it all as the name.
        return committer
    # We found an email address, but not a fullname,
    # so there is no fullname.
    return ''
27
28
29
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer strings.
    :return: list of (count, fullname) tuples sorted in descending order,
        so the most frequent name comes first.
    """
    tally = {}
    for committer in lst:
        name = extract_fullname(committer)
        tally[name] = tally.get(name, 0) + 1
    ranked = [(seen, name) for name, seen in tally.iteritems()]
    ranked.sort(reverse=True)
    return ranked
38
39
40
def collapse_by_author(committers):
    """The committers mapping is keyed by email, fix it up by author.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping email -> list of revision objects
        (each must have a ``committer`` attribute). Not modified.
    :return: list of (revision_count, revisions, {email: count},
        {fullname: count}) tuples, largest revision_count first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)
        match = None
        for count, fullname in fullnames:
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        if match:
            # One of the names matched, we need to collapse to records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                # The empty name is never used for matching, so do not
                # register it (same rule as for brand-new records below).
                if fullname:
                    name_to_counter[fullname] = match
                record[2].setdefault(fullname, 0)
                record[2][fullname] += count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Store a copy: the record's revision list is extended when
            # later emails collapse into it, and the caller's list must
            # not grow as a side effect.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    # Sort on the revision count only. Falling back to comparing the
    # revision lists and dicts on ties gives an arbitrary order at best.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda entry: entry[0], reverse=True)
86
87
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by committer.
88
def sort_by_committer(a_repo, revids):
    """Group the revisions named by revids by their committer's email.

    :param a_repo: repository object providing get_revisions().
    :param revids: revision ids to load; len(revids) is used as the
        progress total.
    :return: dict mapping email -> list of revision objects. When
        extract_email_address() raises BzrError for a committer, the raw
        committer string is used as the key instead.
    """
    by_email = {}
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        pb.note('getting revisions')
        for index, revision in enumerate(a_repo.get_revisions(revids)):
            pb.update('checking', index, len(revids))
            try:
                key = extract_email_address(revision.committer)
            except errors.BzrError:
                key = revision.committer
            by_email.setdefault(key, []).append(revision)
    finally:
        # Always release the progress bar, even if loading fails.
        pb.finished()

    return by_email
105
106
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
107
def get_info(a_repo, revision):
    """Collect per-author information for the ancestry of one revision.

    :param a_repo: repository object; it is read-locked for the duration
        of the ancestry and revision lookups.
    :param revision: revision id whose ancestry is examined.
    :return: the result of collapse_by_author() over that ancestry.
    """
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        pb.note('getting ancestry')
        # The first entry of get_ancestry()'s result is skipped.
        ancestor_ids = a_repo.get_ancestry(revision)[1:]
        by_email = sort_by_committer(a_repo, ancestor_ids)
    finally:
        a_repo.unlock()
        pb.finished()
    return collapse_by_author(by_email)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
121
122
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
123
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository object; it is read-locked while the
        ancestries and revisions are loaded.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the result of collapse_by_author() over the revisions that
        are in end_rev's ancestry but not in start_rev's.
    """
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        pb.note('getting ancestry 1')
        start_ancestry = set(a_repo.get_ancestry(start_rev))
        pb.note('getting ancestry 2')
        # The first entry of get_ancestry()'s result is skipped.
        ancestry = a_repo.get_ancestry(end_rev)[1:]
        ancestry = [rev for rev in ancestry if rev not in start_ancestry]
        # Share the grouping logic with get_info() instead of repeating
        # the per-revision email extraction loop here.
        committers = sort_by_committer(a_repo, ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_author(committers)
153
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
154
def display_info(info, to_file):
    """Write out the information

    :param info: iterable of (count, revs, emails, fullnames) tuples,
        where emails and fullnames are dicts mapping a string to the
        number of times it was seen.
    :param to_file: file-like object; all output, including the section
        headers, is written to it with write().
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and name: sort (count, value) pairs
        # in descending order so the most frequent comes first.
        sorted_emails = sorted(((num, email)
                                for email, num in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((num, fullname)
                                   for fullname, num in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headers go to to_file too, not to stdout, so the report
            # stays together wherever the caller sends it.
            to_file.write('     Other names:\n')
            for num, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (num,))
                if fname == '':
                    # Make an empty name visible in the output.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Unlike the names above, every address is listed here,
            # including the most common one already shown.
            for num, email in sorted_emails:
                to_file.write('     %4d ' % (num,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
184
185
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
186
class cmd_statistics(bzrlib.commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree containing location; fall back to
        # opening a branch directly when there is no working tree.
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # Revision specs are resolved before the read lock is taken.
        alternate_rev = None
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            repo = a_branch.repository
            if alternate_rev:
                # Two revisions given: report only on the difference.
                info = get_diff_info(repo, last_rev, alternate_rev)
            else:
                info = get_info(repo, last_rev)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)


bzrlib.commands.register_command(cmd_statistics)
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
224
225
226
class cmd_ancestor_growth(bzrlib.commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree containing location; fall back to
        # opening a branch directly when there is no working tree.
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        depth_zero_count = 0
        nodes_seen = 0
        merge_sorted = tsort.merge_sort(graph.iteritems(), last_rev)
        # Walk the merge-sorted entries in reverse, writing one CSV row
        # "<depth-0 entries so far>, <entries so far>" per depth-0 entry
        # (presumably the mainline revisions -- confirm against tsort).
        for _num, _node_name, depth, _isend in reversed(merge_sorted):
            nodes_seen += 1
            if depth != 0:
                continue
            depth_zero_count += 1
            self.outf.write('%4d, %4d\n' % (depth_zero_count, nodes_seen))


bzrlib.commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
260
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
261
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
262
def test_suite():
    """Build and return the unittest suite for this plugin's test modules.

    Test modules are loaded by their name qualified with this module's
    __name__.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    import test_stats
    qualified_names = ['%s.%s' % (__name__, module_name)
                       for module_name in ['test_stats']]
    suite = TestSuite()
    suite.addTest(TestLoader().loadTestsFromModuleNames(qualified_names))
    return suite
271