/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
5
from bzrlib.lazy_import import lazy_import
6
lazy_import(globals(), """
7
from bzrlib import (
8
    branch,
9
    commands,
10
    config,
11
    errors,
12
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
13
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
14
    workingtree,
15
    )
16
""")
17
from bzrlib import lazy_regex
18
19
20
# Pattern used by extract_fullname(): the 'fullname' group captures, as
# little text as possible, everything before the first '<', excluding any
# whitespace that sits directly in front of that '<'.
_fullname_re = lazy_regex.lazy_compile(r'(?P<fullname>.*?)\s*<')
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
21
22
def extract_fullname(committer):
    """Return the name part of a committer string.

    :param committer: committer text, typically ``Name <email>``.
    :return: the text in front of the first ``<`` when there is one; ``''``
        when the string has no ``<`` but an email address can still be
        extracted from it; otherwise the committer string unchanged.
    """
    name_match = _fullname_re.match(committer)
    if not name_match:
        try:
            # Only used as a probe: does the string contain an address?
            config.extract_email_address(committer)
        except errors.BzrError:
            # No address either, so the whole string is the best name we have.
            return committer
        # We found an email address, but nothing that looks like a name.
        return ''
    return name_match.group('fullname')
35
36
37
def find_fullnames(lst):
    """Count the full names found in a list of committer names.

    :param lst: iterable of committer strings; each one is reduced to a
        full name with extract_fullname().
    :return: list of ``(count, fullname)`` tuples sorted in descending
        order, i.e. the most frequent name first (ties ordered by name,
        descending).
    """
    counts = {}
    for committer in lst:
        fullname = extract_fullname(committer)
        counts[fullname] = counts.get(fullname, 0) + 1
    # items() instead of iteritems(): same result, and not Python 2-only.
    return sorted(((count, name) for name, count in counts.items()),
                  reverse=True)
46
47
48
def collapse_by_author(committers):
    """The committers list is sorted by email, fix it up by author.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email address to the list of
        revisions committed under it; each revision must have a
        ``committer`` attribute. The dict and its lists are not modified.
    :return: list of ``(rev_count, revs, {email: count}, {fullname: count})``
        tuples, largest ``rev_count`` first. Records with the same
        ``rev_count`` keep the order in which they were created.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)
        match = None
        for count, fullname in fullnames:
            # An empty name identifies nobody, so it never links records.
            if fullname and fullname in name_to_counter:
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, fold this email into that record.
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # First time we see this author: start a new record.
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Copy the revision list: it is extended when further emails are
            # merged in, and that must not alter the caller's data.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    # Sort on the revision count only. Comparing the whole tuple would fall
    # through to comparing revision lists and dicts whenever counts tie.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda record: record[0], reverse=True)
94
95
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by committer.
96
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by committer email.

    :param a_repo: repository object providing ``get_revisions()``.
    :param revids: revision ids to fetch; ``len()`` is taken of it for
        progress reporting.
    :return: dict mapping the email address extracted from each revision's
        committer (or the raw committer string when no address can be
        extracted) to the list of matching revision objects.
    """
    by_email = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, revision in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            try:
                key = config.extract_email_address(revision.committer)
            except errors.BzrError:
                # No parsable address: group under the raw committer text.
                key = revision.committer
            if key not in by_email:
                by_email[key] = []
            by_email[key].append(revision)
    finally:
        progress.finished()

    return by_email
113
114
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
115
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: revision id whose ancestry is analysed.
    :return: the per-author records built by collapse_by_author().
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so the progress bar is
        # finished even when locking (or unlocking) fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped (presumably a placeholder
            # rather than a real revision id -- TODO confirm).
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
129
130
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
131
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the per-author records built by collapse_by_author() for the
        revisions in the ancestry of ``end_rev`` that are not in the
        ancestry of ``start_rev``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so the progress bar is
        # finished even when locking (or unlocking) fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first ancestry entry is skipped (presumably a placeholder
            # rather than a real revision id -- TODO confirm).
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            # Same grouping as get_info(): fetch the revisions and bucket
            # them by committer email via the shared helper.
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
161
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
162
def display_info(info, to_file):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are non-empty dicts mapping an
        email address / full name to the number of times it was used.
    :param to_file: object with a ``write()`` method; all output, including
        the section headers, is written to it.
    """
    for count, revs, emails, fullnames in info:
        # Most frequently used email address / name first.
        sorted_emails = sorted(((n, email)
                                for email, n in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((n, fullname)
                                   for fullname, n in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Written to to_file (not printed) so the header ends up in the
            # same stream as the entries below it.
            to_file.write('     Other names:\n')
            for name_count, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (name_count,))
                if fname == '':
                    # Make an empty name visible.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # NOTE: unlike the names above, this lists every address,
            # including the primary one already shown on the summary line.
            for email_count, email in sorted_emails:
                to_file.write('     %4d ' % (email_count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
192
193
0.140.14 by Jelmer Vernooij
Merge upstream.
194
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    # Command metadata: alternative names, the optional positional argument
    # and the standard 'revision' option.
    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree's idea of the last revision; fall back to
        # the branch itself when the location has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            tree = None

        if tree is None:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # Revision specs are resolved before the read lock below is taken.
        other_tip = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other_tip = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not other_tip:
                # Single revision: report on its whole ancestry.
                info = get_info(a_branch.repository, tip)
            else:
                # Two revisions: report only on what differs between them.
                info = get_diff_info(a_branch.repository, tip,
                                     other_tip)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
229
230
0.140.14 by Jelmer Vernooij
Merge upstream.
231
# Register the committer statistics command class with bzrlib's commands
# module at import time.
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
232
233
234
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree's idea of the last revision; fall back to
        # the branch itself when the location has no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            tree = None

        if tree is None:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        # Walk the merge-sorted graph in reverse. Every node bumps the
        # running total; each depth-0 node additionally gets a new number
        # and one output line "<number>, <total so far>".
        merged = tsort.merge_sort(graph.iteritems(), tip)
        mainline_seen = 0
        nodes_seen = 0
        for _num, _node_name, depth, _is_end in reversed(merged):
            nodes_seen += 1
            if depth != 0:
                continue
            mainline_seen += 1
            self.outf.write('%4d, %4d\n' % (mainline_seen, nodes_seen))
265
266
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
267
# Register the ancestor growth command class with bzrlib's commands module
# at import time.
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
268
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
269
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
270
def test_suite():
    """Build the test suite for this plugin.

    :return: a ``unittest.TestSuite`` holding the tests loaded from this
        package's ``test_stats`` module.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    import test_stats

    result = TestSuite()
    loader = TestLoader()
    # Module names are qualified with this package's own name.
    qualified_names = []
    for short_name in ['test_stats']:
        qualified_names.append('%s.%s' % (__name__, short_name))
    result.addTest(loader.loadTestsFromModuleNames(qualified_names))
    return result
279