/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
5
from bzrlib.lazy_import import lazy_import
6
lazy_import(globals(), """
7
from bzrlib import (
8
    branch,
9
    commands,
10
    config,
11
    errors,
12
    tsort,
0.144.1 by Wesley J. Landaker
Added ui to bzrlib lazy imports.
13
    ui,
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
14
    workingtree,
15
    )
16
""")
17
from bzrlib import lazy_regex
18
19
20
# Pattern built through lazy_regex.lazy_compile. The "fullname" group
# non-greedily captures everything before the first '<', excluding any
# whitespace that immediately precedes that '<'.
_fullname_re = lazy_regex.lazy_compile(r'(?P<fullname>.*?)\s*<')
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
21
22
def extract_fullname(committer):
    """Try to get the user's name from their committer info.

    :param committer: committer string (typically "Full Name <email>").
    :return: the text preceding the first '<' when the string contains
        one; '' when there is no such text but an email address can still
        be extracted; otherwise the committer string unchanged.
    """
    m = _fullname_re.match(committer)
    if m:
        return m.group('fullname')
    try:
        # Only success/failure matters here; the address itself is unused.
        config.extract_email_address(committer)
    except errors.BzrError:
        # No email address could be extracted, so use the whole string.
        return committer
    # We found an email address, but not a fullname
    # so there is no fullname
    return ''
35
36
37
def find_fullnames(lst):
    """Find the fullnames for a list of committer names.

    :param lst: iterable of committer strings.
    :return: list of (count, fullname) tuples sorted in descending order,
        so the most frequent fullname comes first.
    """
    counts = {}
    for committer in lst:
        fullname = extract_fullname(committer)
        counts[fullname] = counts.get(fullname, 0) + 1
    # items() rather than iteritems(): same result, and not tied to the
    # Python 2 dict API.
    return sorted(((count, name) for name, count in counts.items()),
                  reverse=True)
46
47
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambiguous meaning
48
def collapse_by_person(committers):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email key to a list of revision
        objects (each must expose a ``committer`` attribute). The dict
        and its lists are not modified.
    :return: list of (revision_count, revisions, {email: count},
        {fullname: count}) tuples, largest revision_count first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)
        match = None
        for count, fullname in fullnames:
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse to records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                # Empty names are never used for matching, so do not
                # index them (same rule as the branch below).
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Store a copy: the record's revision list is extended later
            # when another email collapses into it, and that must not
            # alter the caller's list.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    # Order by revision count only; comparing whole tuples would fall back
    # to comparing revision lists and dicts whenever two counts are equal.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda item: item[0], reverse=True)
94
95
0.142.2 by Jelmer Vernooij
Split out functionality that sorts revids by committer.
96
def sort_by_committer(a_repo, revids):
    """Group the revisions identified by revids by their apparent author.

    :param a_repo: repository object; its get_revisions() is called with
        revids.
    :param revids: revision ids to load (len() is taken for progress
        reporting).
    :return: dict mapping a key to the list of revision objects sharing
        it. The key is the email address extracted from the revision's
        apparent author, or the raw apparent-author string when
        extraction raises errors.BzrError.
    """
    committers = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        pb.note('getting revisions')
        revisions = a_repo.get_revisions(revids)
        for count, rev in enumerate(revisions):
            pb.update('checking', count, len(revids))
            author = rev.get_apparent_author()
            try:
                email = config.extract_email_address(author)
            except errors.BzrError:
                # No parseable address: fall back to the raw author text.
                email = author
            committers.setdefault(email, []).append(rev)
    finally:
        pb.finished()

    return committers
113
114
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
115
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked for the
        duration of the ancestry and revision lookups.
    :param revision: revision id whose ancestry is analysed.
    :return: the result of collapse_by_person() for that ancestry.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Lock inside the outer try so the progress bar is always
        # finished, even if taking or releasing the lock raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped (presumably a
            # placeholder for the null revision -- confirm against
            # Repository.get_ancestry).
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
129
130
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
131
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        ancestries and revisions are loaded.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is analysed.
    :return: the result of collapse_by_person() for the revisions that
        are in end_rev's ancestry but not in start_rev's.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Lock inside the outer try so the progress bar is always
        # finished, even if taking or releasing the lock raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first ancestry entry is skipped, as in get_info().
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            # Same grouping as get_info(): share the helper instead of
            # repeating its loop here.
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_person(committers)
161
0.140.16 by Jelmer Vernooij
Rename collapse_by_author -> collapse_by_person since author has an unambiguous meaning
162
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
163
def display_info(info, to_file):
    """Write out the information

    :param info: iterable of (count, revs, emails, fullnames) tuples, where
        emails and fullnames are dicts mapping a string to its count.
    :param to_file: object with a write() method; all output, including
        the section headings, is sent to it.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((n, email)
                                for email, n in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((n, fullname)
                                   for fullname, n in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headings go to to_file like everything else, not to stdout.
            to_file.write('     Other names:\n')
            for n, fname in sorted_fullnames[1:]:
                to_file.write('     %4d ' % (n,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # NOTE(review): unlike the names above, this lists every
            # address, including the primary one already shown.
            for n, email in sorted_emails:
                to_file.write('     %4d ' % (n,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
193
194
0.140.14 by Jelmer Vernooij
Merge upstream.
195
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # location: path of a working tree or branch.
        # revision: optional list of one or two revision specs. With two,
        # only the revisions separating them are reported.
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: read the tip from the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
230
231
0.140.14 by Jelmer Vernooij
Merge upstream.
232
# Register the committer statistics command class with bzrlib.commands.
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
233
234
235
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Writes one "revno, cumulative-node-count" line to self.outf for
        # every depth-0 entry of the merge-sorted revision graph.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: read the tip from the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        revno = 0
        cur_parents = 0
        sorted_graph = tsort.merge_sort(graph.items(), last_rev)
        # Walk the merge-sorted entries in reverse order; only the depth
        # field of each entry is used.
        for _num, _node_name, depth, _isend in reversed(sorted_graph):
            cur_parents += 1
            if depth == 0:
                revno += 1
                self.outf.write('%4d, %4d\n' % (revno, cur_parents))
266
267
0.143.1 by John Arbash Meinel
Make a lot of imports lazy since they may not actually be used.
268
# Register the ancestor growth command class with bzrlib.commands.
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
269
0.140.10 by John Arbash Meinel
Minor whitespace cleanup
270
0.141.1 by Jelmer Vernooij
Add some simple tests for extract_fullname.
271
def test_suite():
    """Build the test suite for this plugin.

    :return: a unittest.TestSuite holding the tests loaded from the
        ``test_stats`` submodule of this package.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    # Imported but not referenced below; kept from the original
    # (presumably so a missing test module fails early -- confirm).
    import test_stats
    suite = TestSuite()
    loader = TestLoader()
    testmod_names = ['test_stats']
    # Module names are qualified with this package's own name.
    qualified_names = ['%s.%s' % (__name__, i) for i in testmod_names]
    suite.addTest(loader.loadTestsFromModuleNames(qualified_names))
    return suite
280