/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
1
"""A Simple bzr plugin to generate statistics about the history."""
2
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
3
import re
4
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
5
from bzrlib import errors, tsort
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
6
from bzrlib.branch import Branch
7
import bzrlib.commands
8
from bzrlib.config import extract_email_address
9
from bzrlib.workingtree import WorkingTree
10
11
0.140.3 by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses
12
# Captures (non-greedily) everything before the first '<' of a committer
# string as the 'fullname' group; whitespace directly before '<' is excluded.
_fullname_re = re.compile(r'(?P<fullname>.*?)\s*<')
13
14
def extract_fullname(committer):
    """Return the name portion of a committer string.

    :param committer: the committer string recorded on a revision.
    :return: the text preceding the first '<' when there is one; otherwise
        the whole string if no email address can be extracted from it, or
        '' if an email address can be extracted (the string then holds an
        address but no name).
    """
    found = _fullname_re.match(committer)
    if found is not None:
        return found.group('fullname')

    try:
        # Only the success or failure of the extraction matters here; the
        # address itself is not needed.
        extract_email_address(committer)
    except errors.BzrError:
        # No address could be extracted, so use the whole string as the name.
        return committer

    # We found an email address, but not a fullname,
    # so there is no fullname
    return ''
27
28
29
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: an iterable of committer strings.
    :return: a list of (count, fullname) tuples sorted in descending order,
        so the most frequently used name comes first.
    """
    tally = {}
    for committer in lst:
        name = extract_fullname(committer)
        tally[name] = tally.get(name, 0) + 1

    pairs = [(seen, name) for name, seen in tally.iteritems()]
    pairs.sort(reverse=True)
    return pairs
38
39
40
def collapse_by_author(committers):
    """The committers mapping is keyed by email, fix it up by author.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the names used with each email address, and combine the
    addresses that share a (non-empty) name into one record.

    :param committers: dict mapping an email address (or a raw committer
        string) to the list of revisions recorded under it.  Every revision
        must have a ``committer`` attribute.  The mapping and its lists are
        left unmodified.
    :return: a list of ``(rev_count, revs, emails, fullnames)`` tuples with
        the largest ``rev_count`` first.  ``emails`` maps each address of
        the author to the number of revisions committed with it and
        ``fullnames`` maps each name to the number of times it was used.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)
        match = None
        for count, fullname in fullnames:
            # An empty name identifies nobody, so it never joins records.
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse to records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Copy the revision list: the record may be extended later and
            # must not grow the caller's list as a side effect.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    entries = [(len(revs), revs, emails, fnames)
               for revs, emails, fnames in counter_to_info.values()]
    # Order on the count alone, so a tie never falls through to comparing
    # the revision lists or the dicts with each other.
    entries.sort(key=lambda entry: entry[0], reverse=True)
    return entries
86
87
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
88
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    Every revision in the ancestry of ``revision`` is grouped by the email
    address of its committer (or by the raw committer string when no
    address can be extracted), and the groups are then merged per author.

    :param a_repo: the repository to read from; it is read-locked for the
        duration of the scan.
    :param revision: the revision id whose ancestry is examined.
    :return: the result of collapse_by_author() for the grouped revisions.
    """
    committers = {}
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so that the progress bar
        # is finished even if locking or unlocking the repository fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # NOTE(review): the first entry is skipped; presumably it is a
            # placeholder rather than a real revision id - confirm against
            # the repository API.
            ancestry = a_repo.get_ancestry(revision)[1:]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            total = len(ancestry)
            for count, rev in enumerate(revisions):
                pb.update('checking', count, total)
                try:
                    email = extract_email_address(rev.committer)
                except errors.BzrError:
                    # No usable address: group by the whole committer string.
                    email = rev.committer
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
112
113
0.140.7 by John Arbash Meinel
Compute the revisions using a difference check
114
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions:
    only revisions in the ancestry of ``end_rev`` that are not in the
    ancestry of ``start_rev`` are counted.

    :param a_repo: the repository to read from; it is read-locked for the
        duration of the scan.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the result of collapse_by_author() for the remaining revisions.
    """
    committers = {}
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so that the progress bar
        # is finished even if locking or unlocking the repository fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            # A set gives constant-time membership tests in the filter below.
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # NOTE(review): the first entry is skipped; presumably it is a
            # placeholder rather than a real revision id - confirm against
            # the repository API.
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev_id for rev_id in ancestry
                        if rev_id not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            total = len(ancestry)
            for count, rev in enumerate(revisions):
                pb.update('checking', count, total)
                try:
                    email = extract_email_address(rev.committer)
                except errors.BzrError:
                    # No usable address: group by the whole committer string.
                    email = rev.committer
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
144
0.140.6 by John Arbash Meinel
refactor in preparation for supporting 2 revision specs
145
def display_info(info, to_file):
    """Write out the information.

    For every author one summary line is written with the total revision
    count, the most common full name and the most common email address.
    When the author used several names, the remaining names follow under
    an "Other names:" header; when several email addresses were used, all
    of them are listed under "Other email addresses:" with their counts.

    :param info: an iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are non-empty dicts mapping a
        value to the number of times it was used.
    :param to_file: a file-like object; all output, including the section
        headers, is sent to its ``write`` method.
    """
    for total, _revs, emails, fullnames in info:
        # Most common value first; ties are ordered by the value itself.
        sorted_emails = sorted(((used, email)
                                for email, used in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((used, fullname)
                                   for fullname, used in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (total, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            _write_counted_lines(to_file, sorted_fullnames[1:])
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            # Every address is listed, the most common one included, so the
            # count of each individual address is visible.
            _write_counted_lines(to_file, sorted_emails)


def _write_counted_lines(to_file, entries):
    """Write one indented "count value" line per (count, value) pair."""
    for used, value in entries:
        to_file.write('     %4d ' % (used,))
        if value == '':
            # Make an empty value visible instead of writing a blank.
            to_file.write("''\n")
        else:
            to_file.write("%s\n" % (value,))
175
176
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
177
class cmd_statistics(bzrlib.commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Collect the author statistics and write them to self.outf.

        :param location: path of a working tree or branch; defaults to the
            current directory.
        :param revision: optional sequence of one or two revision specs.
            With one, statistics cover the ancestry of that revision; with
            two, only the revisions in the ancestry of the second that are
            not in the ancestry of the first.
        """
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here, so use the branch directly.
            a_branch = Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        other_tip = None
        if revision is not None:
            # Explicit revision specs override the tip found above.
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other_tip = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            repo = a_branch.repository
            if other_tip:
                info = get_diff_info(repo, tip, other_tip)
            else:
                info = get_info(repo, tip)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)
0.140.1 by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more.
212
213
214
# Register the statistics command class with bzrlib's command machinery.
bzrlib.commands.register_command(cmd_statistics)
0.140.4 by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors.
215
216
217
class cmd_ancestor_growth(bzrlib.commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write "revno, ancestor count" lines for LOCATION to self.outf.

        :param location: path of a working tree or branch; defaults to the
            current directory.
        """
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here, so use the branch directly.
            a_branch = Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(tip)
        finally:
            a_branch.unlock()

        merge_sorted = tsort.merge_sort(graph.iteritems(), tip)
        mainline_revno = 0
        seen_so_far = 0
        # Walk the merge-sorted result back to front, counting every entry.
        for _num, _node_name, depth, _is_end in reversed(merge_sorted):
            seen_so_far += 1
            # NOTE(review): depth 0 presumably marks a mainline revision -
            # confirm against tsort.merge_sort.
            if depth != 0:
                continue
            mainline_revno += 1
            self.outf.write('%4d, %4d\n' % (mainline_revno, seen_so_far))
248
249
250
# Register the ancestor-growth command class with bzrlib's command machinery.
bzrlib.commands.register_command(cmd_ancestor_growth)