bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.140.1
by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more. |
1 |
"""A Simple bzr plugin to generate statistics about the history."""
|
2 |
||
|
0.140.3
by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses |
3 |
import re |
4 |
||
|
0.140.4
by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors. |
5 |
from bzrlib import errors, tsort |
|
0.140.1
by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more. |
6 |
from bzrlib.branch import Branch |
7 |
import bzrlib.commands |
|
8 |
from bzrlib.config import extract_email_address |
|
9 |
from bzrlib.workingtree import WorkingTree |
|
10 |
||
11 |
||
|
0.140.3
by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses |
12 |
_fullname_re = re.compile(r'(?P<fullname>.*?)\s*<')


def extract_fullname(committer):
    """Return the name portion of a committer string.

    The text in front of the '<' that opens the email address, with any
    whitespace before that '<' dropped, is taken as the full name.  When
    the string contains no such '<', it is handed to
    extract_email_address(): if that raises BzrError the whole string is
    returned as the name, otherwise the string is treated as a bare email
    address and '' is returned.
    """
    found = _fullname_re.match(committer)
    if found is not None:
        return found.group('fullname')
    try:
        # Only the success/failure of the call matters, not its result.
        extract_email_address(committer)
    except errors.BzrError:
        # No email address could be extracted: use the raw string as name.
        return committer
    # We found an email address, but not a fullname
    # so there is no fullname
    return ''
|
27 |
||
28 |
||
29 |
def find_fullnames(lst):
    """Count how often each full name occurs in a list of committer names.

    :param lst: iterable of committer strings; each one is reduced to a
        full name with extract_fullname().
    :return: list of (count, fullname) tuples sorted in descending order.
    """
    tally = {}
    for entry in lst:
        name = extract_fullname(entry)
        tally[name] = tally.get(name, 0) + 1
    pairs = [(seen, name) for name, seen in tally.iteritems()]
    pairs.sort(reverse=True)
    return pairs
|
38 |
||
39 |
||
40 |
def collapse_by_author(committers):
    """The committers list is sorted by email, fix it up by author.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email address to the list of
        revisions committed under it.  Each revision must expose a
        ``committer`` attribute.  Neither the dict nor its lists are
        modified.
    :return: list of (revision_count, revisions, {email: count},
        {fullname: count}) tuples, largest revision_count first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)
        match = None
        for count, fullname in fullnames:
            # The empty name means "no name given"; it must never be used
            # to link two unrelated email addresses together.
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        if match is not None:
            # One of the names matched, we need to collapse the records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count) for count, fullname in fullnames)
            # Store a copy of revs: a later merge extends this list and
            # must not grow the list owned by the caller's dict.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]

    # Order on the revision count only.  Comparing whole tuples would fall
    # through to the revision lists and dicts whenever two counts tie.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda entry: entry[0], reverse=True)
86 |
||
87 |
||
|
0.142.2
by Jelmer Vernooij
Split out functionality that sorts revids by committer. |
88 |
def sort_by_committer(a_repo, revids):
    """Group the revisions named by revids by their committer's email.

    :param a_repo: repository; its get_revisions() is used to load the
        revisions.
    :param revids: the revision ids to load (its length is reported as the
        progress total).
    :return: dict mapping an email address to the list of revisions
        committed under it.  When no email address can be extracted from a
        committer string, the raw committer string is used as the key.
    """
    by_email = {}
    progress = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, revision in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            committer = revision.committer
            try:
                key = extract_email_address(committer)
            except errors.BzrError:
                key = committer
            if key not in by_email:
                by_email[key] = []
            by_email[key].append(revision)
    finally:
        progress.finished()

    return by_email
|
105 |
||
106 |
||
|
0.140.6
by John Arbash Meinel
refactor in preparation for supporting 2 revision specs |
107 |
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: repository to read; it is read-locked while the
        ancestry and revisions are loaded and unlocked afterwards.
    :param revision: revision id whose ancestry is examined.
    :return: the result of collapse_by_author() over the revisions in that
        ancestry, grouped with sort_by_committer().
    """
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is still
        # finished when locking fails, and also when unlock() raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first entry of the ancestry list is skipped (presumably
            # a placeholder for the null revision -- confirm against
            # Repository.get_ancestry).
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
|
0.140.6
by John Arbash Meinel
refactor in preparation for supporting 2 revision specs |
121 |
|
122 |
||
|
0.140.7
by John Arbash Meinel
Compute the revisions using a difference check |
123 |
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read; it is read-locked while the
        ancestries and revisions are loaded and unlocked afterwards.
    :param start_rev: revision id whose ancestry is excluded.
    :param end_rev: revision id whose ancestry is examined.
    :return: the result of collapse_by_author() over the revisions that
        are in end_rev's ancestry but not in start_rev's.
    """
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is still
        # finished when locking fails, and also when unlock() raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first entry of the ancestry list is skipped (presumably
            # a placeholder for the null revision -- confirm against
            # Repository.get_ancestry).
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]

            # Same grouping step as get_info(), instead of a private copy
            # of the loop.
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
|
153 |
||
|
0.140.6
by John Arbash Meinel
refactor in preparation for supporting 2 revision specs |
154 |
def display_info(info, to_file):
    """Write out the information.

    For every record one summary line "<count> <name> <<email>>" is
    written, using the most frequent name and email address.  If the record
    has more than one name, the remaining names follow; if it has more than
    one email address, every address (including the most frequent one) is
    listed.

    :param info: iterable of (count, revs, emails, fullnames) tuples, where
        emails and fullnames are dicts mapping the value to its count.
    :param to_file: object with a write() method; all output, including the
        section headers, is written to it.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and name
        sorted_emails = sorted(((num, email)
                                for email, num in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((num, fullname)
                                   for fullname, num in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headers go to to_file like everything else, not to stdout.
            to_file.write(' Other names:\n')
            for num, fname in sorted_fullnames[1:]:
                to_file.write(' %4d ' % (num,))
                if fname == '':
                    # Make an empty name visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            # NOTE: unlike the names above, this lists every address,
            # the most common one included.
            for num, email in sorted_emails:
                to_file.write(' %4d ' % (num,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
|
184 |
||
185 |
||
|
0.140.1
by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more. |
186 |
class cmd_statistics(bzrlib.commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree's own last revision; fall back to the
        # branch tip when LOCATION has no working tree.
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # Revision specs are resolved before the read lock below is taken
        # (presumably because some specs, e.g. "branch:", need a write
        # lock -- confirm before reordering).
        alternate_rev = None
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if not alternate_rev:
                # A single revision: report on its whole ancestry.
                info = get_info(a_branch.repository, last_rev)
            else:
                # Two revisions: report only on what lies between them.
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)


bzrlib.commands.register_command(cmd_statistics)
|
|
0.140.4
by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors. |
224 |
|
225 |
||
226 |
class cmd_ancestor_growth(bzrlib.commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree's own last revision; fall back to the
        # branch tip when LOCATION has no working tree.
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_revision_graph(last_rev)
        finally:
            a_branch.unlock()

        ordered = tsort.merge_sort(graph.iteritems(), last_rev)
        mainline_count = 0
        total_seen = 0
        # Walk the merge-sorted graph in reverse, counting every revision
        # and emitting one "revno, total" row per depth-0 revision.
        for _seq, _node_name, depth, _is_end in reversed(ordered):
            total_seen += 1
            if depth == 0:
                mainline_count += 1
                self.outf.write('%4d, %4d\n' % (mainline_count, total_seen))


bzrlib.commands.register_command(cmd_ancestor_growth)
|
|
0.141.1
by Jelmer Vernooij
Add some simple tests for extract_fullname. |
260 |
|
|
0.140.10
by John Arbash Meinel
Minor whitespace cleanup |
261 |
|
|
0.141.1
by Jelmer Vernooij
Add some simple tests for extract_fullname. |
262 |
def test_suite():
    """Build the TestSuite holding this plugin's test modules.

    :return: a unittest TestSuite with the tests loaded from each module
        in testmod_names, resolved relative to this module's name.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    # NOTE(review): the imported name is not used below; presumably kept so
    # that a missing test module fails right here -- confirm before removing.
    import test_stats
    result = TestSuite()
    loader = TestLoader()
    testmod_names = ['test_stats']
    qualified = []
    for short_name in testmod_names:
        qualified.append('%s.%s' % (__name__, short_name))
    result.addTest(loader.loadTestsFromModuleNames(qualified))
    return result
|
271 |