bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.140.1
by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more. |
1 |
"""A Simple bzr plugin to generate statistics about the history."""
|
2 |
||
|
0.140.3
by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses |
3 |
import re |
4 |
||
|
0.140.4
by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors. |
5 |
from bzrlib import errors, tsort |
|
0.140.1
by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more. |
6 |
from bzrlib.branch import Branch |
7 |
import bzrlib.commands |
|
8 |
from bzrlib.config import extract_email_address |
|
9 |
from bzrlib.workingtree import WorkingTree |
|
10 |
||
11 |
||
|
0.140.3
by John Arbash Meinel
Updated to combine by author name, as well as by email address, and report on multiple names/addresses |
12 |
# Captures everything before the first '<' (minus any whitespace that
# immediately precedes it) as the 'fullname' group.
_fullname_re = re.compile(r'(?P<fullname>.*?)\s*<')


def extract_fullname(committer):
    """Return the name portion of a committer string.

    :param committer: The committer identifier stored on a revision.
    :return: The text in front of the first '<' when the string contains
        one.  Otherwise '' when extract_email_address() accepts the string
        (it is an address with no name attached), or the committer string
        unchanged when extract_email_address() raises BzrError.
    """
    found = _fullname_re.match(committer)
    if found is not None:
        return found.group('fullname')

    try:
        # Only the success or failure of the parse matters here; the
        # address itself is not needed.
        extract_email_address(committer)
    except errors.BzrError:
        return committer
    # An email address was found, but no fullname to go with it.
    return ''
|
27 |
||
28 |
||
29 |
def find_fullnames(lst):
    """Count how often each fullname occurs in an iterable of committers.

    :param lst: An iterable of committer strings.
    :return: A list of (count, fullname) tuples, sorted in descending
        order, so the most frequently seen fullname comes first.
    """
    tally = {}
    for committer in lst:
        name = extract_fullname(committer)
        tally[name] = tally.get(name, 0) + 1

    ranked = [(seen, name) for name, seen in tally.iteritems()]
    ranked.sort(reverse=True)
    return ranked
|
38 |
||
39 |
||
40 |
def collapse_by_author(committers):
    """Merge per-email commit lists that appear to belong to one author.

    The incoming mapping is keyed by email address.  People sometimes
    commit under the same name but with different addresses, which would
    otherwise give them several entries.  For every email address the
    fullnames used with it are looked up; if any non-empty fullname has
    already been seen for an earlier address, the two are folded into a
    single record.

    :param committers: A dict mapping email address => list of revisions
        (each revision must have a ``committer`` attribute).
    :return: A list of
        (revision_count, revisions, {email: count}, {fullname: count})
        tuples, sorted in descending order.
    """
    # fullname => record id.  Several names may point at the same record.
    id_by_name = {}
    # record id => [all_revisions, {email: count}, {fullname: count}]
    info_by_id = {}
    last_id = 0

    for email, revs in committers.iteritems():
        fullnames = find_fullnames(rev.committer for rev in revs)

        # Look for the first non-empty name that already has a record.
        existing = None
        for _, fullname in fullnames:
            if fullname and fullname in id_by_name:
                existing = id_by_name[fullname]
                break

        if not existing:
            # Nothing known about any of these names: start a new record.
            last_id += 1
            for _, fullname in fullnames:
                if fullname:
                    id_by_name[fullname] = last_id
            info_by_id[last_id] = [
                revs,
                {email: len(revs)},
                dict((fullname, count) for count, fullname in fullnames),
            ]
            continue

        # One of the names matched, so fold this address into that record.
        all_revs, email_counts, name_counts = info_by_id[existing]
        all_revs.extend(revs)
        email_counts[email] = len(revs)
        for count, fullname in fullnames:
            id_by_name[fullname] = existing
            name_counts[fullname] = name_counts.get(fullname, 0) + count

    collapsed = [(len(revs), revs, emails, fnames)
                 for revs, emails, fnames in info_by_id.values()]
    collapsed.sort(reverse=True)
    return collapsed
86 |
||
87 |
||
|
0.140.6
by John Arbash Meinel
refactor in preparation for supporting 2 revision specs |
88 |
def get_info(a_repo, revision):
    """Get the author information for everything in a revision's ancestry.

    The ancestry of ``revision`` is fetched from ``a_repo``, every revision
    is grouped by the email address found in its committer string (or by
    the raw committer string when no address can be extracted), and the
    groups are then merged with collapse_by_author().

    :param a_repo: The repository to read from.  It is read-locked for the
        duration of the scan.
    :param revision: The revision id whose ancestry should be examined.
    :return: Whatever collapse_by_author() returns for the gathered data.
    """
    committers = {}
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so that the progress bar
        # is still finished if acquiring the lock fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first entry of the ancestry is skipped
            # (presumably a placeholder for the null revision -- confirm
            # against Repository.get_ancestry).
            ancestry = a_repo.get_ancestry(revision)[1:]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            total = len(ancestry)
            for count, rev in enumerate(revisions):
                pb.update('checking', count, total)
                try:
                    email = extract_email_address(rev.committer)
                except errors.BzrError:
                    # No usable address; key on the whole committer string.
                    email = rev.committer
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
|
112 |
||
113 |
||
|
0.140.7
by John Arbash Meinel
Compute the revisions using a difference check |
114 |
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions:
    only revisions that are in the ancestry of ``end_rev`` but not in the
    ancestry of ``start_rev`` are considered.

    :param a_repo: The repository to read from.  It is read-locked for the
        duration of the scan.
    :param start_rev: Revision id whose ancestry is excluded.
    :param end_rev: Revision id whose ancestry is examined.
    :return: Whatever collapse_by_author() returns for the gathered data.
    """
    committers = {}
    pb = bzrlib.ui.ui_factory.nested_progress_bar()
    try:
        # The lock is taken inside the outer try so that the progress bar
        # is still finished if acquiring the lock fails.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first entry of the ancestry is skipped
            # (presumably a placeholder for the null revision -- confirm
            # against Repository.get_ancestry).
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]
            pb.note('getting revisions')
            revisions = a_repo.get_revisions(ancestry)

            total = len(ancestry)
            for count, rev in enumerate(revisions):
                pb.update('checking', count, total)
                try:
                    email = extract_email_address(rev.committer)
                except errors.BzrError:
                    # No usable address; key on the whole committer string.
                    email = rev.committer
                committers.setdefault(email, []).append(rev)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
|
144 |
||
|
0.140.6
by John Arbash Meinel
refactor in preparation for supporting 2 revision specs |
145 |
def display_info(info, to_file):
    """Write out the collected author information.

    For every entry a summary line with the total count, the most common
    fullname and the most common email address is written.  When more than
    one fullname was seen, the remaining names are listed; when more than
    one email address was seen, all addresses are listed.

    All output, including the section headings, goes to ``to_file``.

    :param info: An iterable of (count, revs, emails, fullnames) tuples,
        where ``emails`` and ``fullnames`` are dicts mapping a value to the
        number of times it was seen.
    :param to_file: A file-like object with a ``write`` method.
    """
    for total, revs, emails, fullnames in info:
        # Most frequently used values first.
        sorted_emails = sorted(((seen, email)
                                for email, seen in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((seen, fullname)
                                   for fullname, seen in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (total, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            # The most common name is already on the summary line.
            _write_counted(to_file, sorted_fullnames[1:])
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            _write_counted(to_file, sorted_emails)


def _write_counted(to_file, entries):
    """Write one ``count value`` line to to_file per (count, value) pair.

    An empty value is written as '' so the line does not look blank.
    """
    for seen, value in entries:
        to_file.write(' %4d ' % (seen,))
        if value == '':
            to_file.write("''\n")
        else:
            to_file.write("%s\n" % (value,))
|
175 |
||
176 |
||
|
0.140.1
by John Arbash Meinel
A simple plugin for generating author statistics, may grow into more. |
177 |
class cmd_statistics(bzrlib.commands.Command):
    """Generate statistics for LOCATION."""

    # Command metadata read by the bzrlib command framework.
    aliases = ['stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Collect the author statistics and write them to self.outf.

        :param location: Path of a working tree or branch to inspect.
        :param revision: Optional sequence of one or two revision specs.
            With one spec, the ancestry of that revision is reported.
            With two, only the revisions reachable from the second but
            not from the first are reported.
        """
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here; open the location as a plain branch.
            branch = Branch.open(location)
            tip = branch.last_revision()
        else:
            branch = tree.branch
            tip = tree.last_revision()

        # Revision specs are resolved before the read lock is taken.
        other_tip = None
        if revision is not None:
            tip = revision[0].in_history(branch).rev_id
            if len(revision) > 1:
                other_tip = revision[1].in_history(branch).rev_id

        branch.lock_read()
        try:
            repo = branch.repository
            if other_tip:
                info = get_diff_info(repo, tip, other_tip)
            else:
                info = get_info(repo, tip)
        finally:
            branch.unlock()

        display_info(info, self.outf)


bzrlib.commands.register_command(cmd_statistics)
|
|
0.140.4
by John Arbash Meinel
added ancestry_growth to generate a csv of ancestors. |
215 |
|
216 |
||
217 |
class cmd_ancestor_growth(bzrlib.commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    # Command metadata read by the bzrlib command framework.
    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write ``revno, ancestor_count`` lines to self.outf.

        The revision graph of the tip is merge-sorted and walked in
        reverse.  Every node bumps the running ancestor count; a line is
        written each time a node with depth 0 is reached.

        :param location: Path of a working tree or branch to inspect.
        """
        try:
            tree = WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here; open the location as a plain branch.
            branch = Branch.open(location)
            tip = branch.last_revision()
        else:
            branch = tree.branch
            tip = tree.last_revision()

        branch.lock_read()
        try:
            graph = branch.repository.get_revision_graph(tip)
        finally:
            branch.unlock()

        depth0_count = 0
        nodes_seen = 0
        ordered = tsort.merge_sort(graph.iteritems(), tip)
        for _num, _node_name, depth, _is_end in reversed(ordered):
            nodes_seen += 1
            if depth != 0:
                # Only depth-0 nodes produce an output line (presumably
                # these are the mainline revisions -- confirm against
                # tsort.merge_sort).
                continue
            depth0_count += 1
            self.outf.write('%4d, %4d\n' % (depth0_count, nodes_seen))


bzrlib.commands.register_command(cmd_ancestor_growth)