# Copyright (C) 2006-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""A Simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import
33
from ...revision import NULL_REVISION
34
from .classify import classify_delta
37
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each must provide
        ``get_apparent_authors()``.
    :param canonical_committer: mapping of ``(username, email)`` to the
        canonical ``(username, email)`` chosen for that person.
    :return: list of ``(rev_count, [revs], {email: count}, {fname: count})``
        tuples, one per canonical committer, sorted in descending order.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                # A completely blank author has no canonical entry to
                # credit, so it is skipped.
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].get(email, 0) + 1
            info[2][username] = info[2].get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]

    def key_fn(item):
        # Order by revision count first; the email list only breaks ties.
        return item[0], list(item[2].keys())
    res.sort(reverse=True, key=key_fn)
    return res
71
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: dict mapping an email address to the set of user
        names that were seen with it.
    :param combo_count: dict mapping ``(username, email)`` to the number of
        times that combination was seen.
    :return: dict mapping every known ``(username, email)`` combination to
        the most frequently used combination of the identity it belongs to.
        The fully blank ``('', '')`` combination is never included.
    """
    id_to_combos = {}    # identity id => set of (username, email)
    username_to_id = {}  # lower-cased username => identity id
    email_to_id = {}     # email => identity id
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity old_id into new_id. ``user`` and ``email`` are read
        # from the enclosing loop: they are the pair currently being
        # processed, which the caller re-points itself afterwards.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        # The most frequently used combination represents the identity.
        best_combo = max(combos, key=lambda x: combo_count[x])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
146
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository providing ``iter_revisions(revids)``, which is
        consumed here as ``(revid, revision)`` pairs.
    :param revids: sized collection of revision ids to load.
    :return: ``(revisions, canonical_committer)`` where ``revisions`` is a
        one-shot generator of the loaded revision objects and
        ``canonical_committer`` is the mapping built by
        ``collapse_email_and_users``.
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}  # (User Name, user@email.com) => times seen
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))
167
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked while the
        ancestry and revisions are loaded.
    :param revision: revision id whose ancestry is analysed.
    :return: the per-person list produced by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar(), a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Entries whose parents are None, and the null revision, are left
        # out of the ancestry.
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
181
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        difference and revisions are loaded.
    :param start_rev: revision id passed first to ``find_difference``.
    :param end_rev: revision id passed second to ``find_difference``.
    :return: the per-person list produced by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar(), a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        # Element [1] of the result is presumably the set of revisions
        # reachable only from end_rev -- TODO confirm against the graph API.
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
196
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, {email: count}, {name: count})``
        tuples, as produced by ``collapse_by_person``.
    :param to_file: object with a ``write(str)`` method.
    :param gather_class_stats: optional callable taking the list of revisions
        and returning ``({class_name: count}, total)``; when given, a
        per-class percentage breakdown is written for every person.
    """
    # NOTE(review): the text literals below are reproduced exactly as they
    # appear in this copy of the file; their leading padding may have been
    # collapsed by whatever stripped the file's indentation -- confirm.
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((n, email)
                                for email, n in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((n, fullname)
                                   for fullname, n in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            # Distinct loop names so the person's total is not shadowed.
            for name_count, fname in sorted_fullnames:
                to_file.write(' %4d ' % (name_count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for email_count, email in sorted_emails:
                to_file.write(' %4d ' % (email_count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, class_count in sorted(classes.items(),
                                            key=classify_key):
                if name is None:
                    # Presumably the classifier reports unclassifiable
                    # changes as None -- TODO confirm.
                    name = "Unknown"
                # Guard against a callable that reports a zero total.
                if total:
                    percent = (float(class_count) / total) * 100.0
                else:
                    percent = 0.0
                to_file.write(" %4.0f%% %s\n" % (percent, name))
240
# Command class: the class docstring is left untouched because command
# docstrings may be shown to users as help text.
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        """Collect per-committer statistics and write them to ``self.outf``.

        :param location: path of a working tree or branch to analyse.
        :param revision: optional list of one or two revision specs. One
            spec selects the revision to analyse; two select a range that is
            handed to ``get_diff_info``.
        :param show_class: when true, also write the per-class contribution
            breakdown for every committer.
        """
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at this location: use the branch directly.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev is not None:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            # gather_class_stats takes its own read lock on the repository,
            # so it can be called lazily from display_info.
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
280
# Command class: the class docstring is left untouched because command
# docstrings may be shown to users as help text.
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    # NOTE(review): this copy of the file lost some lines between the
    # attributes above and run(); check the upstream file for further class
    # attributes (for example a ``hidden`` flag) before relying on this.

    def run(self, location='.'):
        """Write one ``revno, ancestor-count`` line per mainline revision.

        :param location: path of a working tree or branch to analyse.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at this location: use the branch directly.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            # NOTE(review): the counter updates below were missing from this
            # copy and are reconstructed from the names written out -- every
            # node counts as one more ancestor, and a line is emitted each
            # time a depth-0 (presumably mainline) node is reached. Confirm
            # against the upstream file.
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
312
def gather_class_stats(repository, revs):
    """Count how often each change class occurs in the deltas of ``revs``.

    :param repository: repository providing ``lock_read()`` and
        ``get_deltas_for_revisions(revs)``.
    :param revs: sized collection of revisions to classify.
    :return: ``(counts, total)`` where ``counts`` maps each class yielded by
        ``classify_delta`` to its number of occurrences and ``total`` is the
        sum of all occurrences.
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total
329
def classify_key(item):
    """Sort key for a ``(name, count)`` item: highest count first, then name.

    Used for both ``(class, count)`` and ``(author, count)`` pairs.

    :param item: two-element sequence ``(name, count)``.
    :return: ``(-count, name)``.
    """
    return -item[1], item[0]
334
def display_credits(credits, to_file):
    """Write the credits to ``to_file``, one titled section per category.

    :param credits: 4-tuple ``(coders, documenters, artists, translators)``,
        each a list of names.
    :param to_file: object with a ``write(str)`` method.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # Sections without any entries are left out entirely.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for entry in lst:
            to_file.write("%s\n" % entry)
        # Blank line separating this section from the next one.
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
350
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: repository to read from; it is read-locked for the
        duration of the analysis.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Revisions with more than one parent are not credited.
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    # setdefault tolerates any class outside the four that
                    # are reported, instead of raising KeyError.
                    author_counts = ret.setdefault(c, {})
                    for author in rev.get_apparent_authors():
                        author_counts[author] = (
                            author_counts.get(author, 0) + 1)

    def sort_class(name):
        # Authors of one class, most credited first.
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
385
# Command class: the class docstring is left untouched because command
# docstrings may be shown to users as help text.
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Compute the credits for a revision and write them to ``self.outf``.

        :param location: path of a working tree or branch to analyse.
        :param revision: optional list of revision specs; only the first one
            is used, to select the revision whose ancestry is credited.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at this location: use the branch directly.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)