# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""A Simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta

35
def collapse_by_person(revisions, canonical_committer):
    """Group revisions by canonical author and rank authors by commit count.

    The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: Iterable of revision objects providing
        ``get_apparent_authors()``.
    :param canonical_committer: Mapping of ``(username, email)`` to the
        canonical identity to file that combination under.
    :return: List of ``(count, [revisions], {email: count}, {fname: count})``
        tuples, one per canonical author, highest count first.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Neither a name nor an address: nobody to attribute this to.
                continue
            canon_author = canonical_committer[(username, email)]
            rev_list, email_counts, fname_counts = (
                committer_to_info.setdefault(canon_author, ([], {}, {})))
            rev_list.append(rev)
            email_counts[email] = email_counts.get(email, 0) + 1
            fname_counts[username] = fname_counts.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Sort on the count (email addresses break ties) instead of on the whole
    # tuple: a tie on the count would otherwise fall through to comparing
    # the revision lists and the dicts, which are not reliably orderable.
    res.sort(key=lambda item: (item[0], sorted(item[2])), reverse=True)
    return res
66
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: Mapping of email address to the set of user names
        seen together with that address.
    :param combo_count: Mapping of ``(username, email)`` to the number of
        times that combination was seen.
    :return: Mapping of each ``(username, email)`` combination that was
        assigned to an identity to the most frequently seen combination of
        that identity. The ``('', '')`` combination is never assigned.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        """Fold identity ``old_id`` into ``new_id`` and re-point its keys."""
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            # ``user`` and ``email`` are the current values of the enclosing
            # loops; the caller records those two itself.
            if old_user and old_user != user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email address starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None and user_id != cur_id:
                # This UserName was already matched to a different identity
                # than the current email: merge the two.
                collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        # The most frequently seen combination represents the identity.
        best_combo = max(combos, key=lambda x: combo_count[x])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
141
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: Repository to load the revisions from.
    :param revids: Sized collection of revision ids to load.
    :return: ``(revisions, canonical_committer)`` where ``revisions`` is the
        list of loaded revision objects and ``canonical_committer`` is the
        mapping built by ``collapse_email_and_users``.
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}  # (User Name, user@email.com) => times seen
    # Collect the revision objects while scanning: handing back the
    # iter_revisions() iterable itself would give callers (revid, rev) pairs
    # from an iterable this loop has already walked.
    revisions = []
    pb = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        for count, (revid, rev) in enumerate(a_repo.iter_revisions(revids)):
            pb.update('checking', count, len(revids))
            revisions.append(rev)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                # full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    finally:
        # NOTE(review): assumes the progress bar is released with finished();
        # confirm against the ui API in use.
        pb.finished()
    return revisions, collapse_email_and_users(email_users, combo_count)
164
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: Repository holding the history.
    :param revision: Revision id whose whole ancestry is analysed.
    :return: The per-author list produced by ``collapse_by_person``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        with a_repo.lock_read():
            trace.note('getting ancestry')
            graph = a_repo.get_graph()
            # Skip entries whose parents are unknown (ps is None) and the
            # null revision: neither has revision data to load.
            ancestry = [
                r for (r, ps) in graph.iter_ancestry([revision])
                if ps is not None and r != NULL_REVISION]
            revs, canonical_committer = get_revisions_and_committers(
                a_repo, ancestry)
    finally:
        # NOTE(review): assumes the progress bar is released with finished();
        # confirm against the ui API in use.
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
182
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: Repository holding the history.
    :param start_rev: Revision id of the first side of the comparison.
    :param end_rev: Revision id of the second side; only the second element
        of ``find_difference(start_rev, end_rev)`` is analysed.
    :return: The per-author list produced by ``collapse_by_person``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        with a_repo.lock_read():
            graph = a_repo.get_graph()
            trace.note('getting ancestry diff')
            ancestry = graph.find_difference(start_rev, end_rev)[1]
            revs, canonical_committer = get_revisions_and_committers(
                a_repo, ancestry)
    finally:
        # NOTE(review): assumes the progress bar is released with finished();
        # confirm against the ui API in use.
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
201
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: Iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` map a value to the number of
        times it was seen.
    :param to_file: Object with a ``write`` method that receives the report.
    :param gather_class_stats: Optional callable taking the ``revs`` of one
        entry and returning ``(classes, total)``; when given, a
        "Contributions" section is written for every entry.
    """
    for count, revs, emails, fullnames in info:
        # Most common value first: sort (count, value) pairs descending.
        sorted_emails = sorted(
            ((num, email) for email, num in emails.items()), reverse=True)
        sorted_fullnames = sorted(
            ((num, fullname) for fullname, num in fullnames.items()),
            reverse=True)
        best_fullname = sorted_fullnames[0][1]
        best_email = sorted_emails[0][1]
        if best_fullname == '' and best_email == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, best_fullname, best_email))
        if len(sorted_fullnames) > 1:
            _write_counted_values(to_file, ' Other names:\n',
                                  sorted_fullnames)
        if len(sorted_emails) > 1:
            _write_counted_values(to_file, ' Other email addresses:\n',
                                  sorted_emails)
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, num in sorted(classes.items(), key=classify_key):
                if name is None:
                    # Changes that could not be classified.
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n"
                              % ((float(num) / total) * 100.0, name))


def _write_counted_values(to_file, heading, counted):
    """Write heading, then one "count value" line per (count, value) pair."""
    to_file.write(heading)
    for num, value in counted:
        to_file.write(' %4d ' % (num,))
        if value == '':
            # Make an empty name or address visible in the listing.
            to_file.write("''\n")
        else:
            to_file.write("%s\n" % (value,))
244
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class',
                                   help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        """Write per-author commit statistics for LOCATION to ``self.outf``.

        :param location: Working tree or branch location to inspect.
        :param revision: Optional list of revision specs. The first one
            replaces the tip as the revision to analyse; when a second one
            is given only the difference between the two is analysed.
        :param show_class: When true, also report the classes of each
            author's contributions.
        """
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
284
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write "revno, ancestor count" lines for LOCATION to ``self.outf``.

        :param location: Working tree or branch location to inspect.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            # NOTE(review): the counting below was reconstructed from the
            # surviving variable names; confirm against version control.
            for num, node_name, depth, isend in reversed(sorted_graph):
                # Every node adds one ancestor to the running total.
                cur_parents += 1
                if depth == 0:
                    # Only depth-0 nodes advance the revno and emit a line.
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
314
def gather_class_stats(repository, revs):
    """Count the classes of change made by the given revisions.

    :param repository: Repository to read the revision deltas from.
    :param revs: Sized collection of revisions to classify.
    :return: ``(classes, total)`` where ``classes`` maps each class yielded
        by ``classify_delta`` to the number of times it occurred and
        ``total`` is the sum of those counts.
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        with repository.lock_read():
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    finally:
        # NOTE(review): assumes the progress bar is released with finished();
        # confirm against the ui API in use.
        pb.finished()
    return ret, total
334
def classify_key(item):
    """Sort key for item of (author, count) from classify_delta.

    Orders by descending count first, then by ascending name.
    """
    return -item[1], item[0]
339
def display_credits(credits, to_file):
    """Write the credits, one titled section per kind of contribution.

    :param credits: ``(coders, documenters, artists, translators)`` tuple of
        author-name sequences.
    :param to_file: Object with a ``write`` method that receives the output.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # Kinds nobody contributed to are left out altogether.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for author in lst:
            to_file.write("%s\n" % author)
        # NOTE(review): the blank separator line after each section was
        # reconstructed; confirm against version control.
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
354
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: Repository holding the history of ``revid``.
    :param revid: Revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {},
           }
    with repository.lock_read():
        graph = repository.get_graph()
        # Skip entries whose parents are unknown (ps is None) and the null
        # revision: neither has revision data to load.
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # Each class is credited once per revision, however many
                # changes of that class the revision contains.
                for c in set(classify_delta(delta)):
                    authors = ret.setdefault(c, {})
                    for author in rev.get_apparent_authors():
                        authors[author] = authors.get(author, 0) + 1
        finally:
            # NOTE(review): assumes the progress bar is released with
            # finished(); confirm against the ui API in use.
            pb.finished()

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
391
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Write the credits for LOCATION to ``self.outf``.

        :param location: Working tree or branch location to inspect.
        :param revision: Optional list of revision specs; the first one
            replaces the tip as the revision to analyse.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)