# Copyright (C) 2006-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""A Simple bzr plugin to generate statistics about the history."""
18
from __future__ import absolute_import
31
from ...revision import NULL_REVISION
32
from .classify import classify_delta
35
def collapse_by_person(revisions, canonical_committer):
    """Group revisions by canonical author and rank authors by commit count.

    The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each must provide
        ``get_apparent_authors()``.
    :param canonical_committer: mapping of ``(username, email)`` to the
        canonical ``(username, email)`` combo chosen for that person.
    :return: list of ``(count, [revisions], {email: count},
        {fullname: count})`` tuples, largest count first.
    """
    # Map from canonical committer to
    #   ([rev_list], {email: count}, {fname: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing to attribute this author entry to.
                continue
            canon_author = canonical_committer[(username, email)]
            revs, emails, fnames = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            revs.append(rev)
            emails[email] = emails.get(email, 0) + 1
            fnames[username] = fnames.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Sort on the count and the email addresses only.  Sorting the raw
    # tuples would fall through to comparing revision objects and dicts
    # whenever two people have the same count, which is not a defined
    # ordering (and raises TypeError on Python 3).
    res.sort(key=lambda item: (item[0], sorted(item[2])), reverse=True)
    return res
66
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: dict mapping an email address to the set of user
        names that were seen together with it.
    :param combo_count: dict mapping each ``(username, email)`` combo to the
        number of times it was seen.
    :return: dict mapping every ``(username, email)`` combo to the most
        frequently used combo of the identity it belongs to.  Combos with
        the same count are ordered by the combo itself so the result does
        not depend on set iteration order.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        """Fold identity ``old_id`` into ``new_id``.

        ``new_combos`` (the combo set of ``new_id``) is extended in place
        and every name/email that pointed at ``old_id`` is re-pointed.
        """
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if old_user:
                low_old_user = old_user.lower()
                assert username_to_id[low_old_user] in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email:
                assert email_to_id[old_email] in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email address starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            id_combos.add((user, email))
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None and user_id != cur_id:
                # This UserName was already matched to a different identity
                # than the current email: merge that identity into this one.
                collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        # Highest count wins; ties are broken by the combo itself.
        best_combo = min(combos,
                         key=lambda combo: (-combo_count[combo], combo))
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
141
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository to read the revisions from.
    :param revids: sized collection of revision ids to load.
    :return: ``(revisions, canonical_committer)`` where ``revisions`` is a
        list of the loaded revision objects and ``canonical_committer``
        maps each ``(username, email)`` combo to its canonical combo (see
        ``collapse_email_and_users``).
    """
    email_users = {}  # user@email.com => set of User Names
    combo_count = {}  # (User Name, email) => number of occurrences
    # Collect the revision objects while walking them: the stream from
    # iter_revisions() yields (revid, revision) pairs and is consumed by
    # this loop, whereas callers need the revision objects themselves.
    revisions = []
    pb = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        for count, (revid, rev) in enumerate(a_repo.iter_revisions(revids)):
            pb.update('checking', count, len(revids))
            revisions.append(rev)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    finally:
        pb.finished()
    return revisions, collapse_email_and_users(email_users, combo_count)
164
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: repository holding ``revision``.
    :param revision: revision id whose whole ancestry is analysed.
    :return: the per-person statistics produced by ``collapse_by_person``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # NOTE(review): read lock restored to mirror the lock_read() usage
        # visible in find_credits/gather_class_stats -- confirm upstream.
        a_repo.lock_read()
        try:
            trace.note('getting ancestry')
            graph = a_repo.get_graph()
            # Skip entries without parent information and the null revision.
            ancestry = [
                r for (r, ps) in graph.iter_ancestry([revision])
                if ps is not None and r != NULL_REVISION]
            revs, canonical_committer = get_revisions_and_committers(
                a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        # Always release the progress bar, even if taking the lock failed.
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
182
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository holding both revisions.
    :param start_rev: revision id on one side of the comparison.
    :param end_rev: revision id on the other side; the second element of
        ``graph.find_difference(start_rev, end_rev)`` is what gets analysed.
    :return: the per-person statistics produced by ``collapse_by_person``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # NOTE(review): read lock restored to mirror the lock_read() usage
        # visible in find_credits/gather_class_stats -- confirm upstream.
        a_repo.lock_read()
        try:
            graph = a_repo.get_graph()
            trace.note('getting ancestry diff')
            ancestry = graph.find_difference(start_rev, end_rev)[1]
            revs, canonical_committer = get_revisions_and_committers(
                a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        # Always release the progress bar, even if taking the lock failed.
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
201
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information.

    :param info: iterable of ``(count, revs, {email: count},
        {fullname: count})`` tuples, as built by ``collapse_by_person``.
    :param to_file: file-like object with a ``write`` method.
    :param gather_class_stats: optional callable taking the revision list
        and returning ``({class_name: count}, total)``; when given, a
        per-class contribution breakdown is written for every person.
    """
    for count, revs, emails, fullnames in info:
        # Most common email / full name first.
        sorted_emails = sorted(((email_count, email)
                                for email, email_count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((name_count, fullname)
                                   for fullname, name_count
                                   in fullnames.items()),
                                  reverse=True)
        best_fullname = sorted_fullnames[0][1]
        best_email = sorted_emails[0][1]
        if best_fullname == '' and best_email == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, best_fullname, best_email))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for name_count, fname in sorted_fullnames:
                to_file.write(' %4d ' % (name_count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for email_count, email in sorted_emails:
                to_file.write(' %4d ' % (email_count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, class_count in sorted(classes.items(),
                                            key=classify_key):
                # NOTE(review): presumably a change that fits no known
                # class is reported under the key None -- confirm against
                # classify_delta.
                if name is None:
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n"
                              % ((float(class_count) / total) * 100.0, name))
244
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class',
                                   help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        """Write per-committer statistics for LOCATION to ``self.outf``.

        :param location: path of a working tree or branch.
        :param revision: optional list of one or two revision specs.  With
            one, statistics cover its ancestry; with two, only the
            difference between them.
        :param show_class: also show the class of each contribution.
        """
        alternate_rev = None
        # Prefer the working tree's view of the last revision, falling back
        # to the branch when LOCATION has no working tree.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        # NOTE(review): branch read lock restored from the dropped lines;
        # confirm against upstream.
        a_branch.lock_read()
        try:
            if alternate_rev is not None:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        finally:
            a_branch.unlock()

        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
287
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write one ``revno, ancestor-count`` line per mainline revision.

        :param location: path of a working tree or branch.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        # NOTE(review): lock handling and the two counters below were
        # restored from dropped lines; confirm against upstream.
        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            # Walk oldest-first, counting every revision seen so far and
            # reporting the running total at each mainline (depth 0) step.
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
        finally:
            a_branch.unlock()
320
def gather_class_stats(repository, revs):
    """Count how many changes of each class the given revisions contain.

    :param repository: repository used to compute the revision deltas.
    :param revs: sized sequence of revision objects.
    :return: ``(counts, total)`` where ``counts`` maps each class yielded
        by ``classify_delta`` to its number of occurrences and ``total``
        is the sum over all classes.
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        pb.finished()
    return ret, total
343
def classify_key(item):
    """Sort key for a ``(label, count)`` pair: largest count first.

    Used for ``(author, count)`` and ``(class, count)`` items alike; pairs
    with equal counts are ordered by ascending label.
    """
    label = item[0]
    count = item[1]
    return (-count, label)
348
def display_credits(credits, to_file):
    """Write the credits to ``to_file``, one titled section per category.

    :param credits: tuple of four name lists
        ``(coders, documenters, artists, translators)``.
    :param to_file: file-like object with a ``write`` method.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # NOTE(review): skipping empty sections and the trailing blank
        # line were restored from dropped lines; confirm against upstream.
        if not lst:
            return
        to_file.write("%s:\n" % name)
        for person in lst:
            to_file.write("%s\n" % person)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
363
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` credits each of
    its apparent authors once per class of change it contains.

    :param repository: repository holding ``revid``.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators); each
        is a list of author strings, largest count first.
    """
    # NOTE(review): the "code"/"art"/"translation"/None entries were
    # restored from dropped lines; the first three are the keys read by
    # sort_class below, None is presumably the "unclassified" bucket --
    # confirm against classify_delta.
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {},
           }
    repository.lock_read()
    try:
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    class_counts = ret[c]
                    for author in rev.get_apparent_authors():
                        class_counts[author] = class_counts.get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
403
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Write the credits for LOCATION to ``self.outf``.

        :param location: path of a working tree or branch.
        :param revision: optional list of revision specs; the first one
            selects the revision whose ancestry is credited.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        # NOTE(review): branch read lock restored from the dropped lines;
        # confirm against upstream.
        a_branch.lock_read()
        try:
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
        finally:
            a_branch.unlock()