# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""A Simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import
31
from ...revision import NULL_REVISION
32
from .classify import classify_delta
34
from itertools import izip
37
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each one must provide
        get_apparent_authors().
    :param canonical_committer: dict mapping every (username, email) combo
        to the canonical combo chosen for that person.
    :return: list of (rev_count, [revisions], {email: count},
        {username: count}) tuples, one per canonical committer, with the
        largest rev_count first.
    :raises KeyError: if an author combo is missing from canonical_committer.
    """
    # Map from canonical committer to
    # ([rev_list], {email: count}, {fname: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing to attribute this author entry to.
                continue
            canon_author = canonical_committer[(username, email)]
            revs, emails, fnames = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            revs.append(rev)
            emails[email] = emails.get(email, 0) + 1
            fnames[username] = fnames.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Order on the revision count only.  Sorting the whole tuples would fall
    # back to comparing revision lists and dicts whenever two committers have
    # the same count, which is meaningless (and an error on Python 3).
    res.sort(key=lambda entry: entry[0], reverse=True)
    return res


def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: dict mapping an email address to the set of user
        names seen with it.  The empty email '' is allowed.
    :param combo_count: dict mapping (username, email) to the number of times
        that exact combination was seen.
    :return: dict mapping each (username, email) combo to the most frequently
        used combo of the identity it was merged into.  The ('', '') combo is
        never included.
    :raises KeyError: if a combo from email_users is missing from combo_count.
    """
    id_to_combos = {}      # identity id => set of (username, email)
    username_to_id = {}    # lower-cased username => identity id
    email_to_id = {}       # email => identity id
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos, cur_user, cur_email):
        # Fold identity old_id into new_id.  cur_user/cur_email are the
        # combo being processed by the caller, which re-points those itself.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if old_user and old_user != cur_user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != cur_email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            id_combos.add((user, email))
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None and user_id != cur_id:
                # This UserName was already matched to a different identity
                # than the current email, so merge that identity into ours.
                collapse_ids(user_id, cur_id, id_combos, user, email)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        # The most frequently used combo represents the whole identity.
        best_combo = max(combos, key=lambda combo: combo_count[combo])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo


def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository providing iter_revisions(revids), which is
        iterated here as (revid, revision) pairs.
    :param revids: sized collection of revision ids to load.
    :return: tuple (revisions, canonical_committer) where revisions is a
        list of the revision objects and canonical_committer is the mapping
        produced by collapse_email_and_users().
    """
    email_users = {}  # user@email.com => set of User Names
    combo_count = {}  # (User Name, user@email.com) => number of uses
    # Keep the revision objects themselves: the iterator yields
    # (revid, revision) pairs and is used up by this loop, so handing it back
    # would give callers nothing (or the wrong element type) to work with.
    revisions = []
    pb = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        for count, (revid, rev) in enumerate(a_repo.iter_revisions(revids)):
            pb.update('checking', count, len(revids))
            revisions.append(rev)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    finally:
        pb.finished()
    return revisions, collapse_email_and_users(email_users, combo_count)


def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked for the
        duration of the ancestry walk and revision loading.
    :param revision: revision id whose whole ancestry is analysed.
    :return: the per-person list built by collapse_by_person().
    """
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Skip entries without parent information and the null revision.
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        revisions are loaded.
    :param start_rev: first revision id handed to graph.find_difference().
    :param end_rev: second revision id handed to graph.find_difference().
    :return: the per-person list built by collapse_by_person().
    """
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        # Only the second half of the difference is analysed -- presumably
        # the revisions reachable from end_rev but not from start_rev.
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revs, canonical_committer)


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of (count, revs, emails, fullnames) tuples, where
        emails and fullnames are non-empty {value: use_count} dicts.
    :param to_file: object with a write() method receiving the report.
    :param gather_class_stats: optional callable taking the revs of one
        entry and returning (classes, total), with classes a
        {class_name: count} dict.  When None no contribution breakdown is
        written.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and name: most used first.
        sorted_emails = sorted(
            ((uses, email) for email, uses in emails.items()),
            reverse=True)
        sorted_fullnames = sorted(
            ((uses, fullname) for fullname, uses in fullnames.items()),
            reverse=True)
        top_name = sorted_fullnames[0][1]
        top_email = sorted_emails[0][1]
        if top_name == '' and top_email == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, top_name, top_email))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for uses, fname in sorted_fullnames:
                to_file.write(' %4d ' % (uses,))
                if fname == '':
                    # Make an empty name visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for uses, email in sorted_emails:
                to_file.write(' %4d ' % (uses,))
                if email == '':
                    # Make an empty address visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, uses in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n"
                              % ((float(uses) / total) * 100.0, name))


class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class',
                                   help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        """Print per-committer statistics for the branch at location.

        :param location: path of a working tree or branch.
        :param revision: optional list of one or two revision specs.  One
            spec selects the revision whose ancestry is analysed; with two,
            only the difference between them is analysed.
        :param show_class: when true, also print the contribution classes
            of each committer.
        """
        alternate_rev = None
        # Prefer the working tree's notion of the last revision, and fall
        # back to the branch when there is no working tree.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        finally:
            a_branch.unlock()
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write one 'revno, ancestor count' line per depth-0 revision.

        :param location: path of a working tree or branch.
        """
        # Prefer the working tree's notion of the last revision, and fall
        # back to the branch when there is no working tree.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            # Walk the merge-sorted list back to front, counting every
            # revision seen so far.
            # NOTE(review): depth 0 presumably marks mainline revisions --
            # confirm against tsort.merge_sort.
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
        finally:
            a_branch.unlock()


def gather_class_stats(repository, revs):
    """Count how often each contribution class occurs in some revisions.

    :param repository: repository providing get_deltas_for_revisions(); it
        is read-locked while the deltas are classified.
    :param revs: sized collection of revisions to classify.
    :return: tuple (classes, total) where classes maps each class produced
        by classify_delta() to its number of occurrences and total is the
        sum of those counts.
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        pb.finished()
    return ret, total


def classify_key(item):
    """Sort key for item of (author, count) from classify_delta.

    :param item: a (name, count) pair.
    :return: (-count, name), so that sorting puts the largest count first
        and orders equal counts by name.
    """
    return -item[1], item[0]


def display_credits(credits, to_file):
    """Write the credits, one titled section per kind of contribution.

    :param credits: tuple of four sequences of names, in the order
        (coders, documenters, artists, translators).
    :param to_file: object with a write() method receiving the output.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # Empty sections are left out entirely.
        if not lst:
            return
        to_file.write("%s:\n" % name)
        for entry in lst:
            to_file.write("%s\n" % entry)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)


def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Revisions with more than one parent are skipped.

    :param repository: repository to read from; it is read-locked for the
        duration of the analysis.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           }
    repository.lock_read()
    try:
        graph = repository.get_graph()
        # Skip entries without parent information and the null revision.
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            iterator = izip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    # Any class that is not pre-seeded above is tallied too
                    # instead of raising KeyError; it is just not reported.
                    tally = ret.setdefault(c, {})
                    for author in rev.get_apparent_authors():
                        tally[author] = tally.get(author, 0) + 1
        finally:
            pb.finished()
    finally:
        repository.unlock()

    def sort_class(name):
        # Authors of one class, ordered by classify_key.
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Print the credits for the branch at location.

        :param location: path of a working tree or branch.
        :param revision: optional list of revision specs; only the first is
            used, to pick the revision whose ancestry is credited.
        """
        # Prefer the working tree's notion of the last revision, and fall
        # back to the branch when there is no working tree.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
        finally:
            a_branch.unlock()