# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""A Simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each must provide
        ``get_apparent_authors()``.
    :param canonical_committer: mapping from a ``(username, email)`` combo
        to the canonical combo that represents the same person.
    :return: list of ``(rev_count, [revisions], {email: count},
        {fullname: count})`` tuples, largest ``rev_count`` first.
    """
    # Map from canonical committer to
    # ([rev_list], {email: count}, {fname: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing identifies this author, so there is no person to
                # credit the revision to.
                continue
            canon_author = canonical_committer[(username, email)]
            revs, emails, fnames = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            revs.append(rev)
            emails[email] = emails.get(email, 0) + 1
            fnames[username] = fnames.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Sort on an explicit key: comparing the whole tuples would fall through
    # to the revision lists and dicts whenever two counts tie, and those are
    # not orderable on Python 3.
    res.sort(key=lambda item: (item[0], sorted(item[2])), reverse=True)
    return res
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: mapping ``{email: set_of_usernames}``.
    :param combo_count: mapping ``{(username, email): times_seen}``; must
        contain every combo that ends up grouped under an identity.
    :return: mapping from each ``(username, email)`` combo to the most
        frequently seen combo of the identity it was grouped into.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity ``old_id`` into ``new_id``.  ``user`` and ``email``
        # are read from the enclosing loop: they are the combo currently
        # being processed, and the loop records their ids itself.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if old_user and old_user != user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email address starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None and user_id != cur_id:
                # This UserName was already matched to a different identity
                # than the current email, so merge the two.
                collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        # The most frequently used combo represents the whole identity.
        best_combo = max(combos, key=lambda x: combo_count[x])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository object providing ``iter_revisions(revids)``.
    :param revids: sized collection of revision ids to load.
    :return: ``(revisions, canonical_committer)``: a generator over the
        loaded revision objects, and the mapping produced by
        ``collapse_email_and_users`` for every author combo seen.
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}  # (User Name, user@email.com) => times seen
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked while the
        ancestry and revisions are loaded.
    :param revision: revision id whose whole ancestry is analysed.
    :return: the per-person list built by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Skip entries without parent information and the null revision.
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        revisions are loaded.
    :param start_rev: revision id passed first to ``find_difference``.
    :param end_rev: revision id passed second to ``find_difference``; only
        the second element of its result is analysed.
    :return: the per-person list built by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` map a value to how often it was
        used. Both mappings must be non-empty.
    :param to_file: object with a ``write`` method receiving the report.
    :param gather_class_stats: optional callable taking the revision list and
        returning ``(classes, total)``; when given, a per-class percentage
        breakdown is written for every person.
    """
    # NOTE(review): the leading whitespace inside the indented string
    # literals below is reproduced as found; confirm the intended width.
    for count, revs, emails, fullnames in info:
        # Get the most common email and name: (uses, value), most used first.
        sorted_emails = sorted(
            ((uses, email) for email, uses in emails.items()),
            reverse=True)
        sorted_fullnames = sorted(
            ((uses, fullname) for fullname, uses in fullnames.items()),
            reverse=True)
        top_name = sorted_fullnames[0][1]
        top_email = sorted_emails[0][1]
        if top_name == '' and top_email == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, top_name, top_email))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for uses, fname in sorted_fullnames:
                to_file.write(' %4d ' % (uses,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for uses, email in sorted_emails:
                to_file.write(' %4d ' % (uses,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, hits in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n" %
                              ((float(hits) / total) * 100.0, name))
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Only set when a second revision is supplied; selects the
        # "difference between two revisions" report.
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at LOCATION: open the branch directly.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at LOCATION: open the branch directly.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            # NOTE(review): the counter updates and the depth test below were
            # reconstructed; confirm against the upstream plugin.
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
def gather_class_stats(repository, revs):
    """Count how often each contribution class occurs in ``revs``.

    :param repository: repository providing ``get_deltas_for_revisions``; it
        is read-locked while the deltas are classified.
    :param revs: sized collection of revision objects.
    :return: ``(counts, total)`` where ``counts`` maps each class yielded by
        ``classify_delta`` to its number of occurrences and ``total`` is the
        sum of all counts.
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total
def classify_key(item):
    """Sort key for item of (author, count) from classify_delta.

    Orders by descending count, then by ascending name for equal counts.

    :param item: ``(name, count)`` pair.
    :return: ``(-count, name)``.
    """
    return -item[1], item[0]
def display_credits(credits, to_file):
    """Write the credit lists, one titled section per contribution kind.

    :param credits: tuple ``(coders, documenters, artists, translators)``,
        each a sequence of names.
    :param to_file: object with a ``write`` method receiving the output.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # Sections without any contributor are left out entirely.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for person in lst:
            to_file.write("%s\n" % person)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: repository to read from; it is read-locked while the
        ancestry of ``revid`` is analysed.
    :param revid: revision id whose ancestry is credited.
    :return: tuple with (authors, documenters, artists, translators)
    """
    # Per class: {author: number of revisions touching that class}.
    # NOTE(review): every key except "documentation" was reconstructed;
    # the None bucket is assumed to hold unclassified changes - confirm.
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1

    def sort_class(name):
        # Authors of one class, most revisions first.
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at LOCATION: open the branch directly.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)