# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""A Simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

import operator

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each must provide
        ``get_apparent_authors()``.
    :param canonical_committer: mapping from a ``(username, email)`` combo to
        the canonical combo it should be counted under.
    :return: list of ``(rev_count, revs, emails, fullnames)`` tuples, sorted
        by descending ``rev_count``. ``emails`` and ``fullnames`` map each
        spelling seen for that person to the number of times it was seen.
    """
    # Map from canonical committer to
    # ([rev_list], {email: count}, {fname: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing to attribute this author entry to.
                continue
            canon_author = canonical_committer[(username, email)]
            rev_list, email_counts, name_counts = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            rev_list.append(rev)
            email_counts[email] = email_counts.get(email, 0) + 1
            name_counts[username] = name_counts.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Only the count takes part in the ordering; the sort is stable for ties.
    res.sort(reverse=True, key=operator.itemgetter(0))
    return res

def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: mapping of email address to the set of user names
        seen with that address. The empty string is a valid email key.
    :param combo_count: mapping of ``(user, email)`` combo to the number of
        times it was seen; it must contain every combo that gets grouped.
    :return: mapping of each grouped ``(user, email)`` combo to the most
        frequently seen combo of the identity it was grouped into. The
        ``('', '')`` combo is never included.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos, cur_user, cur_email):
        # Fold identity old_id into new_id, re-pointing every user name and
        # email that belonged to it. cur_user/cur_email are the combo being
        # processed by the caller, which registers them itself.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if old_user and old_user != cur_user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != cur_email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None and user_id != cur_id:
                # This user name was already matched to a different identity
                # than the current email: merge that identity into this one.
                collapse_ids(user_id, cur_id, id_combos, user, email)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        # max() returns the first maximal element, which is the same element
        # a stable descending sort would put first.
        best_combo = max(combos, key=lambda x: combo_count[x])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo

def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository object; its ``iter_revisions(revids)`` must
        yield ``(revid, revision)`` pairs.
    :param revids: the revision ids to load. ``len(revids)`` is used for
        progress reporting, so it must be a sized collection.
    :return: ``(revisions, canonical_committer)``. ``revisions`` is a
        generator over the loaded revision objects (it can be consumed only
        once); ``canonical_committer`` is the combo mapping produced by
        ``collapse_email_and_users``.
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        total = len(revids)
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, total)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))

def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    Walks the ancestry of ``revision`` (skipping entries whose parents are
    ``None`` and the null revision) and summarises it per person.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the graph walk and revision loading.
    :param revision: revision id whose ancestry is analysed.
    :return: the list produced by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar(), a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)

def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        graph difference is computed and the revisions are loaded.
    :param start_rev: first revision id passed to ``graph.find_difference``.
    :param end_rev: second revision id passed to ``graph.find_difference``;
        the second element of the result is the set that gets analysed.
    :return: the list produced by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar(), a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)

def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples;
        ``emails`` and ``fullnames`` are non-empty ``{spelling: times_seen}``
        mappings.
    :param to_file: object with a ``write(str)`` method that receives the
        report.
    :param gather_class_stats: optional callable taking the ``revs`` of one
        entry and returning ``(classes, total)``, where ``classes`` maps a
        class name to a count. When given, a per-class percentage breakdown
        is written for every entry.
    """
    # NOTE(review): runs of spaces inside the literals below may have been
    # collapsed before this code reached review; confirm the intended
    # indentation of the report lines.
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((seen, email)
                                for email, seen in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((seen, fullname)
                                   for fullname, seen in fullnames.items()),
                                  reverse=True)
        best_name = sorted_fullnames[0][1]
        best_email = sorted_emails[0][1]
        if best_name == '' and best_email == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, best_name, best_email))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for seen, fname in sorted_fullnames:
                to_file.write(' %4d ' % (seen,))
                if fname == '':
                    # Make an empty name visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for seen, email in sorted_emails:
                to_file.write(' %4d ' % (seen,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, seen in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n" %
                              ((float(seen) / total) * 100.0, name))

class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class',
                                   help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # With one revision the statistics cover its whole ancestry; with
        # two, only the difference between them is reported.
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)

class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    hidden = True

    def run(self, location='.'):
        # Writes one "revno, ancestor-count" line for every depth-0 entry of
        # the merge-sorted ancestry, walking the sort result in reverse.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))

def gather_class_stats(repository, revs):
    """Count how often each change class occurs in a set of revisions.

    :param repository: repository providing ``lock_read()`` and
        ``get_deltas_for_revisions(revs)``.
    :param revs: sized collection of revisions; ``len(revs)`` is used for
        progress reporting.
    :return: ``(classes, total)`` where ``classes`` maps each value yielded
        by ``classify_delta`` to the number of times it was yielded and
        ``total`` is the sum of those counts.
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            rev_count = len(revs)
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, rev_count)
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total

def classify_key(item):
    """Sort key for item of (author, count) from classify_delta.

    Orders by descending count, then by ascending first element.
    """
    return -item[1], item[0]

def display_credits(credits, to_file):
    """Write the credits, one titled section per kind of contribution.

    :param credits: 4-tuple ``(coders, documenters, artists, translators)``,
        each a sequence of names.
    :param to_file: object with a ``write(str)`` method.

    Sections with no entries are left out; every written section is
    followed by a blank line.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for entry in lst:
            to_file.write("%s\n" % entry)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)

def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` credits each of
    its apparent authors once for every distinct class that
    ``classify_delta`` reports for the revision's delta.

    :param repository: repository to read from; it is read-locked for the
        duration of the analysis.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators); each
        element is a list of author strings ordered by ``classify_key``.
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            rev_count = len(revs)
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, rev_count)
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    # A class outside the four reported ones is tallied in
                    # its own bucket rather than raising KeyError.
                    counts = ret.setdefault(c, {})
                    for author in rev.get_apparent_authors():
                        counts[author] = counts.get(author, 0) + 1

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]

    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))

class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Credits are computed for the tip of LOCATION unless a revision is
        # given, in which case the first one supplied is used.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)