# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
"""A Simple bzr plugin to generate statistics about the history."""
18
from __future__ import absolute_import
33
from ...revision import NULL_REVISION
34
from .classify import classify_delta
37
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each one is asked for
        ``get_apparent_authors()``.
    :param canonical_committer: mapping of ``(username, email)`` to the
        canonical identity that combination should be grouped under.
    :return: list of ``(revision_count, [revisions], {email: count},
        {fullname: count})`` tuples, largest revision count first.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing to attribute this author entry to.
                continue
            canon_author = canonical_committer[(username, email)]
            rev_list, email_counts, name_counts = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            rev_list.append(rev)
            email_counts[email] = email_counts.get(email, 0) + 1
            name_counts[username] = name_counts.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Only the count is used as the key: the other tuple members (lists and
    # dicts) must not take part in the comparison.
    res.sort(reverse=True, key=operator.itemgetter(0))
    return res
68
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    identity.

    :param email_users: dict mapping an email address to the collection of
        user names seen with that address.
    :param combo_count: dict mapping ``(username, email)`` to the number of
        times that combination was seen.
    :return: dict mapping each ``(username, email)`` combination to the most
        frequently used combination of the identity it was merged into.
        The ``('', '')`` combination is never included.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity ``old_id`` into ``new_id`` (whose combo set is
        # ``new_combos``) and re-point everything that referenced it.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            # ``user`` and ``email`` are the enclosing loops' current values;
            # the calling loop assigns those two to the new identity itself.
            if old_user and old_user != user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email address starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        if not combos:
            # An email listed with no user names contributes nothing.
            continue
        best_combo = max(combos, key=lambda combo: combo_count[combo])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
143
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository providing ``iter_revisions()``.
    :param revids: sized collection of revision ids to load (``len()`` is
        taken for progress reporting).
    :return: tuple of (generator over the loaded revision objects, result of
        ``collapse_email_and_users`` for the authors found in them).
    """

    email_users = {}  # user@email.com => User Name
    combo_count = {}  # (User Name, user@email.com) => times seen
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        total = len(revids)
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, total)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))
164
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked while the
        ancestry and revisions are loaded.
    :param revision: revision id whose whole ancestry is analysed.
    :return: the per-person list built by ``collapse_by_person``.
    """
    # The progress bar is only opened and closed here; the helper called
    # below opens its own bar for the actual updates.
    with ui.ui_factory.nested_progress_bar(), a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Skip entries without parent information and the null revision.
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
178
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        revisions are loaded.
    :param start_rev: first revision id handed to ``find_difference``.
    :param end_rev: second revision id handed to ``find_difference``; the
        second element of the returned pair is what gets analysed.
    :return: the per-person list built by ``collapse_by_person``.
    """
    # The progress bar is only opened and closed here; the helper called
    # below opens its own bar for the actual updates.
    with ui.ui_factory.nested_progress_bar(), a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
193
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` map a value to how often it was
        used.
    :param to_file: file-like object; only ``write()`` is called on it.
    :param gather_class_stats: optional callable that takes one person's
        revision list and returns ``(classes, total)``, ``classes`` mapping a
        contribution class name to its count. When given, a per-class
        percentage breakdown is written for each person.
    """
    # NOTE(review): the leading whitespace inside the format strings below is
    # kept exactly as found; confirm it was not collapsed before this
    # function was last edited.
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((uses, email)
                                for email, uses in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((uses, fullname)
                                   for fullname, uses in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for uses, fname in sorted_fullnames:
                to_file.write(' %4d ' % (uses,))
                if fname == '':
                    # Make an empty name visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for uses, email in sorted_emails:
                to_file.write(' %4d ' % (uses,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, uses in sorted(classes.items(), key=classify_key):
                # NOTE(review): a class of None is shown as "Unknown", the
                # same label used above for anonymous committers; confirm
                # that the classifier can actually produce None.
                if name is None:
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n" %
                              ((float(uses) / total) * 100.0, name))
237
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class',
                                   help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Second revision of a ``-r A..B`` range, when one was given.
        alternate_rev = None
        # Use the working tree's last revision when LOCATION has a tree,
        # otherwise fall back to the branch tip.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
277
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Use the working tree's last revision when LOCATION has a tree,
        # otherwise fall back to the branch tip.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            # NOTE(review): the counter updates in this loop were restored
            # from context (one output row per depth-0 revision, with the
            # running number of revisions seen so far); confirm against the
            # project history before relying on the exact numbers.
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
307
def gather_class_stats(repository, revs):
    """Count the classes of the changes made by revs.

    :param repository: repository providing ``get_deltas_for_revisions()``;
        it is read-locked for the duration of the scan.
    :param revs: sized collection of revision objects (``len()`` is taken
        for progress reporting).
    :return: tuple ``(classes, total)`` where ``classes`` maps each class
        yielded by ``classify_delta`` to the number of times it was yielded
        and ``total`` is the sum of those counts.
    """
    # NOTE(review): the accumulation and the return value were restored from
    # how display_info() consumes the result (``classes.items()`` and
    # ``count / total``); confirm against the project history.
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total
324
def classify_key(item):
    """Sort key for item of (author, count) from classify_delta.

    Orders by descending count first, then by ascending name.
    """
    return -item[1], item[0]
329
def display_credits(credits, to_file):
    """Write the credits, one titled section per kind of contribution.

    :param credits: 4-tuple ``(coders, documenters, artists, translators)``,
        each a sequence of names.
    :param to_file: file-like object; only ``write()`` is called on it.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # NOTE(review): skipping empty sections and the blank line after
        # each section were restored from context, not from visible code;
        # confirm the expected output format.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for author in lst:
            to_file.write("%s\n" % author)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
345
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: repository to read from; it is read-locked while the
        ancestry is analysed.
    :param revid: revision id whose whole ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators)
    """
    # class name => {author: number of revisions touching that class}
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set(): a revision counts once per class, however many of
                # its changes fall into that class.
                for c in set(classify_delta(delta)):
                    # setdefault also covers any class that is not one of
                    # the four reported below (it is counted, never shown).
                    per_author = ret.setdefault(c, {})
                    for author in rev.get_apparent_authors():
                        per_author[author] = per_author.get(author, 0) + 1

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
380
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Use the working tree's last revision when LOCATION has a tree,
        # otherwise fall back to the branch tip.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        # An explicit -r overrides the revision chosen above.
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)