1
# Copyright (C) 2006-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
"""A Simple bzr plugin to generate statistics about the history."""
31
from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta
35
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each must provide
        ``get_apparent_authors()``.
    :param canonical_committer: mapping of ``(username, email)`` to the
        canonical ``(username, email)`` combo chosen for that person.
    :return: list of ``(rev_count, revs, {email: count}, {fname: count})``
        tuples, largest revision count first.
    """
    # Map from canonical committer to
    # ([rev_list], {email: count}, {fname: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing to attribute this authorship to.
                continue
            canon_author = canonical_committer[(username, email)]
            revs, emails, fnames = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            revs.append(rev)
            emails[email] = emails.get(email, 0) + 1
            fnames[username] = fnames.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]

    def key_fn(item):
        # Order by revision count, breaking ties on the names used.
        return item[0], list(item[2].keys())
    res.sort(reverse=True, key=key_fn)
    return res
69
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: mapping of email address to the set of user names
        seen with that address.
    :param combo_count: mapping of ``(username, email)`` to the number of
        times that combo was seen.
    :return: mapping of every ``(username, email)`` combo to the most
        frequently seen combo of the identity it was folded into.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity ``old_id`` into ``new_id``.  ``user`` and ``email``
        # are the enclosing loop variables: the combo being processed right
        # now is re-pointed by the caller, so it is skipped here.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        # Every non-empty email address starts out as its own identity.
        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        # The most frequently seen combo represents the whole identity.
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
144
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository providing ``iter_revisions(revids)``.
    :param revids: sized collection of revision ids to load (its length is
        used for progress reporting).
    :return: ``(revisions, canonical_committer)`` where ``revisions`` is a
        generator over the loaded revision objects and
        ``canonical_committer`` is the mapping built by
        ``collapse_email_and_users``.
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}  # (User Name, user@email.com) => times seen
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))
165
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read; it is read-locked for the duration.
    :param revision: revision id whose whole ancestry is analysed.
    :return: the per-person list produced by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Skip entries with no parents recorded and the null revision.
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
179
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read; it is read-locked for the duration.
    :param start_rev: revision id on one side of the comparison.
    :param end_rev: revision id on the other side of the comparison.
    :return: the per-person list produced by ``collapse_by_person``.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        # Only the second element of the difference is analysed.
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
194
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of ``(count, revs, {email: count},
        {fullname: count})`` tuples, as returned by ``collapse_by_person``.
    :param to_file: file-like object with a ``write`` method.
    :param gather_class_stats: optional callable taking a list of revisions
        and returning ``(classes, total)``; when given, a per-class
        percentage breakdown is written for each person.
    """
    # NOTE(review): confirm the leading padding inside the string literals
    # below against the intended report layout.
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((email_count, email)
                                for email, email_count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((name_count, fullname)
                                   for fullname, name_count
                                   in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for name_count, fname in sorted_fullnames:
                to_file.write(' %4d ' % (name_count,))
                if fname == '':
                    # Make an empty name visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for email_count, email in sorted_emails:
                to_file.write(' %4d ' % (email_count,))
                if email == '':
                    # Make an empty address visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, class_count in sorted(classes.items(),
                                            key=classify_key):
                if name is None:
                    # Presumably None marks an unclassified change.
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n" %
                              ((float(class_count) / total) * 100.0, name))
238
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        """Print per-committer statistics for the branch at ``location``.

        :param location: path to a working tree or branch.
        :param revision: optional list of one or two revision specs.  One
            spec selects the revision whose ancestry is analysed; two specs
            restrict the report to the difference between them.
        :param show_class: also print a per-class contribution breakdown.
        """
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to opening the branch.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
278
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    # NOTE(review): confirm this command is meant to be hidden from the
    # command listing.
    hidden = True

    def run(self, location='.'):
        """Write ``revno, ancestor-count`` pairs for the branch's mainline.

        :param location: path to a working tree or branch.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to opening the branch.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    # One line per depth-0 revision, with the running total
                    # of revisions visited so far.
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
310
def gather_class_stats(repository, revs):
    """Count how many changes of each class the given revisions contain.

    :param repository: repository providing ``get_revision_deltas``; it is
        read-locked for the duration.
    :param revs: sized collection of revisions to classify.
    :return: ``(classes, total)`` where ``classes`` maps each class yielded
        by ``classify_delta`` to its count and ``total`` is the sum of all
        counts.
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            deltas = repository.get_revision_deltas(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total
327
def classify_key(item):
    """Sort key for item of (author, count) from classify_delta.

    Orders by descending count, then ascending name.
    """
    return -item[1], item[0]
332
def display_credits(credits, to_file):
    """Write the credits to ``to_file``, one section per contribution kind.

    :param credits: 4-tuple of name lists
        ``(coders, documenters, artists, translators)``.
    :param to_file: file-like object with a ``write`` method.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # Empty sections are omitted entirely, header included.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for contributor in lst:
            to_file.write("%s\n" % contributor)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
348
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: repository to read; it is read-locked for the
        duration.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_revision_deltas(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set(): each class counts once per revision per author.
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        ret[c][author] = ret[c].get(author, 0) + 1

    def sort_class(name):
        # Authors of one class, ordered by classify_key.
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))
383
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Print the credits for the branch at ``location``.

        :param location: path to a working tree or branch.
        :param revision: optional list of revision specs; the first one
            selects the revision whose ancestry is credited.
        """
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to opening the branch.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)