# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16
"""A Simple bzr plugin to generate statistics about the history."""
18
from __future__ import absolute_import
31
from ...revision import NULL_REVISION
32
from .classify import classify_delta
35
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of revision objects; each must provide
        ``get_apparent_authors()``.
    :param canonical_committer: dict mapping a (username, email) combo to
        the canonical combo used for that person.
    :return: list of (revision_count, revisions, {email: count},
        {username: count}) tuples, largest revision_count first.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Completely blank author: nothing to attribute it to.
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].get(email, 0) + 1
            info[2][username] = info[2].get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Sort on the count only: comparing whole tuples would fall through to
    # comparing revision lists and dicts whenever two counts are equal.
    res.sort(key=lambda entry: entry[0], reverse=True)
    return res
66
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: dict mapping an email address to the set of user
        names seen together with it.
    :param combo_count: dict mapping each (username, email) combo to the
        number of times it was seen.
    :return: dict mapping each (username, email) combo to the most
        frequently used combo of the identity it was grouped into. Combos
        with neither a username nor an email are not included.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity old_id into new_id. 'user' and 'email' are read
        # from the enclosing loop: they are the entries being processed
        # right now, which the caller re-points itself.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        # Most frequently used combo wins; ties are broken on the combo
        # itself so the result does not depend on set iteration order.
        best_combo = max(combos,
                         key=lambda combo: (combo_count[combo], combo))
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
141
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository providing ``iter_revisions(revids)``.
    :param revids: the revision ids to load.
    :return: (iterator over the revision objects, dict mapping each
        (username, email) combo to its canonical combo).
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}  # (User Name, user@email.com) => times seen
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        # Report against the materialised list so that revids may be any
        # iterable, not only a sized container.
        total = len(revisions)
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, total)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.get(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))
162
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository to read from; it is read-locked for the
        duration of the call.
    :param revision: revision id whose whole ancestry is analysed.
    :return: the per-person list built by collapse_by_person().
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Entries whose parents are None, and the null revision itself,
        # are left out.
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
175
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked for the
        duration of the call.
    :param start_rev: revision id of the starting point.
    :param end_rev: revision id of the end point.
    :return: the per-person list built by collapse_by_person().
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        # Only the second element of the difference is analysed.
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
189
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information

    :param info: iterable of (count, revisions, {email: count},
        {fullname: count}) tuples, as built by collapse_by_person().
    :param to_file: object with a ``write(str)`` method.
    :param gather_class_stats: optional callable taking a list of
        revisions and returning ({class_name: count}, total); when given,
        a per-person contribution breakdown is written as well.
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((uses, email)
                                for email, uses in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((uses, fullname)
                                   for fullname, uses in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write(' Other names:\n')
            for uses, fname in sorted_fullnames:
                to_file.write(' %4d ' % (uses,))
                if fname == '':
                    # Make an empty name visible in the listing.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            for uses, email in sorted_emails:
                to_file.write(' %4d ' % (uses,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write(' Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, hits in sorted(classes.items(), key=classify_key):
                if name is None:
                    # Changes that could not be classified.
                    name = "Unknown"
                to_file.write(" %4.0f%% %s\n" % ((float(hits) / total) * 100.0, name))
232
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Second revision of a range; stays None when only one (or no)
        # revision was given.
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at LOCATION: open it as a plain branch.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev is not None:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
272
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at LOCATION: open it as a plain branch.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            # NOTE(review): the counter bookkeeping below was rebuilt from
            # the names used in the output line; confirm against the
            # upstream plugin.
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    # One output line per depth-0 entry, with the number
                    # of entries seen so far.
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
302
def gather_class_stats(repository, revs):
    """Count how often each contribution class occurs in revs.

    :param repository: repository providing ``lock_read()`` and
        ``get_deltas_for_revisions()``.
    :param revs: sized collection of revisions to classify.
    :return: ({class_name: count}, total) where total is the sum of all
        the counts; display_info() divides each count by it.
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total
319
def classify_key(item):
    """Sort key for a (name, count) item: highest count first, then name.

    Used on both (author, count) and (class, count) pairs.
    """
    return -item[1], item[0]
324
def display_credits(credits, to_file):
    """Write the credits to to_file, one section per contribution kind.

    :param credits: (coders, documenters, artists, translators) tuple of
        name lists, as returned by find_credits().
    :param to_file: object with a ``write(str)`` method.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # NOTE(review): skipping empty sections and the blank separator
        # line were rebuilt from a damaged listing; confirm against the
        # upstream plugin.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for person in lst:
            to_file.write("%s\n" % person)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
339
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: repository to read from; it is read-locked for the
        duration of the analysis.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators)
    """
    # class name => {author: number of revisions touching that class}
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set(): each class is credited once per revision.
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1

    def sort_class(name):
        # Authors of one class, most revisions first.
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))
373
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree at LOCATION: open it as a plain branch.
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            # An explicit revision overrides the tip found above.
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)