/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2017-07-30 21:23:44 UTC
  • mto: This revision was merged to the branch mainline in revision 6743.
  • Revision ID: jelmer@jelmer.uk-20170730212344-mumrkz1c4jbm9yzc
review comments.

 
# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta

from itertools import izip
 

def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email; fix it up by person.

    Some people commit with a similar username but a different email
    address, which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    res.sort(reverse=True)
    return res
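
# Illustrative sketch: given two revisions whose apparent authors are
# 'Jane Doe <jane@example.com>' and 'jane <jane@example.com>', and a
# canonical_committer map that sends both ('Jane Doe', 'jane@example.com')
# and ('jane', 'jane@example.com') to the same pair, the result is a single
# entry roughly of the form
#   [(2, [rev1, rev2], {'jane@example.com': 2}, {'Jane Doe': 1, 'jane': 1})]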
 

def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map them all to
    one entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address: we just try to match by username, and not at all by
            # email.
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This username was already matched to another id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
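
# Illustrative sketch: with
#   email_users = {'jane@example.com': set(['Jane Doe', 'jane'])}
#   combo_count = {('Jane Doe', 'jane@example.com'): 3,
#                  ('jane', 'jane@example.com'): 1}
# every observed (username, email) combo is mapped to the most frequent one,
# so both combos above end up mapped to ('Jane Doe', 'jane@example.com').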
 

def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer."""

    email_users = {}  # user@email.com => User Name
    combo_count = {}
    pb = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        # Materialise the iterator so the revisions can both be scanned here
        # and returned to the caller.
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    finally:
        pb.finished()
    return ([rev for (revid, rev) in revisions],
            collapse_email_and_users(email_users, combo_count))
 

def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
 

def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between 2 revisions.
    """
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        # find_difference() returns (left_only, right_only); keep only the
        # revisions that are in end_rev's ancestry but not in start_rev's.
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
 

def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""

    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((count, email)
                               for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                  for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(count) / total) * 100.0, name))
 

class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        finally:
            a_branch.unlock()
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
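
# Example invocation (illustrative; assumes the plugin is registered and the
# front end is invoked as 'brz'):
#   brz stats                         # whole history of the current branch
#   brz stats --show-class            # also break down contribution classes
#   brz committer-stats -r 100..200   # only revisions in 200's ancestry
#                                     # that are not in 100's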
 

class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
        finally:
            a_branch.unlock()
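
# Example (illustrative): 'brz ancestor-growth' writes one
# "mainline revno, cumulative ancestor count" pair per line, e.g.
#      1,    1
#      2,    3
#      3,    4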
 

def gather_class_stats(repository, revs):
    """Count contribution classes (e.g. code, documentation) across revs.

    :return: tuple of ({class: count}, total count)
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            i = 0
            for delta in repository.get_deltas_for_revisions(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if c not in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
        finally:
            repository.unlock()
    finally:
        pb.finished()
    return ret, total
 

def classify_key(item):
    """Sort key for item of (author, count) from classify_delta."""
    return -item[1], item[0]
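
# Example: sorting {'code': 5, 'documentation': 5, 'art': 1}.items() with
# key=classify_key puts the highest counts first and breaks ties
# alphabetically: [('code', 5), ('documentation', 5), ('art', 1)].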
 

def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits
    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for entry in lst:
            to_file.write("%s\n" % entry)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
 

def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (coders, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    repository.lock_read()
    try:
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            iterator = izip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1
        finally:
            pb.finished()
    finally:
        repository.unlock()
    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
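
# Illustrative return value shape:
#   (['Jane Doe <jane@example.com>', ...],   # code
#    [...],                                  # documentation
#    [...],                                  # art
#    [...])                                  # translation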
 

class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
        finally:
            a_branch.unlock()
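
# Example invocation (illustrative; assumes the plugin is registered):
#   brz credits -r -1 some/branch
# The output lists contributors under "Code:", "Documentation:", "Art:" and
# "Translations:" headings, one per line.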