/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2019-01-01 21:38:07 UTC
  • mfrom: (7228 work)
  • mto: This revision was merged to the branch mainline in revision 7233.
  • Revision ID: jelmer@jelmer.uk-20190101213807-ay6uqghz0nnrgjvx
Merge trunk.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2006-2010 Canonical Ltd
 
2
 
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
 
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
 
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
16
"""A Simple bzr plugin to generate statistics about the history."""
 
17
 
 
18
from __future__ import absolute_import
 
19
 
 
20
import operator
 
21
 
 
22
from ... import (
 
23
    branch,
 
24
    commands,
 
25
    config,
 
26
    errors,
 
27
    option,
 
28
    trace,
 
29
    tsort,
 
30
    ui,
 
31
    workingtree,
 
32
    )
 
33
from ...revision import NULL_REVISION
 
34
from .classify import classify_delta
 
35
 
 
36
 
 
37
def collapse_by_person(revisions, canonical_committer):
    """Group revisions by canonical author and rank the authors.

    Some people commit with a similar username, but different email
    address, which makes it hard to sort out when they have multiple
    entries.  Every ``(username, email)`` combination is therefore looked up
    in ``canonical_committer`` and the revision is filed under the canonical
    combination it maps to.

    :param revisions: iterable of revision objects providing
        ``get_apparent_authors()``.
    :param canonical_committer: dict mapping ``(username, email)`` to the
        canonical ``(username, email)`` of that identity.
    :return: list of ``(rev_count, revs, {email: count}, {username: count})``
        tuples, sorted by ``rev_count`` in descending order.
    """
    # Map from canonical committer to
    # ([rev_list], {email: count}, {username: count})
    committer_to_info = {}
    for rev in revisions:
        for author in rev.get_apparent_authors():
            username, email = config.parse_username(author)
            if not username and not email:
                # Nothing to attribute this author entry to.
                continue
            canon_author = canonical_committer[(username, email)]
            revs, emails, fnames = committer_to_info.setdefault(
                canon_author, ([], {}, {}))
            revs.append(rev)
            emails[email] = emails.get(email, 0) + 1
            fnames[username] = fnames.get(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    res.sort(reverse=True, key=operator.itemgetter(0))
    return res
 
66
 
 
67
 
 
68
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.  User names are matched case-insensitively.  Combinations without
    an email address are matched by user name only, and empty user names are
    never used for matching.

    :param email_users: dict mapping email => set of user names seen with it.
    :param combo_count: dict mapping ``(username, email)`` => number of times
        that combination was seen.
    :return: dict mapping each ``(username, email)`` combination to the most
        frequently seen combination of the identity it belongs to.  The
        combination with both parts empty gets no entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity ``old_id`` into ``new_id``.  ``user`` and ``email``
        # are the enclosing loops' current values; their index entries are
        # written by the calling code, so they are skipped here.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if old_user and old_user != user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None and user_id != cur_id:
                # This user name already belongs to a different identity
                # than the current email: merge that identity into this one.
                collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id

    combo_to_best_combo = {}
    for combos in id_to_combos.values():
        if not combos:
            # An email listed without any user name has nothing to map.
            continue
        # max() returns the first maximal element, which is the same element
        # a stable descending sort would place first.
        best_combo = max(combos, key=lambda combo: combo_count[combo])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
 
141
 
 
142
 
 
143
def get_revisions_and_committers(a_repo, revids):
    """Load the given revisions and work out the canonical author mapping.

    :param a_repo: repository the revisions are read from.
    :param revids: sized collection of revision ids to load.
    :return: 2-tuple of (generator over the loaded revision objects,
        dict mapping every seen ``(username, email)`` combination to its
        canonical combination, as built by collapse_email_and_users).
    """
    usernames_by_email = {}  # user@email.com => set of User Names
    times_seen = {}  # (User Name, user@email.com) => occurrences
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for index, (_revid, rev) in enumerate(revisions):
            pb.update('checking', index, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                if email not in usernames_by_email:
                    usernames_by_email[email] = set()
                usernames_by_email[email].add(username)
                key = (username, email)
                times_seen[key] = times_seen.get(key, 0) + 1
    loaded = (rev for (_revid, rev) in revisions)
    canonical = collapse_email_and_users(usernames_by_email, times_seen)
    return (loaded, canonical)
 
162
 
 
163
 
 
164
def get_info(a_repo, revision):
    """Collect per-author statistics for the ancestry of ``revision``.

    :param a_repo: repository to read from; it is read-locked while the
        ancestry and revisions are loaded.
    :param revision: revision id whose ancestry is examined.
    :return: the result of collapse_by_person for the loaded revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = []
        for revid, parents in graph.iter_ancestry([revision]):
            # Keep only entries that report parents and are not the null
            # revision.
            if parents is not None and revid != NULL_REVISION:
                ancestry.append(revid)
        loaded = get_revisions_and_committers(a_repo, ancestry)
        revs, canonical_committer = loaded

    return collapse_by_person(revs, canonical_committer)
 
176
 
 
177
 
 
178
def get_diff_info(a_repo, start_rev, end_rev):
    """Collect per-author statistics for the revisions new between two tips.

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository to read from; it is read-locked while the
        graph difference and revisions are loaded.
    :param start_rev: first revision id passed to the graph difference.
    :param end_rev: second revision id passed to the graph difference.
    :return: the result of collapse_by_person for the loaded revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        difference = graph.find_difference(start_rev, end_rev)
        # Only the second element of the result is used -- presumably the
        # revisions reachable from end_rev only; confirm against
        # Graph.find_difference.
        ancestry = difference[1]
        loaded = get_revisions_and_committers(a_repo, ancestry)
        revs, canonical_committer = loaded

    return collapse_by_person(revs, canonical_committer)
 
191
 
 
192
 
 
193
def _write_counted_values(to_file, heading, counted_values):
    """Write ``heading`` followed by one ``count value`` line per entry."""
    to_file.write(heading)
    for value_count, value in counted_values:
        to_file.write('     %4d ' % (value_count,))
        if value == '':
            # Make an empty name/email visible in the listing.
            to_file.write("''\n")
        else:
            to_file.write("%s\n" % (value,))


def display_info(info, to_file, gather_class_stats=None):
    """Write out the per-author statistics.

    :param info: iterable of ``(count, revs, {email: count},
        {fullname: count})`` tuples, one per author, written in the order
        given.
    :param to_file: object with a ``write`` method accepting text.
    :param gather_class_stats: optional callable taking an author's ``revs``
        and returning ``(classes, total)``, where ``classes`` maps a
        contribution class name (or None) to a count.  When given, a
        percentage breakdown is written for every author.
    """
    for count, revs, emails, fullnames in info:
        # Most common first; ties are broken by the value itself, descending.
        sorted_emails = sorted(
            ((seen, email) for email, seen in emails.items()),
            reverse=True)
        sorted_fullnames = sorted(
            ((seen, fullname) for fullname, seen in fullnames.items()),
            reverse=True)
        top_fullname = sorted_fullnames[0][1]
        top_email = sorted_emails[0][1]
        if top_fullname == '' and top_email == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n' % (count, top_fullname, top_email))
        if len(sorted_fullnames) > 1:
            _write_counted_values(
                to_file, '     Other names:\n', sorted_fullnames)
        if len(sorted_emails) > 1:
            _write_counted_values(
                to_file, '     Other email addresses:\n', sorted_emails)
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, class_count in sorted(classes.items(),
                                            key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n" %
                              ((float(class_count) / total) * 100.0, name))
 
235
 
 
236
 
 
237
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = [
        'revision',
        option.Option('show-class', help="Show the class of contributions."),
    ]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        """Write committer statistics for the branch at ``location``.

        With one revision the statistics cover that revision's ancestry;
        with two, only the difference computed by get_diff_info.
        """
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        other_tip = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other_tip = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if not other_tip:
                info = get_info(a_branch.repository, tip)
            else:
                info = get_diff_info(a_branch.repository, tip, other_tip)

        if not show_class:
            class_stats = None
        else:
            def class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        display_info(info, self.outf, class_stats)
 
275
 
 
276
 
 
277
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        """Write one ``revno, ancestor-count`` line per depth-0 revision.

        The merge-sorted ancestry is walked in reverse; every entry bumps the
        running ancestor count, and each entry at depth 0 bumps the revno and
        emits a line.
        """
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            mainline_revno = 0
            ancestors_seen = 0
            merge_sorted = tsort.merge_sort(
                graph.iter_ancestry([tip]), tip)
            for _num, _node_name, depth, _is_end in reversed(merge_sorted):
                ancestors_seen += 1
                if depth != 0:
                    continue
                mainline_revno += 1
                self.outf.write(
                    '%4d, %4d\n' % (mainline_revno, ancestors_seen))
 
305
 
 
306
 
 
307
def gather_class_stats(repository, revs):
    """Count how often each contribution class occurs in ``revs``.

    :param repository: repository the deltas are read from; it is
        read-locked while they are classified.
    :param revs: sized collection of revisions, passed to
        ``repository.get_deltas_for_revisions``.
    :return: ``(counts, total)`` where ``counts`` maps each class yielded by
        classify_delta to its number of occurrences and ``total`` is the sum
        of all counts.
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    return ret, total
 
322
 
 
323
 
 
324
def classify_key(item):
    """Sort key for a ``(name, count)`` item: highest count first, then name.

    ``name`` may be None (display_info prints such a class as "Unknown").
    On Python 3 None cannot be ordered against a string, so a tie in count
    between None and a real name used to raise TypeError.  The name part of
    the key therefore starts with a flag that sorts None before every real
    name, which is also where Python 2 placed it.

    :param item: ``(name, count)`` pair.
    :return: tuple usable as a ``key=`` result for sorting.
    """
    name = item[0]
    # The flag decides any None-vs-name comparison, so the '' placeholder is
    # never compared against a real name.
    return -item[1], (name is not None, '' if name is None else name)
 
327
 
 
328
 
 
329
def display_credits(credits, to_file):
    """Write the credits, one titled section per kind of contribution.

    Sections are written in the order Code, Documentation, Art,
    Translations; a section with no contributors is omitted entirely.

    :param credits: 4-tuple ``(coders, documenters, artists, translators)``,
        each a sequence of contributor names.
    :param to_file: object with a ``write`` method accepting text.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(title, contributors):
        if not contributors:
            return
        to_file.write("%s:\n" % (title,))
        for contributor in contributors:
            to_file.write("%s\n" % (contributor,))
        # Blank line separating this section from the next one.
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
 
343
 
 
344
 
 
345
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    Every non-merge revision in the ancestry of ``revid`` counts once, for
    each of its apparent authors, towards each distinct class that
    classify_delta reports for its delta.

    :param repository: repository to read from; it is read-locked while the
        ancestry is analysed.
    :param revid: revision id whose ancestry is analysed.
    :return: tuple with (authors, documenters, artists, translators), each a
        list of author strings ordered by classify_key.
    """
    per_class = {
        "documentation": {},
        "code": {},
        "art": {},
        "translation": {},
        None: {},
    }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = []
        for ancestor, parents in graph.iter_ancestry([revid]):
            if parents is not None and ancestor != NULL_REVISION:
                ancestry.append(ancestor)
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, (rev, delta) in enumerate(zip(revs, deltas)):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        tally = per_class[c]
                        tally[author] = tally.get(author, 0) + 1

    def sort_class(name):
        ranked = sorted(per_class[name].items(), key=classify_key)
        return [author for author, _ in ranked]

    coders = sort_class("code")
    documenters = sort_class("documentation")
    artists = sort_class("art")
    translators = sort_class("translation")
    return (coders, documenters, artists, translators)
 
378
 
 
379
 
 
380
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        """Write the credits for the branch at ``location``.

        The branch tip is used unless ``revision`` is given, in which case
        its first entry selects the revision.
        """
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            # No working tree here: fall back to the branch itself.
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            found = find_credits(a_branch.repository, tip)
            display_credits(found, self.outf)