/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2020-04-05 19:11:34 UTC
  • mto: (7490.7.16 work)
  • mto: This revision was merged to the branch mainline in revision 7501.
  • Revision ID: jelmer@jelmer.uk-20200405191134-0aebh8ikiwygxma5
Populate the .gitignore file.

# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

import operator

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email; fix it up by person.

    Some people commit with a similar username but a different email
    address, which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]

    def key_fn(item):
        return item[0], list(item[2].keys())
    res.sort(reverse=True, key=key_fn)
    return res
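
# Illustrative only (not part of the plugin): given three revisions whose
# apparent authors are "Alice <alice@example.com>" twice and
# "alice <alice@example.com>" once, and a canonical_committer map sending both
# spellings to ('Alice', 'alice@example.com'), collapse_by_person() returns a
# single entry:
#   [(3, [rev1, rev2, rev3],
#     {'alice@example.com': 3},
#     {'Alice': 2, 'alice': 1})]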


def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address: we just try to match by username, and not at all by
            # email.
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to a cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
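
# Illustrative only (hypothetical input, not part of the plugin): how
# collapse_email_and_users() picks one canonical (name, email) per identity.
#   email_users = {'jrandom@example.com': {'J. Random', 'jrandom'},
#                  '': {'J. Random'}}
#   combo_count = {('J. Random', 'jrandom@example.com'): 10,
#                  ('jrandom', 'jrandom@example.com'): 2,
#                  ('J. Random', ''): 1}
# collapse_email_and_users(email_users, combo_count) maps all three
# combinations to ('J. Random', 'jrandom@example.com'), the most frequent one.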


def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer."""

    email_users = {}  # user@email.com => User Name
    combo_count = {}
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))


def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""

    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((count, email)
                                for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                   for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n" %
                              ((float(count) / total) * 100.0, name))
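
# Illustrative only: a minimal sketch of driving get_info() and display_info()
# from Python, mirroring what cmd_committer_statistics.run() does below
# (assumes a branch exists at '.'):
#   import sys
#   from breezy import branch
#   b = branch.Branch.open('.')
#   with b.lock_read():
#       info = get_info(b.repository, b.last_revision())
#   display_info(info, sys.stdout)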


class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    hidden = True

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))


def gather_class_stats(repository, revs):
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            i = 0
            for delta in repository.get_deltas_for_revisions(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if c not in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
    return ret, total


def classify_key(item):
    """Sort key for item of (author, count) from classify_delta."""
    return -item[1], item[0]
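
# Illustrative only: classify_key sorts by descending count, then by name:
#   sorted([('code', 5), ('art', 5), ('documentation', 9)], key=classify_key)
#   == [('documentation', 9), ('art', 5), ('code', 5)]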


def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for name in lst:
            to_file.write("%s\n" % name)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
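
# Illustrative only: display_credits((['Alice'], ['Bob'], [], []), sys.stdout)
# prints each non-empty section followed by a blank line:
#   Code:
#   Alice
#
#   Documentation:
#   Bob
#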


def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))
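
# Illustrative only: a sketch of calling find_credits() directly, mirroring
# cmd_credits.run() below (assumes a branch exists at '.'):
#   import sys
#   from breezy import branch
#   b = branch.Branch.open('.')
#   with b.lock_read():
#       credits = find_credits(b.repository, b.last_revision())
#   display_credits(credits, sys.stdout)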


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
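
# Illustrative usage from the command line (assuming this plugin is loaded as
# breezy.plugins.stats; alias and option names are taken from the classes above):
#   brz stats                  # committer statistics for the current branch
#   brz stats --show-class     # also break contributions down by class
#   brz credits                # contributors grouped by code/docs/art/translations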