/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2019-07-12 01:01:45 UTC
  • mto: This revision was merged to the branch mainline in revision 7375.
  • Revision ID: jelmer@jelmer.uk-20190712010145-m7224qumb8w068zw
Fix importing from remote git repositories.

# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

import operator

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email; fix it up by person.

    Some people commit with a similar username but different email
    addresses, which makes it hard to sort out their multiple entries.
    Email is actually more stable, though, since people frequently
    forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    res.sort(reverse=True, key=operator.itemgetter(0))
    return res


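# Illustrative only (not part of the original module): the list returned by
# collapse_by_person() has the shape
#
#     [(commit_count, [Revision, ...], {email: count}, {username: count}),
#      ...]
#
# sorted with the most active author first. A hypothetical entry:
#
#     (42,
#      [<Revision ...>, ...],
#      {'jrandom@example.com': 40, 'jrandom@other.example': 2},
#      {'J. Random Hacker': 41, 'jrandom': 1})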
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map them all to
    one entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = cur_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address; we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was already matched to an existing id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo


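# Illustrative only (hypothetical names): collapse_email_and_users() maps every
# (username, email) combination seen in the history to the most frequently
# used combination for that person, e.g.
#
#     {('jrandom', 'jrandom@example.com'):
#          ('J. Random Hacker', 'jrandom@example.com'),
#      ('J. Random Hacker', 'jrandom@example.com'):
#          ('J. Random Hacker', 'jrandom@example.com')}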
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer."""

    email_users = {}  # user@email.com => User Name
    combo_count = {}
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))


def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""

    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((count, email)
                                for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                   for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n" %
                              ((float(count) / total) * 100.0, name))


class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


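# A hypothetical session showing how the command defined above might be run
# once the plugin is loaded (paths and revision numbers are made up):
#
#     $ brz stats ~/src/project           # whole-history committer statistics
#     $ brz committer-stats --show-class  # same command, with contribution classes
#     $ brz stats -r 1000..1100           # only revisions new in the second revision's ancestry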
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    hidden = True

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))


def gather_class_stats(repository, revs):
    """Count contribution classes (as reported by classify_delta) over revs.

    :return: tuple of ({class_name: count}, total_count)
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            i = 0
            for delta in repository.get_deltas_for_revisions(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if c not in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
    return ret, total


def classify_key(item):
    """Sort key for a (name, count) item: highest count first, then by name."""
    return -item[1], item[0]


def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for name in lst:
            to_file.write("%s\n" % name)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)


def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
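
Assuming the plugin registers these Command classes under their conventional names, the remaining commands can be invoked the same way (the location below is illustrative):

brz credits ~/src/project
brz ancestor-growth ~/src/project

cmd_credits prints the Code, Documentation, Art and Translations sections built by find_credits(); cmd_ancestor_growth is hidden from the command listing but still runnable, and writes one "revno, ancestor count" pair per mainline revision.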