/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2018-11-16 23:19:12 UTC
  • mfrom: (7180 work)
  • mto: This revision was merged to the branch mainline in revision 7294.
  • Revision ID: jelmer@jelmer.uk-20181116231912-e043vpq22bdkxa6q
Merge trunk.

# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email; fix it up by person.

    Some people commit with a similar username but a different email
    address, which makes it hard to sort out their multiple entries.
    Email is actually more stable, though, since people frequently
    forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    res.sort(reverse=True)
    return res
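
# Illustrative note (not part of the original module; the names below are
# invented): if canonical_committer maps both ('jrandom', 'jr@example.com')
# and ('J. Random', 'jr@example.com') to the same canonical pair, all of that
# person's revisions end up in a single entry, and the result is a list of
# (commit_count, [revisions], {email: count}, {username: count}) tuples with
# the most active committer first.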
 


def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map them all to
    one entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = cur_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address: we just try to match by username, and not at all by
            # email.
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This username was already matched to an id
                if user_id != cur_id:
                    # And it is a different identity from the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
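
# Illustrative note (not part of the original module; values are invented):
# given
#     email_users = {'j@example.com': {'John', 'John Doe'}, '': {'john doe'}}
#     combo_count = {('John', 'j@example.com'): 5,
#                    ('John Doe', 'j@example.com'): 2,
#                    ('john doe', ''): 1}
# all three combos would be expected to collapse to the most frequent
# spelling, ('John', 'j@example.com'), in the returned combo_to_best_combo
# mapping.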
 


def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information and the best match for each committer."""

    email_users = {}  # user@email.com => User Name
    combo_count = {}
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))


def get_info(a_repo, revision):
    """Get all of the information for a particular revision."""
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get the info for only the new revisions between two revisions.

    This lets us figure out what has actually changed between the two
    revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)
 


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information."""

    for count, revs, emails, fullnames in info:
        # Get the most common email and full name
        sorted_emails = sorted(((count, email)
                                for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                   for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n" %
                              ((float(count) / total) * 100.0, name))
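
# Illustrative sketch of the output layout (names, numbers and exact spacing
# are invented, not taken from a real run):
#
#      312 J. Random <jr@example.com>
#           Other email addresses:
#            300 jr@example.com
#             12 jrandom@example.org
#           Contributions:
#             80% code
#             20% documentation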
 


class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION."""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
 


def gather_class_stats(repository, revs):
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            i = 0
            for delta in repository.get_deltas_for_revisions(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if c not in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
    return ret, total
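
# Illustrative note (hypothetical values): the return value pairs a mapping of
# contribution class to change count with the overall total, for example
#     ({'code': 40, 'documentation': 7}, 47)
# where the class names are whatever classify_delta reports.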
 


def classify_key(item):
    """Sort key for (name, count) items from classify_delta: highest count
    first, then by name."""
    return -item[1], item[0]
 


def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for name in lst:
            to_file.write("%s\n" % name)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)


def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (coders, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
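
# Illustrative note (hypothetical values): the returned tuple holds four lists
# of author strings, ordered by how many non-merge revisions touched each
# class, e.g.
#     (['J. Random <jr@example.com>', 'A. N. Other <an@example.org>'],  # code
#      ['A. N. Other <an@example.org>'],  # documentation
#      [],  # art
#      [])  # translation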
 


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
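
# Usage sketch (assuming the plugin is registered in the usual way; output is
# not shown here): the command classes above are exposed on the bzr/brz
# command line roughly as
#
#     brz stats                   # committer statistics for the current branch
#     brz stats --show-class      # also break contributions down by class
#     brz stats -r 100..200       # statistics for a revision range
#     brz ancestor-growth         # print "revno, ancestor count" pairs
#     brz credits -r -1           # code/documentation/art/translation credits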