/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Breezy landing bot
  • Author(s): Colin Watson
  • Date: 2020-11-16 21:47:08 UTC
  • mfrom: (7521.1.1 remove-lp-workaround)
  • Revision ID: breezy.the.bot@gmail.com-20201116214708-jos209mgxi41oy15
Remove breezy.git workaround for bazaar.launchpad.net.

Merged from https://code.launchpad.net/~cjwatson/brz/remove-lp-workaround/+merge/393710
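The file shown below defines the plugin's three commands: committer statistics (cmd_committer_statistics, aliased as stats and committer-stats), a hidden ancestor-growth report (cmd_ancestor_growth), and cmd_credits. Assuming the plugin is installed, typical invocations would look something like:

bzr stats .
bzr stats --show-class .
bzr credits .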

# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A Simple bzr plugin to generate statistics about the history."""

import operator

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.
    """
 
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]

    def key_fn(item):
        return item[0], list(item[2].keys())
    res.sort(reverse=True, key=key_fn)
    return res


def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.
    """
 
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = cur_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo


def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer."""

    email_users = {}  # user@email.com => User Name
    combo_count = {}
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))


def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(
            a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""
 
    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((count, email)
                                for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                   for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n" %
                              ((float(count) / total) * 100.0, name))


class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    hidden = True

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))


def gather_class_stats(repository, revs):
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            i = 0
            for delta in repository.get_revision_deltas(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if c not in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
    return ret, total


def classify_key(item):
    """Sort key for item of (author, count) from classify_delta."""
 
    return -item[1], item[0]


def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for name in lst:
            to_file.write("%s\n" % name)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)


def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_revision_deltas(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1

    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
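For reference, the same helpers can be driven directly from Python. The snippet below is a minimal sketch, assuming the plugin is importable as breezy.plugins.stats.cmds and that a branch exists in the current directory; it mirrors what cmd_committer_statistics.run does, minus the command-line plumbing.

import sys

from breezy import branch
from breezy.plugins.stats.cmds import display_info, get_info

# Open the branch, collect per-committer statistics for its tip revision,
# and print the summary to stdout.
a_branch = branch.Branch.open('.')
with a_branch.lock_read():
    info = get_info(a_branch.repository, a_branch.last_revision())
display_info(info, sys.stdout)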