/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2017-11-21 20:14:44 UTC
  • mfrom: (6821.1.1 ignore-warnings)
  • Revision ID: jelmer@jelmer.uk-20171121201444-dvb7yjku3zwjev83
Merge lp:~jelmer/brz/ignore-warnings.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2006-2010 Canonical Ltd
 
2
 
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
 
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
 
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
16
"""A Simple bzr plugin to generate statistics about the history."""
 
17
 
 
18
from __future__ import absolute_import
 
19
 
 
20
from ... import (
 
21
    branch,
 
22
    commands,
 
23
    config,
 
24
    errors,
 
25
    option,
 
26
    trace,
 
27
    tsort,
 
28
    ui,
 
29
    workingtree,
 
30
    )
 
31
from ...revision import NULL_REVISION
 
32
from .classify import classify_delta
 
33
 
 
34
 
 
35
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of Revision objects to aggregate.
    :param canonical_committer: mapping of (username, email) -> canonical
        (username, email) identity, as produced by collapse_email_and_users.
    :return: list of (revision_count, revisions, email_counts,
        username_counts) tuples, sorted by descending revision count.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                # Completely anonymous author ('', '') -- nothing to group on.
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Sort on the revision count only.  Sorting the raw tuples would, on a
    # tie, fall back to comparing Revision lists and dicts, which raises
    # TypeError on Python 3.
    res.sort(key=lambda x: x[0], reverse=True)
    return res
 
64
 
 
65
 
 
66
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: mapping of email -> set of usernames seen with it.
    :param combo_count: mapping of (username, email) -> occurrence count.
    :return: mapping of every (username, email) combo to the most frequent
        combo of the identity it was collapsed into.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold identity old_id into new_id, re-pointing every username and
        # email that referenced old_id.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                # Use the new_id parameter here.  The original assigned the
                # closed-over `cur_id`, which only worked because the sole
                # caller happens to pass new_id == cur_id.
                email_to_id[old_email] = new_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was already matched to another identity
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        # Representative combo for this identity: the one seen most often.
        best_combo = max(combos, key=lambda x: combo_count[x])
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
 
139
 
 
140
 
 
141
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: Repository to read the revisions from.
    :param revids: revision ids to inspect.
    :return: tuple of (revisions, committer_map): a list of Revision
        objects and a mapping of each (username, email) combo to its
        canonical identity.
    """
    email_users = {}  # user@email.com => User Name
    combo_count = {}
    revs = []
    pb = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        # iter_revisions yields (revision_id, revision) pairs.  Accumulate
        # the Revision objects: the original returned the iterator itself,
        # which was already exhausted by this loop and yielded tuples, so
        # collapse_by_person() could never call rev.get_apparent_authors().
        for count, (revid, rev) in enumerate(a_repo.iter_revisions(revids)):
            pb.update('checking', count, len(revids))
            revs.append(rev)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    finally:
        pb.finished()
    return revs, collapse_email_and_users(email_users, combo_count)
 
162
 
 
163
 
 
164
def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        trace.note('getting ancestry')
        rev_graph = a_repo.get_graph()
        ancestor_ids = []
        for rev_id, parents in rev_graph.iter_ancestry([revision]):
            # Skip ghosts (parents is None) and the null revision.
            if parents is None or rev_id == NULL_REVISION:
                continue
            ancestor_ids.append(rev_id)
        revisions, committer_map = get_revisions_and_committers(
            a_repo, ancestor_ids)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revisions, committer_map)
 
180
 
 
181
 
 
182
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.
    """
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        trace.note('getting ancestry diff')
        rev_graph = a_repo.get_graph()
        # find_difference() returns (only-in-start, only-in-end); we want
        # the revisions unique to end_rev's side.
        new_ancestry = rev_graph.find_difference(start_rev, end_rev)[1]
        revisions, committer_map = get_revisions_and_committers(
            a_repo, new_ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revisions, committer_map)
 
199
 
 
200
 
 
201
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""
    for count, revs, emails, fullnames in info:
        # Rank emails and full names by how often each was seen.
        emails_by_count = sorted(
            ((num, email) for email, num in emails.items()), reverse=True)
        names_by_count = sorted(
            ((num, name) for name, num in fullnames.items()), reverse=True)
        top_name = names_by_count[0][1]
        top_email = emails_by_count[0][1]
        if top_name == '' and top_email == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n' % (count, top_name, top_email))
        if len(names_by_count) > 1:
            to_file.write('     Other names:\n')
            for num, name in names_by_count:
                to_file.write('     %4d ' % (num,))
                to_file.write("''\n" if name == '' else "%s\n" % (name,))
        if len(emails_by_count) > 1:
            to_file.write('     Other email addresses:\n')
            for num, email in emails_by_count:
                to_file.write('     %4d ' % (num,))
                to_file.write("''\n" if email == '' else "%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, num in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write(
                    "     %4.0f%% %s\n" % ((float(num) / total) * 100.0, name))
 
242
 
 
243
 
 
244
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        # Prefer the working tree's branch/tip; fall back to opening the
        # branch directly when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            the_branch = branch.Branch.open(location)
            tip = the_branch.last_revision()
        else:
            the_branch = tree.branch
            tip = tree.last_revision()

        other_tip = None
        if revision is not None:
            tip = revision[0].in_history(the_branch).rev_id
            if len(revision) > 1:
                other_tip = revision[1].in_history(the_branch).rev_id

        with the_branch.lock_read():
            if other_tip:
                # Two revisions given: only the range between them.
                info = get_diff_info(the_branch.repository, tip, other_tip)
            else:
                info = get_info(the_branch.repository, tip)

        if show_class:
            def fetch_class_stats(revs):
                # Bind the repository so display_info only passes revs.
                return gather_class_stats(the_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)
 
282
 
 
283
 
 
284
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree's branch/tip; fall back to opening the
        # branch directly when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            the_branch = branch.Branch.open(location)
            tip = the_branch.last_revision()
        else:
            the_branch = tree.branch
            tip = tree.last_revision()

        with the_branch.lock_read():
            graph = the_branch.repository.get_graph()
            merge_sorted = tsort.merge_sort(graph.iter_ancestry([tip]), tip)
            revno = 0
            total_ancestors = 0
            # Walk oldest -> newest; mainline revisions are at depth 0, so
            # each one reports the cumulative ancestor count at that point.
            for num, node_name, depth, isend in reversed(merge_sorted):
                total_ancestors += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, total_ancestors))
 
312
 
 
313
 
 
314
def gather_class_stats(repository, revs):
    """Tally contribution classifications over a set of revisions.

    :param repository: Repository to read revision deltas from.
    :param revs: Revision objects whose deltas should be classified.
    :return: tuple of ({classification: count}, total) where total is the
        sum of all the counts.
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        with repository.lock_read():
            # enumerate replaces the original hand-maintained counter.
            for i, delta in enumerate(
                    repository.get_deltas_for_revisions(revs)):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    # dict.get collapses the `if not c in ret` init + add.
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
    finally:
        pb.finished()
    return ret, total
 
332
 
 
333
 
 
334
def classify_key(item):
    """Sort key for item of (author, count) from classify_delta."""
    author, count = item
    # Highest count sorts first; ties broken alphabetically by author.
    return -count, author
 
337
 
 
338
 
 
339
def display_credits(credits, to_file):
    """Write the credits, one titled section per contribution class."""
    coders, documenters, artists, translators = credits

    def emit_section(title, contributors):
        # Empty sections are omitted entirely.
        if not contributors:
            return
        to_file.write("%s:\n" % title)
        for contributor in contributors:
            to_file.write("%s\n" % contributor)
        to_file.write('\n')

    emit_section("Code", coders)
    emit_section("Documentation", documenters)
    emit_section("Art", artists)
    emit_section("Translations", translators)
 
352
 
 
353
 
 
354
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :param repository: Repository to read revisions and deltas from.
    :param revid: revision id whose ancestry should be analysed.
    :return: tuple with (authors, documenters, artists, translators), each
        a list of author strings sorted by descending contribution count.
    """
    # classification -> {author: commit count}.  The None bucket collects
    # changes classify_delta could not categorize.
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        # Skip ghosts (ps is None) and the null revision.
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set() so each revision counts at most once per class.
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        # dict.get collapses the `if not author in` init.
                        ret[c][author] = ret[c].get(author, 0) + 1
        finally:
            pb.finished()

    def sort_class(name):
        # Authors only, ordered by count (descending) then name.
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))
 
389
 
 
390
 
 
391
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree's branch/tip; fall back to opening the
        # branch directly when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            the_branch = branch.Branch.open(location)
            tip = the_branch.last_revision()
        else:
            the_branch = tree.branch
            tip = tree.last_revision()

        if revision is not None:
            tip = revision[0].in_history(the_branch).rev_id

        with the_branch.lock_read():
            display_credits(find_credits(the_branch.repository, tip),
                            self.outf)