
To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Richard Wilbur
  • Date: 2016-02-04 19:07:28 UTC
  • mto: This revision was merged to the branch mainline in revision 6618.
  • Revision ID: richard.wilbur@gmail.com-20160204190728-p0zvfii6zase0fw7
Update COPYING.txt from the original http://www.gnu.org/licenses/gpl-2.0.txt  (Only differences were in whitespace.)  Thanks to Petr Stodulka for pointing out the discrepancy.

# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta
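

# collapse_by_person() consumes the Revision objects and the canonical
# committer mapping produced by get_revisions_and_committers() below, and
# returns, roughly, a list of
# (commit_count, [revisions], {email: count}, {username: count})
# tuples sorted with the most active committer first.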
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email; fix it up to be by person.

    Some people commit with a similar username but a different email
    address, which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine the entries into one new list.
    """
    # Map from canonical committer to
    # ([rev_list], {email: count}, {fname: count})
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    res.sort(reverse=True)
    return res
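

# A small, made-up example of the identity collapsing performed below: given
# email_users = {'jrandom@example.com': {'J. Random', 'jrandom'}} and a
# combo_count in which ('J. Random', 'jrandom@example.com') is the more
# frequent spelling, both combos for that address end up mapped to
# ('J. Random', 'jrandom@example.com') in the returned dictionary.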
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map them all
    to one entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        # Fold the identity known as old_id into new_id, repointing the
        # username and email indexes at new_id; entries for the enclosing
        # loop's current user/email are updated by the caller.
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address; we just try to match by username, and not at all by
            # email.
            for user in usernames:
                if not user:
                    continue  # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This username was already matched to an existing id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
139
 
 
140
 
 
141
 
def get_revisions_and_committers(a_repo, revids):
142
 
    """Get the Revision information, and the best-match for committer."""
143
 
 
144
 
    email_users = {} # user@email.com => User Name
145
 
    combo_count = {}
146
 
    with ui.ui_factory.nested_progress_bar() as pb:
147
 
        trace.note('getting revisions')
148
 
        revisions = list(a_repo.iter_revisions(revids))
149
 
        for count, (revid, rev) in enumerate(revisions):
150
 
            pb.update('checking', count, len(revids))
151
 
            for author in rev.get_apparent_authors():
152
 
                # XXX: There is a chance sometimes with svn imports that the
153
 
                #      full name and email can BOTH be blank.
154
 
                username, email = config.parse_username(author)
155
 
                email_users.setdefault(email, set()).add(username)
156
 
                combo = (username, email)
157
 
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
158
 
    return ((rev for (revid, rev) in revisions),
159
 
            collapse_email_and_users(email_users, combo_count))


def get_info(a_repo, revision):
    """Get all of the information for a particular revision."""
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions.

    This lets us figure out what has actually changed between two revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information."""

    for count, revs, emails, fullnames in info:
        # Get the most common email address and full name
        sorted_emails = sorted(((count, email)
                               for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                  for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(count) / total) * 100.0, name))
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
                     option.Option('show-class',
                                   help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION.

    Writes one '<revno>, <ancestor count>' line per mainline revision,
    showing how the size of the ancestry has grown over time.
    """

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))


def gather_class_stats(repository, revs):
    """Tally the classification of changes in revs.

    Returns ({classification: count}, total), based on classify_delta().
    """
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            i = 0
            for delta in repository.get_deltas_for_revisions(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if c not in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
    return ret, total
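

# Illustrative ordering only: with counts {'code': 10, 'documentation': 10,
# 'art': 3}, sorting items with classify_key yields
# [('code', 10), ('documentation', 10), ('art', 3)]; highest count first,
# ties broken alphabetically by name.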
def classify_key(item):
    """Sort key for a (name, count) item: descending count, then name."""
    return -item[1], item[0]


def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits
    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for name in lst:
            to_file.write("%s\n" % name)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (coders, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if author not in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1
    def sort_class(name):
        return [author
                for author, _ in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"),
            sort_class("art"), sort_class("translation"))


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)