/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2017-08-27 13:57:26 UTC
  • mto: This revision was merged to the branch mainline in revision 6773.
  • Revision ID: jelmer@jelmer.uk-20170827135726-o6k0a4j205zdh8k0
Fix some tests.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# Copyright (C) 2006-2010 Canonical Ltd
 
2
 
 
3
# This program is free software; you can redistribute it and/or modify
 
4
# it under the terms of the GNU General Public License as published by
 
5
# the Free Software Foundation; either version 2 of the License, or
 
6
# (at your option) any later version.
 
7
 
 
8
# This program is distributed in the hope that it will be useful,
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
11
# GNU General Public License for more details.
 
12
 
 
13
# You should have received a copy of the GNU General Public License
 
14
# along with this program; if not, write to the Free Software
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
16
"""A Simple bzr plugin to generate statistics about the history."""
 
17
 
 
18
from __future__ import absolute_import
 
19
 
 
20
from ... import (
 
21
    branch,
 
22
    commands,
 
23
    config,
 
24
    errors,
 
25
    option,
 
26
    trace,
 
27
    tsort,
 
28
    ui,
 
29
    workingtree,
 
30
    )
 
31
from ...revision import NULL_REVISION
 
32
from .classify import classify_delta
 
33
 
 
34
 
 
35
def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param revisions: iterable of Revision objects to aggregate.
    :param canonical_committer: mapping from (username, email) to the
        canonical (username, email) for that identity, as produced by
        collapse_email_and_users.
    :return: list of (commit_count, revisions, email_counts, username_counts)
        tuples, most active committer first.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                # Nothing usable to attribute this revision to.
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    # Sort on the commit count only.  Sorting the whole tuple would, on a
    # tie, fall through to comparing lists of Revision objects, which
    # raises TypeError on Python 3.
    res.sort(key=lambda x: x[0], reverse=True)
    return res
 
64
 
 
65
 
 
66
def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.

    :param email_users: dict mapping email -> set of usernames seen with it.
    :param combo_count: dict mapping (username, email) -> occurrence count.
    :return: dict mapping every (username, email) combo seen to the most
        frequent combo of the identity it was collapsed into.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos, cur_user, cur_email):
        """Merge identity old_id into new_id, rewriting the lookup tables.

        cur_user/cur_email are the combo being processed by the caller; the
        caller updates their table entries itself, so they are skipped here.
        (The original closed over the loop variables and assigned the outer
        cur_id instead of new_id, which only worked because the single call
        site passed new_id=cur_id.)
        """
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if old_user and old_user != cur_user:
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if old_email and old_email != cur_email:
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = new_id

    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an existing id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos, user, email)
            username_to_id[low_user] = cur_id
    # Pick the most frequently seen combo as the canonical one for each
    # collapsed identity.
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x: combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
 
139
 
 
140
 
 
141
def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer.

    :param a_repo: repository to read the revisions from.
    :param revids: revision ids to look up.
    :return: tuple of (list of Revision objects,
        combo -> canonical-combo mapping).
    """
    email_users = {} # user@email.com => User Name
    combo_count = {}
    revs = []
    pb = ui.ui_factory.nested_progress_bar()
    try:
        trace.note('getting revisions')
        # iter_revisions() yields (revid, rev) pairs lazily; collect the
        # Revision objects as we go.  The original returned the (by then
        # exhausted) iterator itself, so collapse_by_person() downstream
        # saw no revisions at all.
        for count, (revid, rev) in enumerate(a_repo.iter_revisions(revids)):
            pb.update('checking', count, len(revids))
            revs.append(rev)
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    finally:
        pb.finished()
    return revs, collapse_email_and_users(email_users, combo_count)
 
162
 
 
163
 
 
164
def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        # Collect every ancestor of `revision`, skipping entries with no
        # parent information and the null revision.
        ancestry = []
        for rev_id, parents in graph.iter_ancestry([revision]):
            if parents is None or rev_id == NULL_REVISION:
                continue
            ancestry.append(rev_id)
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revs, canonical_committer)
 
180
 
 
181
 
 
182
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.
    """
    pb = ui.ui_factory.nested_progress_bar()
    a_repo.lock_read()
    try:
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        # Second element of find_difference(): the revisions unique to the
        # end_rev side (presumably — confirm against the graph API docs).
        new_ancestry = graph.find_difference(start_rev, end_rev)[1]
        revisions, committer_map = get_revisions_and_committers(
            a_repo, new_ancestry)
    finally:
        a_repo.unlock()
        pb.finished()

    return collapse_by_person(revisions, committer_map)
 
199
 
 
200
 
 
201
def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""
    for count, revs, emails, fullnames in info:
        # Rank emails and fullnames by how often each was seen.
        sorted_emails = sorted(
            ((num, email) for email, num in emails.items()), reverse=True)
        sorted_fullnames = sorted(
            ((num, fullname) for fullname, num in fullnames.items()),
            reverse=True)
        top_fullname = sorted_fullnames[0][1]
        top_email = sorted_emails[0][1]
        if top_fullname == '' and top_email == '':
            to_file.write('%4d %s\n' % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n' % (count, top_fullname, top_email))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for num, fname in sorted_fullnames:
                to_file.write('     %4d ' % (num,))
                to_file.write("''\n" if fname == '' else "%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for num, email in sorted_emails:
                to_file.write('     %4d ' % (num,))
                to_file.write("''\n" if email == '' else "%s\n" % (email,))
        if gather_class_stats is not None:
            # Optional per-committer contribution class breakdown.
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, num in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n"
                              % ((float(num) / total) * 100.0, name))
 
242
 
 
243
 
 
244
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        # Prefer the working tree when one exists, otherwise fall back to
        # opening the branch directly.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            wt = None
        if wt is None:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            # -r overrides the tip; a second revision selects a range.
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if alternate_rev:
                # Two revisions: only stats for what changed between them.
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        finally:
            a_branch.unlock()
        fetch_class_stats = None
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        display_info(info, self.outf, fetch_class_stats)
 
285
 
 
286
 
 
287
class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        # Prefer the working tree when one exists, otherwise open the
        # branch directly.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            wt = None
        if wt is None:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        a_branch.lock_read()
        try:
            graph = a_branch.repository.get_graph()
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            revno = 0
            total_ancestors = 0
            # Walk the merge-sorted graph in reverse, counting every node;
            # emit a (revno, ancestry-size-so-far) pair at each mainline
            # revision (depth == 0).
            for num, node_name, depth, isend in reversed(sorted_graph):
                total_ancestors += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, total_ancestors))
        finally:
            a_branch.unlock()
 
318
 
 
319
 
 
320
def gather_class_stats(repository, revs):
    """Tally contribution classifications over a set of revisions.

    :param repository: repository to read revision deltas from; it is
        read-locked for the duration of the call.
    :param revs: revisions whose deltas should be classified.
    :return: tuple of ({classification: count}, total classification count).
    """
    ret = {}
    total = 0
    pb = ui.ui_factory.nested_progress_bar()
    try:
        repository.lock_read()
        try:
            deltas = repository.get_deltas_for_revisions(revs)
            for i, delta in enumerate(deltas):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    # dict.get replaces the manual not-in/init/increment.
                    ret[c] = ret.get(c, 0) + 1
                    total += 1
        finally:
            repository.unlock()
    finally:
        pb.finished()
    return ret, total
 
341
 
 
342
 
 
343
def classify_key(item):
    """Sort key for item of (author, count) from classify_delta."""
    author, count = item
    # Highest count sorts first; ties fall back to the author value.
    return (-count, author)
 
346
 
 
347
 
 
348
def display_credits(credits, to_file):
    """Write the credits gathered by find_credits to a file.

    :param credits: tuple of (coders, documenters, artists, translators),
        each a list of contributor names.
    :param to_file: file-like object to write the report to.
    """
    (coders, documenters, artists, translators) = credits

    def print_section(name, lst):
        # Sections with no contributors are omitted entirely.
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        # Renamed from `name`, which shadowed the section title parameter.
        for contributor in lst:
            to_file.write("%s\n" % contributor)
        to_file.write('\n')

    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)
 
361
 
 
362
 
 
363
def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (authors, documenters, artists, translators)
    """
    # Per-classification tallies of author -> number of non-merge commits.
    # The None bucket presumably holds changes classify_delta left
    # unclassified — confirm against classify_delta's contract.
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    repository.lock_read()
    try:
        graph = repository.get_graph()
        # All ancestors of revid, excluding entries with no parent
        # information and the null revision.
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            # Pair each Revision with its delta; both sequences come from
            # the same `revs` list so they stay aligned.
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev,delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                # set() so each classification counts at most once per
                # revision, even if several files in the delta share it.
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if not author in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1
        finally:
            pb.finished()
    finally:
        repository.unlock()
    def sort_class(name):
        # Authors of one classification, most commits first (classify_key
        # sorts by descending count, then author).
        return [author
            for author, _  in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))
 
401
 
 
402
 
 
403
class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree when one exists, otherwise open the
        # branch directly.
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            wt = None
        if wt is None:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            # An explicit -r revision overrides the tip.
            last_rev = revision[0].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            display_credits(find_credits(a_branch.repository, last_rev),
                            self.outf)
        finally:
            a_branch.unlock()