/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/plugins/stats/cmds.py

  • Committer: Jelmer Vernooij
  • Date: 2018-07-08 14:45:27 UTC
  • Merged to: branch mainline in revision 7036
  • Revision ID: jelmer@jelmer.uk-20180708144527-codhlvdcdg9y0nji
  • Commit message: Fix a bunch of merge tests.

 
# Copyright (C) 2006-2010 Canonical Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""A Simple bzr plugin to generate statistics about the history."""

from __future__ import absolute_import

from ... import (
    branch,
    commands,
    config,
    errors,
    option,
    trace,
    tsort,
    ui,
    workingtree,
    )
from ...revision import NULL_REVISION
from .classify import classify_delta


def collapse_by_person(revisions, canonical_committer):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.
    """
    # Map from canonical committer to
    # {committer: ([rev_list], {email: count}, {fname:count})}
    committer_to_info = {}
    for rev in revisions:
        authors = rev.get_apparent_authors()
        for author in authors:
            username, email = config.parse_username(author)
            if len(username) == 0 and len(email) == 0:
                continue
            canon_author = canonical_committer[(username, email)]
            info = committer_to_info.setdefault(canon_author, ([], {}, {}))
            info[0].append(rev)
            info[1][email] = info[1].setdefault(email, 0) + 1
            info[2][username] = info[2].setdefault(username, 0) + 1
    res = [(len(revs), revs, emails, fnames)
           for revs, emails, fnames in committer_to_info.values()]
    res.sort(reverse=True)
    return res


def collapse_email_and_users(email_users, combo_count):
    """Combine the mapping of User Name to email and email to User Name.

    If a given User Name is used for multiple emails, try to map it all to one
    entry.
    """
    id_to_combos = {}
    username_to_id = {}
    email_to_id = {}
    id_counter = 0

    def collapse_ids(old_id, new_id, new_combos):
        old_combos = id_to_combos.pop(old_id)
        new_combos.update(old_combos)
        for old_user, old_email in old_combos:
            if (old_user and old_user != user):
                low_old_user = old_user.lower()
                old_user_id = username_to_id[low_old_user]
                assert old_user_id in (old_id, new_id)
                username_to_id[low_old_user] = new_id
            if (old_email and old_email != email):
                old_email_id = email_to_id[old_email]
                assert old_email_id in (old_id, new_id)
                email_to_id[old_email] = cur_id
    for email, usernames in email_users.items():
        assert email not in email_to_id
        if not email:
            # We use a different algorithm for usernames that have no email
            # address, we just try to match by username, and not at all by
            # email
            for user in usernames:
                if not user:
                    continue # The mysterious ('', '') user
                # When mapping, use case-insensitive names
                low_user = user.lower()
                user_id = username_to_id.get(low_user)
                if user_id is None:
                    id_counter += 1
                    user_id = id_counter
                    username_to_id[low_user] = user_id
                    id_to_combos[user_id] = id_combos = set()
                else:
                    id_combos = id_to_combos[user_id]
                id_combos.add((user, email))
            continue

        id_counter += 1
        cur_id = id_counter
        id_to_combos[cur_id] = id_combos = set()
        email_to_id[email] = cur_id

        for user in usernames:
            combo = (user, email)
            id_combos.add(combo)
            if not user:
                # We don't match on empty usernames
                continue
            low_user = user.lower()
            user_id = username_to_id.get(low_user)
            if user_id is not None:
                # This UserName was matched to an cur_id
                if user_id != cur_id:
                    # And it is a different identity than the current email
                    collapse_ids(user_id, cur_id, id_combos)
            username_to_id[low_user] = cur_id
    combo_to_best_combo = {}
    for cur_id, combos in id_to_combos.items():
        best_combo = sorted(combos,
                            key=lambda x:combo_count[x],
                            reverse=True)[0]
        for combo in combos:
            combo_to_best_combo[combo] = best_combo
    return combo_to_best_combo
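
The snippet below is a minimal, hypothetical sketch of how collapse_email_and_users behaves on toy data; the names, addresses and counts are invented, and it assumes breezy (with this plugin) is importable as breezy.plugins.stats.cmds.

# Toy data: one contributor who committed under two name spellings and
# once with no email address at all.  All values here are made up.
from breezy.plugins.stats.cmds import collapse_email_and_users

email_users = {
    'jrandom@example.com': {'J. Random Hacker', 'jrandom'},
    '': {'jrandom'},
}
combo_count = {
    ('J. Random Hacker', 'jrandom@example.com'): 5,
    ('jrandom', 'jrandom@example.com'): 1,
    ('jrandom', ''): 2,
}

best = collapse_email_and_users(email_users, combo_count)
# Every (username, email) combo should map to the most frequently used
# combination, here ('J. Random Hacker', 'jrandom@example.com').
for combo, canonical in sorted(best.items()):
    print(combo, '->', canonical)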
 

def get_revisions_and_committers(a_repo, revids):
    """Get the Revision information, and the best-match for committer."""

    email_users = {} # user@email.com => User Name
    combo_count = {}
    with ui.ui_factory.nested_progress_bar() as pb:
        trace.note('getting revisions')
        revisions = list(a_repo.iter_revisions(revids))
        for count, (revid, rev) in enumerate(revisions):
            pb.update('checking', count, len(revids))
            for author in rev.get_apparent_authors():
                # XXX: There is a chance sometimes with svn imports that the
                #      full name and email can BOTH be blank.
                username, email = config.parse_username(author)
                email_users.setdefault(email, set()).add(username)
                combo = (username, email)
                combo_count[combo] = combo_count.setdefault(combo, 0) + 1
    return ((rev for (revid, rev) in revisions),
            collapse_email_and_users(email_users, combo_count))


def get_info(a_repo, revision):
    """Get all of the information for a particular revision"""
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        trace.note('getting ancestry')
        graph = a_repo.get_graph()
        ancestry = [
            r for (r, ps) in graph.iter_ancestry([revision])
            if ps is not None and r != NULL_REVISION]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.
    """
    with ui.ui_factory.nested_progress_bar() as pb, a_repo.lock_read():
        graph = a_repo.get_graph()
        trace.note('getting ancestry diff')
        ancestry = graph.find_difference(start_rev, end_rev)[1]
        revs, canonical_committer = get_revisions_and_committers(a_repo, ancestry)

    return collapse_by_person(revs, canonical_committer)


def display_info(info, to_file, gather_class_stats=None):
    """Write out the information"""

    for count, revs, emails, fullnames in info:
        # Get the most common email name
        sorted_emails = sorted(((count, email)
                               for email, count in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((count, fullname)
                                  for fullname, count in fullnames.items()),
                                  reverse=True)
        if sorted_fullnames[0][1] == '' and sorted_emails[0][1] == '':
            to_file.write('%4d %s\n'
                          % (count, 'Unknown'))
        else:
            to_file.write('%4d %s <%s>\n'
                          % (count, sorted_fullnames[0][1],
                             sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            to_file.write('     Other names:\n')
            for count, fname in sorted_fullnames:
                to_file.write('     %4d ' % (count,))
                if fname == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write('     Other email addresses:\n')
            for count, email in sorted_emails:
                to_file.write('     %4d ' % (count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
        if gather_class_stats is not None:
            to_file.write('     Contributions:\n')
            classes, total = gather_class_stats(revs)
            for name, count in sorted(classes.items(), key=classify_key):
                if name is None:
                    name = "Unknown"
                to_file.write("     %4.0f%% %s\n" % ((float(count) / total) * 100.0, name))
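
As a rough usage sketch (not part of the plugin), the helpers above can be driven directly the same way cmd_committer_statistics.run() does below; '.' is a placeholder for some local branch, and breezy is assumed to be installed with this plugin importable as breezy.plugins.stats.cmds.

import sys

from breezy import branch
from breezy.plugins.stats.cmds import display_info, get_info

a_branch = branch.Branch.open('.')  # placeholder location
with a_branch.lock_read():
    # Same call chain as the stats command without --show-class.
    info = get_info(a_branch.repository, a_branch.last_revision())
display_info(info, sys.stdout)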
 

class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision',
            option.Option('show-class', help="Show the class of contributions.")]

    encoding_type = 'replace'

    def run(self, location='.', revision=None, show_class=False):
        alternate_rev = None
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                alternate_rev = revision[1].in_history(a_branch).rev_id

        with a_branch.lock_read():
            if alternate_rev:
                info = get_diff_info(a_branch.repository, last_rev,
                                     alternate_rev)
            else:
                info = get_info(a_branch.repository, last_rev)
        if show_class:
            def fetch_class_stats(revs):
                return gather_class_stats(a_branch.repository, revs)
        else:
            fetch_class_stats = None
        display_info(info, self.outf, fetch_class_stats)


class cmd_ancestor_growth(commands.Command):
    """Figure out the ancestor graph for LOCATION"""

    takes_args = ['location?']

    encoding_type = 'replace'

    def run(self, location='.'):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        with a_branch.lock_read():
            graph = a_branch.repository.get_graph()
            revno = 0
            cur_parents = 0
            sorted_graph = tsort.merge_sort(graph.iter_ancestry([last_rev]),
                                            last_rev)
            for num, node_name, depth, isend in reversed(sorted_graph):
                cur_parents += 1
                if depth == 0:
                    revno += 1
                    self.outf.write('%4d, %4d\n' % (revno, cur_parents))
 

def gather_class_stats(repository, revs):
    ret = {}
    total = 0
    with ui.ui_factory.nested_progress_bar() as pb:
        with repository.lock_read():
            i = 0
            for delta in repository.get_deltas_for_revisions(revs):
                pb.update("classifying commits", i, len(revs))
                for c in classify_delta(delta):
                    if not c in ret:
                        ret[c] = 0
                    ret[c] += 1
                    total += 1
                i += 1
    return ret, total


def classify_key(item):
    """Sort key for item of (author, count) from classify_delta."""
    return -item[1], item[0]
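
classify_key sorts by descending count and then by name, which is the order display_info and find_credits use for their output. A small self-contained illustration (the class counts here are made up):

from breezy.plugins.stats.cmds import classify_key

classes = {"code": 10, "art": 3, "documentation": 3}
print(sorted(classes.items(), key=classify_key))
# -> [('code', 10), ('art', 3), ('documentation', 3)]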
 

def display_credits(credits, to_file):
    (coders, documenters, artists, translators) = credits
    def print_section(name, lst):
        if len(lst) == 0:
            return
        to_file.write("%s:\n" % name)
        for name in lst:
            to_file.write("%s\n" % name)
        to_file.write('\n')
    print_section("Code", coders)
    print_section("Documentation", documenters)
    print_section("Art", artists)
    print_section("Translations", translators)


def find_credits(repository, revid):
    """Find the credits of the contributors to a revision.

    :return: tuple with (authors, documenters, artists, translators)
    """
    ret = {"documentation": {},
           "code": {},
           "art": {},
           "translation": {},
           None: {}
           }
    with repository.lock_read():
        graph = repository.get_graph()
        ancestry = [r for (r, ps) in graph.iter_ancestry([revid])
                    if ps is not None and r != NULL_REVISION]
        revs = repository.get_revisions(ancestry)
        with ui.ui_factory.nested_progress_bar() as pb:
            iterator = zip(revs, repository.get_deltas_for_revisions(revs))
            for i, (rev, delta) in enumerate(iterator):
                pb.update("analysing revisions", i, len(revs))
                # Don't count merges
                if len(rev.parent_ids) > 1:
                    continue
                for c in set(classify_delta(delta)):
                    for author in rev.get_apparent_authors():
                        if not author in ret[c]:
                            ret[c][author] = 0
                        ret[c][author] += 1
    def sort_class(name):
        return [author
            for author, _  in sorted(ret[name].items(), key=classify_key)]
    return (sort_class("code"), sort_class("documentation"), sort_class("art"), sort_class("translation"))


class cmd_credits(commands.Command):
    """Determine credits for LOCATION."""

    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        try:
            wt = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = wt.branch
            last_rev = wt.last_revision()

        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id

        with a_branch.lock_read():
            credits = find_credits(a_branch.repository, last_rev)
            display_credits(credits, self.outf)
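
Finally, a minimal sketch of the same flow as cmd_credits.run(), calling find_credits and display_credits directly; again '.' is a placeholder branch location and breezy is assumed to be installed with this plugin available.

import sys

from breezy import branch
from breezy.plugins.stats.cmds import display_credits, find_credits

a_branch = branch.Branch.open('.')  # placeholder location
with a_branch.lock_read():
    credits = find_credits(a_branch.repository, a_branch.last_revision())
    display_credits(credits, sys.stdout)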