/brz/remove-bazaar : contents of __init__.py

: (revision 0.140.17)

To get this branch, use:

bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

0.140.1 by John Arbash Meinel A simple plugin for generating author statistics, may grow into more.	1	"""A Simple bzr plugin to generate statistics about the history."""
	2
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	3	import re
	4
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	5	from bzrlib.lazy_import import lazy_import
	6	lazy_import(globals(), """
	7	from bzrlib import (
	8	branch,
	9	commands,
	10	config,
	11	errors,
	12	tsort,
0.144.1 by Wesley J. Landaker Added ui to bzrlib lazy imports.	13	ui,
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	14	workingtree,
	15	)
	16	""")
	17	from bzrlib import lazy_regex
	18
	19
	20	_fullname_re = lazy_regex.lazy_compile(r'(?P<fullname>.?)\s<')
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	21
	22	def extract_fullname(committer):
	23	"""Try to get the user's name from their committer info."""
	24	m = _fullname_re.match(committer)
	25	if m:
	26	return m.group('fullname')
	27	try:
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	28	email = config.extract_email_address(committer)
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	29	except errors.BzrError:
	30	return committer
	31	else:
	32	# We found an email address, but not a fullname
	33	# so there is no fullname
	34	return ''
	35
	36
	37	def find_fullnames(lst):
	38	"""Find the fullnames for a list committer names."""
	39
	40	counts = {}
	41	for committer in lst:
	42	fullname = extract_fullname(committer)
	43	counts.setdefault(fullname, 0)
	44	counts[fullname] += 1
	45	return sorted(((count, name) for name,count in counts.iteritems()), reverse=True)
	46
	47
def collapse_by_person(committers):
    """The committers list is sorted by email, fix it up by person.

    Some people commit with a similar username, but different email
    address. Which makes it hard to sort out when they have multiple
    entries. Email is actually more stable, though, since people
    frequently forget to set their name properly.

    So take the most common username for each email address, and
    combine them into one new list.

    :param committers: dict mapping an email key to the list of revisions
        recorded under it; each revision must expose a ``committer``
        attribute.  The dict and its lists are not modified.
    :return: list of ``(revision_count, revisions, {email: count},
        {fullname: count})`` tuples, ordered by ``revision_count`` with the
        largest first.
    """
    # Just an indirection so that multiple names can reference
    # the same record information
    name_to_counter = {}
    # indirection back to real information
    # [[full_rev_list], {email:count}, {fname:count}]
    counter_to_info = {}
    counter = 0
    for email, revs in committers.items():
        fullnames = find_fullnames(rev.committer for rev in revs)
        match = None
        for count, fullname in fullnames:
            # The empty fullname is never used to join two records.
            if fullname and fullname in name_to_counter:
                # We found a match
                match = name_to_counter[fullname]
                break

        # Counters start at 1, so a real match is always truthy.
        if match:
            # One of the names matched, we need to collapse to records
            record = counter_to_info[match]
            record[0].extend(revs)
            record[1][email] = len(revs)
            for count, fullname in fullnames:
                name_to_counter[fullname] = match
                record[2][fullname] = record[2].get(fullname, 0) + count
        else:
            # just add this one to the list
            counter += 1
            for count, fullname in fullnames:
                if fullname:
                    name_to_counter[fullname] = counter
            fname_map = dict((fullname, count)
                             for count, fullname in fullnames)
            # Store a copy: later matches extend this list in place and
            # must not grow the list owned by the caller's dict.
            counter_to_info[counter] = [list(revs), {email: len(revs)},
                                        fname_map]
    # Order on the revision count only.  Comparing whole tuples would fall
    # through to the revision lists and dicts whenever two counts tie.
    return sorted(((len(revs), revs, emails, fnames)
                   for revs, emails, fnames in counter_to_info.values()),
                  key=lambda entry: entry[0], reverse=True)
	94
	95
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by their apparent author.

    :param a_repo: object providing ``get_revisions(revids)``.
    :param revids: revision ids to load; its length is used as the total
        for progress reporting.
    :return: dict mapping the email address extracted from each revision's
        apparent author (or the whole apparent-author string when
        ``config.extract_email_address`` raises ``errors.BzrError``) to the
        list of revisions filed under that key.
    """
    by_author = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        for index, rev in enumerate(a_repo.get_revisions(revids)):
            progress.update('checking', index, len(revids))
            try:
                key = config.extract_email_address(
                    rev.get_apparent_author())
            except errors.BzrError:
                # No usable address: file the revision under the raw string.
                key = rev.get_apparent_author()
            if key not in by_author:
                by_author[key] = []
            by_author[key].append(rev)
    finally:
        progress.finished()

    return by_author
	113
	114
def get_info(a_repo, revision):
    """Get all of the information for a particular revision

    :param a_repo: repository object; it is read-locked for the duration
        of the lookup and unlocked again afterwards.
    :param revision: revision id whose ancestry is analysed.
    :return: whatever ``collapse_by_person`` returns for the revisions in
        that ancestry.
    """
    a_repo.lock_read()
    try:
        # Create the progress bar only once the lock is held, and release
        # it in its own finally, so a failure in lock_read() or unlock()
        # can never leave a progress bar unfinished.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped (presumably a placeholder
            # rather than a real revision id -- confirm against
            # get_ancestry).
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            pb.finished()
    finally:
        a_repo.unlock()

    return collapse_by_person(committers)
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	129
	130
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: repository object; it is read-locked for the duration
        of the lookup and unlocked again afterwards.
    :param start_rev: revision id whose whole ancestry is excluded.
    :param end_rev: revision id whose ancestry is analysed.
    :return: whatever ``collapse_by_person`` returns for the revisions that
        are in the ancestry of ``end_rev`` but not in that of
        ``start_rev``.
    """
    a_repo.lock_read()
    try:
        # Create the progress bar only once the lock is held, and release
        # it in its own finally, so a failure in lock_read() or unlock()
        # can never leave a progress bar unfinished.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            pb.note('getting ancestry 1')
            # A set keeps the membership tests below cheap.
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry
                        if rev not in start_ancestry]
            # Same grouping as get_info(): sort_by_committer loads the
            # revisions and reports its own progress.
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            pb.finished()
    finally:
        a_repo.unlock()

    return collapse_by_person(committers)
	161
0.140.16 by Jelmer Vernooij Rename collapse_by_author -> collapse_by_person since author has an unambigous meaning	162
def display_info(info, to_file):
    """Write out the information

    :param info: iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are dicts mapping a string to
        the number of times it was seen.
    :param to_file: object with a ``write`` method; every line of the
        report, headings included, is written to it.
    """
    for count, revs, emails, fullnames in info:
        # Most frequently used entries first.
        sorted_emails = sorted(((seen, email)
                                for email, seen in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((seen, fullname)
                                   for fullname, seen in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Headings go to to_file like everything else, not to stdout.
            to_file.write(' Other names:\n')
            for name_count, fname in sorted_fullnames[1:]:
                to_file.write(' %4d ' % (name_count,))
                if fname == '':
                    # Make an empty name visible in the report.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            # Unlike the names above, this list repeats the top entry.
            for email_count, email in sorted_emails:
                to_file.write(' %4d ' % (email_count,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
	193
	194
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    # Command metadata: alternative names, one optional positional
    # argument and the 'revision' option.
    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree containing LOCATION; fall back to opening
        # a branch there when no working tree exists.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            last_rev = a_branch.last_revision()
        else:
            a_branch = tree.branch
            last_rev = tree.last_revision()

        # An explicit revision overrides the tip; a second one switches the
        # report to the difference between the two.  The revision specs are
        # resolved before the read lock below is taken.
        second_rev = None
        if revision is not None:
            last_rev = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                second_rev = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            repo = a_branch.repository
            if not second_rev:
                info = get_info(repo, last_rev)
            else:
                info = get_diff_info(repo, last_rev, second_rev)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)


commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	233
	234
	235	class cmd_ancestor_growth(commands.Command):
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	236	"""Figure out the ancestor graph for LOCATION"""
	237
	238	takes_args = ['location?']
	239
	240	encoding_type = 'replace'
	241
	242	def run(self, location='.'):
	243	try:
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	244	wt = workingtree.WorkingTree.open_containing(location)[0]
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	245	except errors.NoWorkingTree:
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	246	a_branch = branch.Branch.open(location)
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	247	last_rev = a_branch.last_revision()
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	248	else:
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	249	a_branch = wt.branch
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	250	last_rev = wt.last_revision()
	251
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	252	a_branch.lock_read()
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	253	try:
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	254	graph = a_branch.repository.get_revision_graph(last_rev)
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	255	finally:
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	256	a_branch.unlock()
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	257
	258	revno = 0
	259	cur_parents = 0
	260	sorted_graph = tsort.merge_sort(graph.iteritems(), last_rev)
	261	for num, node_name, depth, isend in reversed(sorted_graph):
	262	cur_parents += 1
	263	if depth == 0:
	264	revno += 1
	265	self.outf.write('%4d, %4d\n' % (revno, cur_parents))
	266
	267
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	268	commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij Add some simple tests for extract_fullname.	269
0.140.10 by John Arbash Meinel Minor whitespace cleanup	270
def test_suite():
    """Build the test suite for this package.

    :return: a ``unittest.TestSuite`` containing the tests loaded from this
        package's ``test_stats`` module.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    # No separate 'import test_stats' here: the loader imports each module
    # itself from the fully qualified name built below.
    suite = TestSuite()
    loader = TestLoader()
    testmod_names = ['test_stats']
    suite.addTest(loader.loadTestsFromModuleNames(
        ['%s.%s' % (__name__, name) for name in testmod_names]))
    return suite
	280