/brz/remove-bazaar : contents of __init__.py

: (revision 0.140.14)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

0.140.1 by John Arbash Meinel A simple plugin for generating author statistics, may grow into more.	1	"""A Simple bzr plugin to generate statistics about the history."""
	2
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	3	import re
	4
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	5	from bzrlib.lazy_import import lazy_import
	6	lazy_import(globals(), """
	7	from bzrlib import (
	8	branch,
	9	commands,
	10	config,
	11	errors,
	12	tsort,
0.144.1 by Wesley J. Landaker Added ui to bzrlib lazy imports.	13	ui,
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	14	workingtree,
	15	)
	16	""")
	17	from bzrlib import lazy_regex
	18
	19
	20	_fullname_re = lazy_regex.lazy_compile(r'(?P<fullname>.?)\s<')
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	21
	22	def extract_fullname(committer):
	23	"""Try to get the user's name from their committer info."""
	24	m = _fullname_re.match(committer)
	25	if m:
	26	return m.group('fullname')
	27	try:
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	28	email = config.extract_email_address(committer)
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	29	except errors.BzrError:
	30	return committer
	31	else:
	32	# We found an email address, but not a fullname
	33	# so there is no fullname
	34	return ''
	35
	36
	37	def find_fullnames(lst):
	38	"""Find the fullnames for a list committer names."""
	39
	40	counts = {}
	41	for committer in lst:
	42	fullname = extract_fullname(committer)
	43	counts.setdefault(fullname, 0)
	44	counts[fullname] += 1
	45	return sorted(((count, name) for name,count in counts.iteritems()), reverse=True)
	46
	47
	48	def collapse_by_author(committers):
	49	"""The committers list is sorted by email, fix it up by author.
	50
	51	Some people commit with a similar username, but different email
	52	address. Which makes it hard to sort out when they have multiple
	53	entries. Email is actually more stable, though, since people
	54	frequently forget to set their name properly.
	55
	56	So take the most common username for each email address, and
	57	combine them into one new list.
	58	"""
	59	# Just an indirection so that multiple names can reference
	60	# the same record information
	61	name_to_counter = {}
	62	# indirection back to real information
	63	# [[full_rev_list], {email:count}, {fname:count}]
	64	counter_to_info = {}
	65	counter = 0
	66	for email, revs in committers.iteritems():
	67	fullnames = find_fullnames(rev.committer for rev in revs)
	68	match = None
	69	for count, fullname in fullnames:
	70	if fullname and fullname in name_to_counter:
	71	# We found a match
	72	match = name_to_counter[fullname]
	73	break
	74
	75	if match:
	76	# One of the names matched, we need to collapse to records
	77	record = counter_to_info[match]
	78	record[0].extend(revs)
	79	record[1][email] = len(revs)
	80	for count, fullname in fullnames:
	81	name_to_counter[fullname] = match
	82	record[2].setdefault(fullname, 0)
	83	record[2][fullname] += count
	84	else:
	85	# just add this one to the list
	86	counter += 1
	87	for count, fullname in fullnames:
	88	if fullname:
	89	name_to_counter[fullname] = counter
	90	fname_map = dict((fullname, count) for count, fullname in fullnames)
	91	counter_to_info[counter] = [revs, {email:len(revs)}, fname_map]
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	92	return sorted(((len(revs), revs, email, fname)
0.140.3 by John Arbash Meinel Updated to combine by author name, as well as by email address, and report on multiple names/addresses	93	for revs, email, fname in counter_to_info.values()), reverse=True)
	94
	95
def sort_by_committer(a_repo, revids):
    """Group the revisions named by ``revids`` by committer email.

    :param a_repo: Repository whose ``get_revisions`` is used to load the
        revisions.
    :param revids: The revision ids to load; its length is reported as the
        progress total.
    :return: A dict mapping each email address to the list of revisions
        committed with it.  When ``config.extract_email_address`` raises
        ``errors.BzrError`` for a committer string, the raw string is used
        as the key instead.
    """
    by_email = {}
    progress = ui.ui_factory.nested_progress_bar()
    try:
        progress.note('getting revisions')
        loaded = a_repo.get_revisions(revids)
        for index, revision in enumerate(loaded):
            progress.update('checking', index, len(revids))
            try:
                key = config.extract_email_address(revision.committer)
            except errors.BzrError:
                key = revision.committer
            if key not in by_email:
                by_email[key] = []
            by_email[key].append(revision)
    finally:
        # Always release the progress bar, even if loading fails.
        progress.finished()

    return by_email
	113
	114
def get_info(a_repo, revision):
    """Get all of the information for a particular revision.

    :param a_repo: Repository to read from; it is read-locked for the
        duration of the lookup.
    :param revision: Revision id whose ancestry should be summarised.
    :return: The result of ``collapse_by_author`` for the committers of the
        ancestry of ``revision``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is released
        # even when locking (or, later, unlocking) raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry')
            # The first ancestry entry is skipped -- presumably the
            # null-revision placeholder; confirm against get_ancestry().
            ancestry = a_repo.get_ancestry(revision)[1:]

            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	129
	130
def get_diff_info(a_repo, start_rev, end_rev):
    """Get only the info for new revisions between the two revisions

    This lets us figure out what has actually changed between 2 revisions.

    :param a_repo: Repository to read from; it is read-locked for the
        duration of the lookup.
    :param start_rev: Revision id whose ancestry is excluded.
    :param end_rev: Revision id whose ancestry is examined.
    :return: The result of ``collapse_by_author`` for the committers of the
        revisions that are in the ancestry of ``end_rev`` but not in the
        ancestry of ``start_rev``.
    """
    pb = ui.ui_factory.nested_progress_bar()
    try:
        # Take the lock inside the outer try so the progress bar is released
        # even when locking (or, later, unlocking) raises.
        a_repo.lock_read()
        try:
            pb.note('getting ancestry 1')
            start_ancestry = set(a_repo.get_ancestry(start_rev))
            pb.note('getting ancestry 2')
            # The first ancestry entry is skipped -- presumably the
            # null-revision placeholder; confirm against get_ancestry().
            ancestry = a_repo.get_ancestry(end_rev)[1:]
            ancestry = [rev for rev in ancestry if rev not in start_ancestry]

            # Same grouping as get_info(): load the revisions and bucket
            # them by committer email through the shared helper.
            committers = sort_by_committer(a_repo, ancestry)
        finally:
            a_repo.unlock()
    finally:
        pb.finished()

    return collapse_by_author(committers)
	161
def display_info(info, to_file):
    """Write out the information.

    :param info: An iterable of ``(count, revs, emails, fullnames)`` tuples,
        where ``emails`` and ``fullnames`` are non-empty dicts mapping an
        email address / full name to the number of times it was used.
    :param to_file: File-like object; all output is sent to its ``write``
        method (nothing is printed to stdout).
    """
    for count, revs, emails, fullnames in info:
        # Get the most common email and name: highest usage count first.
        sorted_emails = sorted(((uses, email)
                                for email, uses in emails.items()),
                               reverse=True)
        sorted_fullnames = sorted(((uses, fullname)
                                   for fullname, uses in fullnames.items()),
                                  reverse=True)
        to_file.write('%4d %s <%s>\n'
                      % (count, sorted_fullnames[0][1],
                         sorted_emails[0][1]))
        if len(sorted_fullnames) > 1:
            # Section headers go through to_file like everything else, so
            # the report stays in one stream and in the right order.
            to_file.write(' Other names:\n')
            for uses, fname in sorted_fullnames[1:]:
                to_file.write(' %4d ' % (uses,))
                if fname == '':
                    # Make an empty name visible in the report.
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (fname,))
        if len(sorted_emails) > 1:
            to_file.write(' Other email addresses:\n')
            # NOTE(review): unlike the names above, this lists every address
            # including the primary one; kept as-is, confirm it is intended.
            for uses, email in sorted_emails:
                to_file.write(' %4d ' % (uses,))
                if email == '':
                    to_file.write("''\n")
                else:
                    to_file.write("%s\n" % (email,))
	192
	193
class cmd_committer_statistics(commands.Command):
    """Generate statistics for LOCATION."""

    # Alternative names under which the command can be invoked.
    aliases = ['stats', 'committer-stats']
    takes_args = ['location?']
    takes_options = ['revision']

    encoding_type = 'replace'

    def run(self, location='.', revision=None):
        # Prefer the working tree at `location`; fall back to a bare branch
        # when there is no working tree.
        try:
            tree = workingtree.WorkingTree.open_containing(location)[0]
        except errors.NoWorkingTree:
            a_branch = branch.Branch.open(location)
            tip = a_branch.last_revision()
        else:
            a_branch = tree.branch
            tip = tree.last_revision()

        # An explicit revision overrides the tip; a second one selects the
        # "difference between two revisions" report.
        other = None
        if revision is not None:
            tip = revision[0].in_history(a_branch).rev_id
            if len(revision) > 1:
                other = revision[1].in_history(a_branch).rev_id

        a_branch.lock_read()
        try:
            if other:
                info = get_diff_info(a_branch.repository, tip, other)
            else:
                info = get_info(a_branch.repository, tip)
        finally:
            a_branch.unlock()
        display_info(info, self.outf)
0.140.1 by John Arbash Meinel A simple plugin for generating author statistics, may grow into more.	229
	230
# Register the committer statistics command (aliases 'stats' and
# 'committer-stats') with bzrlib's command machinery.
commands.register_command(cmd_committer_statistics)
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	232
	233
	234	class cmd_ancestor_growth(commands.Command):
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	235	"""Figure out the ancestor graph for LOCATION"""
	236
	237	takes_args = ['location?']
	238
	239	encoding_type = 'replace'
	240
	241	def run(self, location='.'):
	242	try:
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	243	wt = workingtree.WorkingTree.open_containing(location)[0]
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	244	except errors.NoWorkingTree:
0.143.1 by John Arbash Meinel Make a lot of imports lazy since they may not actually be used.	245	a_branch = branch.Branch.open(location)
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	246	last_rev = a_branch.last_revision()
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	247	else:
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	248	a_branch = wt.branch
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	249	last_rev = wt.last_revision()
	250
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	251	a_branch.lock_read()
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	252	try:
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	253	graph = a_branch.repository.get_revision_graph(last_rev)
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	254	finally:
0.140.6 by John Arbash Meinel refactor in preparation for supporting 2 revision specs	255	a_branch.unlock()
0.140.4 by John Arbash Meinel added ancestry_growth to generate a csv of ancestors.	256
	257	revno = 0
	258	cur_parents = 0
	259	sorted_graph = tsort.merge_sort(graph.iteritems(), last_rev)
	260	for num, node_name, depth, isend in reversed(sorted_graph):
	261	cur_parents += 1
	262	if depth == 0:
	263	revno += 1
	264	self.outf.write('%4d, %4d\n' % (revno, cur_parents))
	265
	266
# Register the ancestor growth command with bzrlib's command machinery.
commands.register_command(cmd_ancestor_growth)
0.141.1 by Jelmer Vernooij Add some simple tests for extract_fullname.	268
0.140.10 by John Arbash Meinel Minor whitespace cleanup	269
def test_suite():
    """Build the test suite for this plugin.

    :return: A ``unittest.TestSuite`` holding the tests that bzrlib's
        ``TestLoader`` loads from this package's ``test_stats`` module.
    """
    from unittest import TestSuite
    from bzrlib.tests import TestLoader
    import test_stats

    # Fully qualified module names, relative to this package.
    qualified = []
    for short_name in ['test_stats']:
        qualified.append('%s.%s' % (__name__, short_name))

    result = TestSuite()
    result.addTest(TestLoader().loadTestsFromModuleNames(qualified))
    return result
	279