/brz/remove-bazaar : contents of breezy/plugins/fastimport/processors/info

: (revision 6830)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	1	# Copyright (C) 2008 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	15
	16	"""Import processor that dumps stats about the input (and doesn't import)."""
	17
6628.1.2 by Jelmer Vernooĳ Fix imports, move exporter.py, drop explorer metadata.	18	from __future__ import absolute_import
	19
	20	from .. import (
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	21	reftracker,
	22	)
6628.1.2 by Jelmer Vernooĳ Fix imports, move exporter.py, drop explorer metadata.	23	from ..helpers import (
0.139.1 by Jelmer Vernooij Import helper functions that have been removed from python-fastimport.	24	invert_dict,
	25	invert_dictset,
	26	)
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	27	from fastimport import (
	28	commands,
	29	processor,
	30	)
	31	import stat
	32
	33
	34	class InfoProcessor(processor.ImportProcessor):
	35	"""An import processor that dumps statistics about the input.
	36
	37	No changes to the current repository are made.
	38
	39	As well as providing useful information about an import
	40	stream before importing it, this processor is useful for
	41	benchmarking the speed at which data can be extracted from
	42	the source.
	43	"""
	44
	45	def __init__(self, params=None, verbose=0, outf=None):
	46	processor.ImportProcessor.__init__(self, params, verbose,
	47	outf=outf)
	48
	49	def pre_process(self):
	50	# Init statistics
	51	self.cmd_counts = {}
	52	for cmd in commands.COMMAND_NAMES:
	53	self.cmd_counts[cmd] = 0
	54	self.file_cmd_counts = {}
	55	for fc in commands.FILE_COMMAND_NAMES:
	56	self.file_cmd_counts[fc] = 0
	57	self.parent_counts = {}
	58	self.max_parent_count = 0
	59	self.committers = set()
	60	self.separate_authors_found = False
	61	self.symlinks_found = False
	62	self.executables_found = False
	63	self.sha_blob_references = False
	64	self.lightweight_tags = 0
	65	# Blob usage tracking
	66	self.blobs = {}
	67	for usage in ['new', 'used', 'unknown', 'unmarked']:
	68	self.blobs[usage] = set()
	69	self.blob_ref_counts = {}
	70	# Head tracking
	71	self.reftracker = reftracker.RefTracker()
	72	# Stuff to cache: a map from mark to # of times that mark is merged
	73	self.merges = {}
	74	# Stuff to cache: these are maps from mark to sets
	75	self.rename_old_paths = {}
	76	self.copy_source_paths = {}
	77
	78	def post_process(self):
	79	# Dump statistics
	80	cmd_names = commands.COMMAND_NAMES
	81	fc_names = commands.FILE_COMMAND_NAMES
	82	self._dump_stats_group("Command counts",
	83	[(c, self.cmd_counts[c]) for c in cmd_names], str)
	84	self._dump_stats_group("File command counts",
	85	[(c, self.file_cmd_counts[c]) for c in fc_names], str)
	86
	87	# Commit stats
	88	if self.cmd_counts['commit']:
	89	p_items = []
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	90	for i in range(self.max_parent_count + 1):
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	91	if i in self.parent_counts:
	92	count = self.parent_counts[i]
	93	p_items.append(("parents-%d" % i, count))
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	94	merges_count = len(self.merges)
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	95	p_items.append(('total revisions merged', merges_count))
	96	flags = {
	97	'separate authors found': self.separate_authors_found,
	98	'executables': self.executables_found,
	99	'symlinks': self.symlinks_found,
	100	'blobs referenced by SHA': self.sha_blob_references,
	101	}
	102	self._dump_stats_group("Parent counts", p_items, str)
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	103	self._dump_stats_group("Commit analysis", flags.items(), _found)
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	104	heads = invert_dictset(self.reftracker.heads)
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	105	self._dump_stats_group("Head analysis", heads.items(), None,
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	106	_iterable_as_config_list)
	107	# note("\t%d\t%s" % (len(self.committers), 'unique committers'))
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	108	self._dump_stats_group("Merges", self.merges.items(), None)
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	109	# We only show the rename old path and copy source paths when -vv
	110	# (verbose=2) is specified. The output here for mysql's data can't
	111	# be parsed currently so this bit of code needs more work anyhow ..
	112	if self.verbose >= 2:
	113	self._dump_stats_group("Rename old paths",
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	114	self.rename_old_paths.items(), len,
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	115	_iterable_as_config_list)
	116	self._dump_stats_group("Copy source paths",
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	117	self.copy_source_paths.items(), len,
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	118	_iterable_as_config_list)
	119
	120	# Blob stats
	121	if self.cmd_counts['blob']:
	122	# In verbose mode, don't list every blob used
	123	if self.verbose:
	124	del self.blobs['used']
	125	self._dump_stats_group("Blob usage tracking",
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	126	self.blobs.items(), len, _iterable_as_config_list)
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	127	if self.blob_ref_counts:
	128	blobs_by_count = invert_dict(self.blob_ref_counts)
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	129	blob_items = sorted(blobs_by_count.items())
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	130	self._dump_stats_group("Blob reference counts",
	131	blob_items, len, _iterable_as_config_list)
	132
	133	# Other stats
	134	if self.cmd_counts['reset']:
	135	reset_stats = {
	136	'lightweight tags': self.lightweight_tags,
	137	}
6656.1.1 by Martin Apply 2to3 dict fixer and clean up resulting mess using view helpers	138	self._dump_stats_group("Reset analysis", reset_stats.items())
0.64.349 by Jelmer Vernooij Reimport some modules removed from python-fastimport 0.9.2.	139
	140	def _dump_stats_group(self, title, items, normal_formatter=None,
	141	verbose_formatter=None):
	142	"""Dump a statistics group.
	143
	144	In verbose mode, do so as a config file so
	145	that other processors can load the information if they want to.
	146	:param normal_formatter: the callable to apply to the value
	147	before displaying it in normal mode
	148	:param verbose_formatter: the callable to apply to the value
	149	before displaying it in verbose mode
	150	"""
	151	if self.verbose:
	152	self.outf.write("[%s]\n" % (title,))
	153	for name, value in items:
	154	if verbose_formatter is not None:
	155	value = verbose_formatter(value)
	156	if type(name) == str:
	157	name = name.replace(' ', '-')
	158	self.outf.write("%s = %s\n" % (name, value))
	159	self.outf.write("\n")
	160	else:
	161	self.outf.write("%s:\n" % (title,))
	162	for name, value in items:
	163	if normal_formatter is not None:
	164	value = normal_formatter(value)
	165	self.outf.write("\t%s\t%s\n" % (value, name))
	166
	167	def progress_handler(self, cmd):
	168	"""Process a ProgressCommand."""
	169	self.cmd_counts[cmd.name] += 1
	170
	171	def blob_handler(self, cmd):
	172	"""Process a BlobCommand."""
	173	self.cmd_counts[cmd.name] += 1
	174	if cmd.mark is None:
	175	self.blobs['unmarked'].add(cmd.id)
	176	else:
	177	self.blobs['new'].add(cmd.id)
	178	# Marks can be re-used so remove it from used if already there.
	179	# Note: we definitely do NOT want to remove it from multi if
	180	# it's already in that set.
	181	try:
	182	self.blobs['used'].remove(cmd.id)
	183	except KeyError:
	184	pass
	185
	186	def checkpoint_handler(self, cmd):
	187	"""Process a CheckpointCommand."""
	188	self.cmd_counts[cmd.name] += 1
	189
	190	def commit_handler(self, cmd):
	191	"""Process a CommitCommand."""
	192	self.cmd_counts[cmd.name] += 1
	193	self.committers.add(cmd.committer)
	194	if cmd.author is not None:
	195	self.separate_authors_found = True
	196	for fc in cmd.iter_files():
	197	self.file_cmd_counts[fc.name] += 1
	198	if isinstance(fc, commands.FileModifyCommand):
	199	if fc.mode & 0111:
	200	self.executables_found = True
	201	if stat.S_ISLNK(fc.mode):
	202	self.symlinks_found = True
203	if fc.dataref is not None:
204	if fc.dataref[0] == ':':
205	self._track_blob(fc.dataref)
206	else:
207	self.sha_blob_references = True
208	elif isinstance(fc, commands.FileRenameCommand):
209	self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
210	elif isinstance(fc, commands.FileCopyCommand):
211	self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)
212
213	# Track the heads
214	parents = self.reftracker.track_heads(cmd)
215
216	# Track the parent counts
217	parent_count = len(parents)
218	if self.parent_counts.has_key(parent_count):
219	self.parent_counts[parent_count] += 1
220	else:
221	self.parent_counts[parent_count] = 1
222	if parent_count > self.max_parent_count:
223	self.max_parent_count = parent_count
224
225	# Remember the merges
226	if cmd.merges:
227	#self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
228	for merge in cmd.merges:
229	if merge in self.merges:
230	self.merges[merge] += 1
231	else:
232	self.merges[merge] = 1
233
234	def reset_handler(self, cmd):
235	"""Process a ResetCommand."""
236	self.cmd_counts[cmd.name] += 1
237	if cmd.ref.startswith('refs/tags/'):
238	self.lightweight_tags += 1
239	else:
240	if cmd.from_ is not None:
241	self.reftracker.track_heads_for_ref(
242	cmd.ref, cmd.from_)
243
244	def tag_handler(self, cmd):
245	"""Process a TagCommand."""
246	self.cmd_counts[cmd.name] += 1
247
248	def feature_handler(self, cmd):
249	"""Process a FeatureCommand."""
250	self.cmd_counts[cmd.name] += 1
251	feature = cmd.feature_name
252	if feature not in commands.FEATURE_NAMES:
253	self.warning("feature %s is not supported - parsing may fail"
254	% (feature,))
255
256	def _track_blob(self, mark):
257	if mark in self.blob_ref_counts:
258	self.blob_ref_counts[mark] += 1
259	pass
260	elif mark in self.blobs['used']:
261	self.blob_ref_counts[mark] = 2
262	self.blobs['used'].remove(mark)
263	elif mark in self.blobs['new']:
264	self.blobs['used'].add(mark)
265	self.blobs['new'].remove(mark)
266	else:
267	self.blobs['unknown'].add(mark)
268
269	def _found(b):
270	"""Format a found boolean as a string."""
271	return ['no', 'found'][b]
272
273	def _iterable_as_config_list(s):
274	"""Format an iterable as a sequence of comma-separated strings.
275
276	To match what ConfigObj expects, a single item list has a trailing comma.
277	"""
278	items = sorted(s)
279	if len(items) == 1:
280	return "%s," % (items[0],)
281	else:
282	return ", ".join(items)