/brz/remove-bazaar : revision 0.76.2

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to processors/info_processor.py

Committer: Ian Clatworthy
Date: 2009-02-17 23:37:24 UTC
mto: (0.64.114 trunk)
mto: This revision was merged to the branch mainline in revision 6631.
Revision ID: ian.clatworthy@canonical.com-20090217233724-y6q12cyoyln6vkh6

code & tests for file copying

files added:
.bzrignore

COPYING.txt

NEWS

README.txt

__init__.py

commands.py

dates.py

doc/notes.txt

errors.py

exporters

exporters/Makefile

exporters/bzr-fast-export

exporters/bzr-fast-export.LICENSE

exporters/bzr-fast-export.README

exporters/hg-fast-export.README

exporters/hg-fast-export.py

exporters/hg-fast-export.sh

exporters/hg2git.py

exporters/svn-archive.c

exporters/svn-fast-export.c

exporters/svn-fast-export.py

helpers.py

idmapfile.py

parser.py

processor.py

processors

processors/__init__.py

processors/generic_processor.py

processors/info_processor.py

processors/query_processor.py

revisionloader.py

setup.py

tests

tests/__init__.py

tests/test_errors.py

tests/test_generic_processor.py

tests/test_parser.py

files removed:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

generate_docs.py

notes

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

Show diffs side-by-side

added added

removed removed

processors/info_processor.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Import processor that dump stats about the input (and doesn't import)."""

from bzrlib.trace import (

note,

warning,

)

from bzrlib.plugins.fastimport import (

commands,

helpers,

processor,

)

class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=False):
        """Create the processor.

        All three arguments are forwarded unchanged to
        ``processor.ImportProcessor.__init__``.

        :param target: may be None; this processor makes no changes to a
            repository, so it can be created without one
        :param params: forwarded to the base class
        :param verbose: forwarded to the base class; ``self.verbose`` is
            later read to pick the output format
            (NOTE(review): presumably stored by the base class - confirm)
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose)

    def pre_process(self):
        """Reset every statistic to its initial, empty value.

        NOTE(review): presumably called by the base class before the
        first command is handled - confirm against processor.py.
        """
        # Per-command and per-file-command counters, one slot per known name
        self.cmd_counts = dict((name, 0) for name in commands.COMMAND_NAMES)
        self.file_cmd_counts = dict(
            (name, 0) for name in commands.FILE_COMMAND_NAMES)

        # Commit statistics: number of commits seen for each parent count
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False

        # Reset statistics
        self.lightweight_tags = 0
        self.named_branches = []

        # Blob usage tracking: each blob id/mark lives in at most one of
        # 'new', 'used' and 'multi' as it is referenced by commits
        self.blobs = dict(
            (usage, set())
            for usage in ['new', 'used', 'multi', 'unknown', 'unmarked'])

        # Head tracking: map of commit mark to ref
        self.heads = {}
        # Most recent commit id seen on each ref
        self.last_ids = {}

    def post_process(self):
        """Print the collected statistics, one group at a time.

        The command and file-command groups are always printed; the
        commit, blob and reset groups are printed only when at least one
        command of that kind was counted.
        """
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        cmd_values = [self.cmd_counts[c] for c in cmd_names]
        fc_values = [self.file_cmd_counts[c] for c in fc_names]
        self._dump_stats_group("Command counts", cmd_names, cmd_values, str)
        self._dump_stats_group("File command counts", fc_names, fc_values, str)

        # Commit stats
        if self.cmd_counts['commit']:
            # Report parent counts in ascending order, skipping counts
            # that never occurred.
            seen = [i for i in range(self.max_parent_count + 1)
                    if i in self.parent_counts]
            p_names = ["parents-%d" % i for i in seen]
            p_values = [self.parent_counts[i] for i in seen]
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_names, p_values, str)
            self._dump_stats_group("Commit analysis", flags.keys(),
                flags.values(), _found)
            heads = helpers.invert_dict(self.heads)
            self._dump_stats_group("Head analysis", heads.keys(),
                heads.values(), None, _iterable_as_config_list)
            # self.committers is collected but its report line is disabled:
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used.  Filter a copy
            # rather than deleting from self.blobs, so the tracking state
            # stays intact if this method runs again or more commands
            # arrive afterwards.
            blob_stats = dict(self.blobs)
            if self.verbose:
                blob_stats.pop('used', None)
            self._dump_stats_group("Blob usage tracking", blob_stats.keys(),
                blob_stats.values(), len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                'other resets': self.named_branches,
                }
            self._dump_stats_group("Reset analysis", reset_stats.keys(),
                reset_stats.values())

    def _dump_stats_group(self, title, names, values, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading printed for the group
        :param names: the statistic names, paired positionally with values
        :param values: the statistic values
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        # print(...) with a single argument behaves identically as a
        # Python 2 statement and as a Python 3 function call.
        if self.verbose:
            print("[%s]" % (title,))
            for name, value in zip(names, values):
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Spaces in names become dashes in the config-style output
                print("%s = %s" % (name.replace(' ', '-'), value))
            print("")
        else:
            print("%s:" % (title,))
            for name, value in zip(names, values):
                if normal_formatter is not None:
                    value = normal_formatter(value)
                print("\t%s\t%s" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand.

        Counts the commit and its file commands, records which features
        it uses (separate author, executables, symlinks, blob references
        by SHA), and updates the head and parent-count tracking.
        """
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if not isinstance(fc, commands.FileModifyCommand):
                continue
            if fc.is_executable:
                self.executables_found = True
            if fc.kind == commands.SYMLINK_KIND:
                self.symlinks_found = True
            if fc.dataref is not None:
                # A leading ':' means the data is referenced by mark;
                # anything else is counted as a reference by SHA.
                if fc.dataref[0] == ':':
                    self._track_blob(fc.dataref)
                else:
                    self.sha_blob_references = True

        # Track the heads: the first parent is the explicit 'from', or
        # failing that the last commit seen on the same ref.
        if cmd.from_ is not None:
            parents = [cmd.from_]
        else:
            last_id = self.last_ids.get(cmd.ref)
            if last_id is not None:
                parents = [last_id]
            else:
                parents = []
        parents.extend(cmd.merges)
        for parent in parents:
            # it's ok if the parent isn't there - another
            # commit may have already removed it
            self.heads.pop(parent, None)
        self.heads[cmd.id] = cmd.ref
        self.last_ids[cmd.ref] = cmd.id

        parent_count = len(parents)
        self.parent_counts[parent_count] = (
            self.parent_counts.get(parent_count, 0) + 1)
        self.max_parent_count = max(self.max_parent_count, parent_count)

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            self.named_branches.append(cmd.ref)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' to 'multi' on its second; further references leave it in
        'multi'.  A mark in none of those sets is recorded as 'unknown'.
        """
        if mark in self.blobs['multi']:
            pass
        elif mark in self.blobs['used']:
            self.blobs['multi'].add(mark)
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)

236

237

def _found(b):

238

"""Format a found boolean as a string."""

239

return ['no', 'found'][b]

240

241

def _iterable_as_config_list(s):

242

"""Format an iterable as a sequence of comma-separated strings.

243

244

To match what ConfigObj expects, a single item list has a trailing comma.

245

"""

246

items = sorted(s)

247

if len(items) == 1:

248

return "%s," % (items[0],)

249

else:

250

return ", ".join(items)

Older »