bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
1 |
# Copyright (C) 2008 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Import processor that dumps stats about the input (and doesn't import)."""
|
|
18 |
||
19 |
||
20 |
from bzrlib.trace import ( |
|
21 |
note, |
|
22 |
warning, |
|
23 |
)
|
|
24 |
from bzrlib.plugins.fastimport import ( |
|
|
0.83.2
by Ian Clatworthy
update fast-import-info to use same head-tracking code as fast-import |
25 |
cache_manager, |
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
26 |
commands, |
|
0.64.30
by Ian Clatworthy
add heads analysis to info processor |
27 |
helpers, |
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
28 |
processor, |
29 |
)
|
|
30 |
||
31 |
||
32 |
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, target=None, params=None, verbose=0, outf=None):
        """Create the processor.

        All arguments are passed straight through to
        processor.ImportProcessor.__init__.
        """
        # Allow creation without a target
        processor.ImportProcessor.__init__(self, target, params, verbose,
            outf=outf)

    def pre_process(self):
        """Reset every statistic collected by the command handlers."""
        self.note("Collecting statistics ...")
        # Init statistics
        self.cmd_counts = {}
        for cmd in commands.COMMAND_NAMES:
            self.cmd_counts[cmd] = 0
        self.file_cmd_counts = {}
        for fc in commands.FILE_COMMAND_NAMES:
            self.file_cmd_counts[fc] = 0
        # Map from number-of-parents to the number of commits seen with it
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        # Blob usage tracking
        self.blobs = {}
        for usage in ['new', 'used', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        # Map from blob mark to reference count, for blobs referenced
        # more than once
        self.blob_ref_counts = {}
        # Head tracking - delegate to the cache manager
        self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Write every collected statistics group to the output stream."""
        # Dump statistics
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        self._dump_stats_group("Command counts",
            [(c, self.cmd_counts[c]) for c in cmd_names], str)
        self._dump_stats_group("File command counts",
            [(c, self.file_cmd_counts[c]) for c in fc_names], str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_items = []
            for i in range(self.max_parent_count + 1):
                if i in self.parent_counts:
                    count = self.parent_counts[i]
                    p_items.append(("parents-%d" % i, count))
            merges_count = len(self.merges)
            p_items.append(('total revisions merged', merges_count))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_items, str)
            self._dump_stats_group("Commit analysis", flags.items(), _found)
            heads = helpers.invert_dictset(self.cache_mgr.heads)
            self._dump_stats_group("Head analysis", heads.items(), None,
                _iterable_as_config_list)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
            self._dump_stats_group("Merges", self.merges.items(), None)
            # We only show the rename old path and copy source paths when -vv
            # (verbose=2) is specified. The output here for mysql's data can't
            # be parsed currently so this bit of code needs more work anyhow ..
            if self.verbose >= 2:
                self._dump_stats_group("Rename old paths",
                    self.rename_old_paths.items(), len,
                    _iterable_as_config_list)
                self._dump_stats_group("Copy source paths",
                    self.copy_source_paths.items(), len,
                    _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            blob_usage = self.blobs.items()
            # In verbose mode, don't list every blob used. Filter the items
            # rather than deleting self.blobs['used'], so the tracking state
            # stays intact if more commands are handled or this method is
            # called again.
            if self.verbose:
                blob_usage = [(usage, marks) for usage, marks in blob_usage
                    if usage != 'used']
            self._dump_stats_group("Blob usage tracking",
                blob_usage, len, _iterable_as_config_list)
        if self.blob_ref_counts:
            blobs_by_count = helpers.invert_dict(self.blob_ref_counts)
            blob_items = sorted(blobs_by_count.items())
            self._dump_stats_group("Blob reference counts",
                blob_items, len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                }
            self._dump_stats_group("Reset analysis", reset_stats.items())

    def _dump_stats_group(self, title, items, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.

        :param title: the heading for the group, written as "[title]" in
          verbose mode and "title:" otherwise
        :param items: an iterable of (name, value) pairs
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        if self.verbose:
            self.outf.write("[%s]\n" % (title,))
            for name, value in items:
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Names may be non-strings (e.g. reference counts), so only
                # strings get their spaces turned into dashes.
                if isinstance(name, str):
                    name = name.replace(' ', '-')
                self.outf.write("%s = %s\n" % (name, value))
            self.outf.write("\n")
        else:
            self.outf.write("%s:\n" % (title,))
            for name, value in items:
                if normal_formatter is not None:
                    value = normal_formatter(value)
                self.outf.write("\t%s\t%s\n" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: blob_ref_counts is deliberately left alone, so a mark
            # already counted as referenced multiple times stays counted.
            self.blobs['used'].discard(cmd.id)

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.file_iter():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.is_executable:
                    self.executables_found = True
                if fc.kind == commands.SYMLINK_KIND:
                    self.symlinks_found = True
                if fc.dataref is not None:
                    # A dataref starting with ':' is tracked as a blob mark;
                    # anything else is recorded as a SHA reference.
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(
                    fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(
                    fc.src_path)

        # Track the heads
        parents = self.cache_mgr.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        self.parent_counts[parent_count] = \
            self.parent_counts.get(parent_count, 0) + 1
        if parent_count > self.max_parent_count:
            self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
            for merge in cmd.merges:
                self.merges[merge] = self.merges.get(merge, 0) + 1

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        elif cmd.from_ is not None:
            self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        self.cmd_counts[cmd.name] += 1
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                % (feature,))

    def _track_blob(self, mark):
        """Record one more reference to the blob identified by mark.

        A blob moves from 'new' to 'used' on its first reference and from
        'used' into blob_ref_counts (starting at 2) on its second. A mark
        that was never seen as a new blob is recorded as 'unknown'.
        """
        if mark in self.blob_ref_counts:
            self.blob_ref_counts[mark] += 1
        elif mark in self.blobs['used']:
            self.blob_ref_counts[mark] = 2
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
|
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
268 |
|
269 |
def _found(b): |
|
270 |
"""Format a found boolean as a string.""" |
|
271 |
return ['no', 'found'][b] |
|
|
0.64.24
by Ian Clatworthy
smart blob caching using analysis done by --info |
272 |
|
|
0.64.30
by Ian Clatworthy
add heads analysis to info processor |
273 |
def _iterable_as_config_list(s): |
274 |
"""Format an iterable as a sequence of comma-separated strings. |
|
|
0.64.24
by Ian Clatworthy
smart blob caching using analysis done by --info |
275 |
|
276 |
To match what ConfigObj expects, a single item list has a trailing comma.
|
|
277 |
"""
|
|
278 |
items = sorted(s) |
|
279 |
if len(items) == 1: |
|
280 |
return "%s," % (items[0],) |
|
281 |
else: |
|
282 |
return ", ".join(items) |