1
# Copyright (C) 2007 Canonical Ltd
 
 
3
# This program is free software; you can redistribute it and/or modify
 
 
4
# it under the terms of the GNU General Public License as published by
 
 
5
# the Free Software Foundation; either version 2 of the License, or
 
 
6
# (at your option) any later version.
 
 
8
# This program is distributed in the hope that it will be useful,
 
 
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 
11
# GNU General Public License for more details.
 
 
13
# You should have received a copy of the GNU General Public License
 
 
14
# along with this program; if not, write to the Free Software
 
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 
17
"""Benchmarks for bzr DirState performance."""
 
 
28
from bzrlib.tests.test__dirstate_helpers import (
 
 
29
    CompiledDirstateHelpersFeature,
 
 
33
class BenchmarkDirState(benchmarks.Benchmark):
 
 
34
    """Benchmarks for DirState functions."""
 
 
36
    def build_helper(self, layout):
 
 
37
        """This is a helper with the common build_??_dirstate funcs.
 
 
39
        :param layout: [(num_dirs, files_per_dir)]
 
 
40
            The number of directories per level, and the number of files to put
 
 
42
        :return: A DirState object with the given layout. The blocks will be
 
 
43
            modified in memory, and the object will be write locked. (Callers
 
 
44
            must save and unlock the object).
 
 
46
        self.build_tree(['dir/'])
 
 
48
        self.build_tree_contents([('file', contents)])
 
 
49
        file_stat = os.lstat('file')
 
 
50
        dir_stat = os.lstat('dir')
 
 
51
        file_sha1 = osutils.sha_string(contents)
 
 
53
        state = dirstate.DirState.initialize('state')
 
 
54
        def create_entries(base, layout):
 
 
57
            num_dirs, num_files = layout[0]
 
 
58
            for dnum in xrange(num_dirs):
 
 
60
                    path = '%s/%02d_directory' % (base, dnum)
 
 
62
                    path = '%02d_directory' % (dnum,)
 
 
63
                dir_id = generate_ids.gen_file_id(path)
 
 
64
                state.add(path, dir_id, 'directory', dir_stat, '')
 
 
65
                for fnum in xrange(num_files):
 
 
66
                    fname = '%s/%02d_filename' % (path, fnum)
 
 
67
                    file_id = generate_ids.gen_file_id(fname)
 
 
68
                    state.add(fname, file_id, 'file', file_stat, file_sha1)
 
 
69
                create_entries(path, layout[1:])
 
 
70
        create_entries(None, layout)
 
 
73
    def build_10k_dirstate_dirs(self):
 
 
74
        """Build a DirState file with 10k directories"""
 
 
75
        state = self.build_helper([(10, 0), (10, 0), (10, 0), (10, 1)])
 
 
80
    def build_20k_dirstate(self):
 
 
81
        """Build a DirState file with 20k records.
 
 
83
        This approximates a kernel tree, based on the number of directories
 
 
84
        (1000), and number of files per directory (20) and depth (3).
 
 
85
        Because DirState doesn't have to have actual disk records, we just add
 
 
87
        We try to have reasonable filename lengths, as well as a reasonable
 
 
90
        state = self.build_helper([(10, 0), (10, 0), (10, 20)])
 
 
95
    def build_20k_dirstate_with_parents(self, num_parents):
 
 
96
        """Build a DirState file with 20k records and N parents.
 
 
98
        With 1 parent, this is equivalent to after a simple commit. With 2 it
 
 
99
        is equivalent to after a merge.
 
 
101
        # All files are marked as changed in the same revision, and this occurs
 
 
102
        # supposedly in the history of the current trees.
 
 
103
        last_changed_id = generate_ids.gen_revision_id('joe@foo.com')
 
 
104
        parent_revision_ids = [generate_ids.gen_revision_id('joe@foo.com')
 
 
105
                               for i in xrange(num_parents)]
 
 
106
        # Start with a dirstate file with 0 parents
 
 
107
        state = self.build_helper([(10, 0), (10, 0), (10, 20)])
 
 
109
            # This invasively updates the internals of DirState to be fast,
 
 
110
            # since we don't have an api other than passing in Revision Tree
 
 
111
            # objects, but that requires having a real inventory, etc.
 
 
113
                for entry in state._iter_entries():
 
 
114
                    minikind, fingerprint, size, is_exec, packed_stat = entry[1][0]
 
 
115
                    for parent_id in parent_revision_ids:
 
 
116
                        # Add a parent record for this record
 
 
117
                        entry[1].append((minikind, fingerprint, size, is_exec,
 
 
119
            state._parents = parent_revision_ids
 
 
126
    def test_add_20k_entries(self):
 
 
127
        """Time how long it takes to add lots of entries."""
 
 
128
        state = self.time(self.build_helper, [(10, 0), (10, 0), (10, 20)])
 
 
131
    def test_build_20k_dirstate(self):
 
 
132
        state = self.time(self.build_20k_dirstate)
 
 
135
            entries = list(state._iter_entries())
 
 
136
            self.assertEqual(21111, len(entries))
 
 
140
    def test_build_20k_dirstate_1_parent(self):
 
 
141
        state = self.time(self.build_20k_dirstate_with_parents, 1)
 
 
145
            entries = list(state._iter_entries())
 
 
146
            self.assertEqual(21111, len(entries))
 
 
150
    def test_build_20k_dirstate_2_parents(self):
 
 
151
        state = self.time(self.build_20k_dirstate_with_parents, 2)
 
 
155
            entries = list(state._iter_entries())
 
 
156
            self.assertEqual(21111, len(entries))
 
 
160
    def test_save_20k_tree_0_parents(self):
 
 
161
        state = self.build_20k_dirstate()
 
 
164
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
165
                             state._dirblock_state)
 
 
166
            state._read_dirblocks_if_needed()
 
 
167
            state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
 
 
168
            self.time(state.save)
 
 
172
    def test_save_20k_tree_1_parent(self):
 
 
173
        state = self.build_20k_dirstate_with_parents(1)
 
 
176
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
177
                             state._dirblock_state)
 
 
178
            state._read_dirblocks_if_needed()
 
 
179
            state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
 
 
180
            self.time(state.save)
 
 
184
    def test_save_20k_tree_2_parents(self):
 
 
185
        state = self.build_20k_dirstate_with_parents(2)
 
 
188
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
189
                             state._dirblock_state)
 
 
190
            state._read_dirblocks_if_needed()
 
 
191
            state._dirblock_state = dirstate.DirState.IN_MEMORY_MODIFIED
 
 
192
            self.time(state.save)
 
 
196
    def test__read_dirblocks_20k_tree_0_parents_py(self):
 
 
197
        from bzrlib._dirstate_helpers_py import _read_dirblocks_py
 
 
198
        state = self.build_20k_dirstate()
 
 
201
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
202
                             state._dirblock_state)
 
 
203
            state._read_header_if_needed()
 
 
204
            self.time(_read_dirblocks_py, state)
 
 
208
    def test__read_dirblocks_20k_tree_0_parents_c(self):
 
 
209
        self.requireFeature(CompiledDirstateHelpersFeature)
 
 
210
        from bzrlib._dirstate_helpers_c import _read_dirblocks_c
 
 
211
        state = self.build_20k_dirstate()
 
 
214
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
215
                             state._dirblock_state)
 
 
216
            state._read_header_if_needed()
 
 
217
            self.time(_read_dirblocks_c, state)
 
 
221
    def test__read_dirblocks_20k_tree_1_parent_py(self):
 
 
222
        from bzrlib._dirstate_helpers_py import _read_dirblocks_py
 
 
223
        state = self.build_20k_dirstate_with_parents(1)
 
 
226
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
227
                             state._dirblock_state)
 
 
228
            state._read_header_if_needed()
 
 
229
            self.time(_read_dirblocks_py, state)
 
 
233
    def test__read_dirblocks_20k_tree_1_parent_c(self):
 
 
234
        self.requireFeature(CompiledDirstateHelpersFeature)
 
 
235
        from bzrlib._dirstate_helpers_c import _read_dirblocks_c
 
 
236
        state = self.build_20k_dirstate_with_parents(1)
 
 
239
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
240
                             state._dirblock_state)
 
 
241
            state._read_header_if_needed()
 
 
242
            self.time(_read_dirblocks_c, state)
 
 
246
    def test__read_dirblocks_20k_tree_2_parents_py(self):
 
 
247
        from bzrlib._dirstate_helpers_py import _read_dirblocks_py
 
 
248
        state = self.build_20k_dirstate_with_parents(2)
 
 
251
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
252
                             state._dirblock_state)
 
 
253
            state._read_header_if_needed()
 
 
254
            self.time(_read_dirblocks_py, state)
 
 
258
    def test__read_dirblocks_20k_tree_2_parents_c(self):
 
 
259
        self.requireFeature(CompiledDirstateHelpersFeature)
 
 
260
        from bzrlib._dirstate_helpers_c import _read_dirblocks_c
 
 
261
        state = self.build_20k_dirstate_with_parents(2)
 
 
264
            self.assertEqual(dirstate.DirState.NOT_IN_MEMORY,
 
 
265
                             state._dirblock_state)
 
 
266
            state._read_header_if_needed()
 
 
267
            self.time(_read_dirblocks_c, state)
 
 
271
    def do_bisect_list(self, bisect_func):
 
 
272
        """Call bisect_dirblock for each path."""
 
 
273
        # We use self._paths and self._blocks because we expect it to be a very
 
 
274
        # long list. And the interface for 'self.time()' causes the parameters
 
 
275
        # to be printed when run with --lsprof-timed. Which is *really* ugly
 
 
276
        # when the list is thousands of entries.
 
 
277
        blocks = self._blocks
 
 
278
        return [bisect_func(blocks, path) for path in self._paths]
 
 
280
    def do_bisect_list_cached(self, bisect_func):
 
 
281
        """Same as do_bisect_list, but cache the split paths"""
 
 
283
        blocks = self._blocks
 
 
284
        return [bisect_func(blocks, path, cache=cache) for path in self._paths]
 
 
286
    def setup_paths_and_offsets(self, state):
 
 
287
        """Get a list of paths and expected offsets.
 
 
289
        This will be used to check do_bisect_list*
 
 
291
        state._read_dirblocks_if_needed()
 
 
293
        expected_offsets = [0]
 
 
294
        for offset, info in enumerate(state._dirblocks):
 
 
296
            # We already handled the empty path
 
 
299
            # all paths are of the form ##_directory
 
 
300
            # so search for ##_director, ##_directory
 
 
301
            paths.extend([dirname[:-1], dirname])
 
 
302
            expected_offsets.extend([offset, offset])
 
 
304
        self._expected_offsets = expected_offsets
 
 
305
        self._blocks = state._dirblocks
 
 
307
    def checkOffsets(self, offsets):
 
 
308
        """Make sure offsets matches self._expected_offsets"""
 
 
309
        # These are really long lists, so it is easier to compare them with
 
 
310
        # assertEqualDiff. So turn them into strings.
 
 
311
        expected_str = '\n'.join(str(x) for x in self._expected_offsets)
 
 
312
        offset_str = '\n'.join(str(x) for x in offsets)
 
 
313
        self.assertEqualDiff(expected_str, offset_str)
 
 
315
    def test_bisect_dirblock_py(self):
 
 
316
        from bzrlib._dirstate_helpers_py import bisect_dirblock_py
 
 
317
        state = self.build_10k_dirstate_dirs()
 
 
320
            self.setup_paths_and_offsets(state)
 
 
321
            offsets = self.time(self.do_bisect_list,
 
 
323
            self.checkOffsets(offsets)
 
 
327
    def test_bisect_dirblock_cached_py(self):
 
 
328
        from bzrlib._dirstate_helpers_py import bisect_dirblock_py
 
 
329
        state = self.build_10k_dirstate_dirs()
 
 
332
            self.setup_paths_and_offsets(state)
 
 
333
            offsets = self.time(self.do_bisect_list_cached,
 
 
335
            self.checkOffsets(offsets)
 
 
339
    def test_bisect_dirblock_c(self):
 
 
340
        self.requireFeature(CompiledDirstateHelpersFeature)
 
 
341
        from bzrlib._dirstate_helpers_c import bisect_dirblock_c
 
 
342
        state = self.build_10k_dirstate_dirs()
 
 
345
            self.setup_paths_and_offsets(state)
 
 
346
            offsets = self.time(self.do_bisect_list, bisect_dirblock_c)
 
 
347
            self.checkOffsets(offsets)
 
 
351
    def create_path_names(self, layout, base=''):
 
 
352
        """Create a list of paths with auto-generated names.
 
 
354
        :param layout: A list of [(num_dirs, num_files)] tuples. For each
 
 
355
            level, the given number of directories will be created, each
 
 
356
            containing that many files.
 
 
357
            So [(2, 5), (3, 4)] will create 2 top level directories, containing
 
 
358
            5 files, and each top level directory will contain 3 subdirs with 4
 
 
360
        :param base: The base path to prepend to all entries, most callers will
 
 
362
        :return: A list of path names.
 
 
368
        num_dirs, num_files = layout[0]
 
 
369
        for dnum in xrange(num_dirs):
 
 
371
                path = '%s/%02d_directory' % (base, dnum)
 
 
373
                path = '%02d_directory' % (dnum,)
 
 
375
            for fnum in xrange(num_files):
 
 
376
                fname = '%s/%02d_filename' % (path, fnum)
 
 
378
            paths.extend(self.create_path_names(layout[1:], base=path))
 
 
381
    def test_create_path_names(self):
 
 
382
        names = self.create_path_names([(2, 3), (1, 2)])
 
 
383
        self.assertEqual(['00_directory',
 
 
384
                          '00_directory/00_filename',
 
 
385
                          '00_directory/01_filename',
 
 
386
                          '00_directory/02_filename',
 
 
387
                          '00_directory/00_directory',
 
 
388
                          '00_directory/00_directory/00_filename',
 
 
389
                          '00_directory/00_directory/01_filename',
 
 
391
                          '01_directory/00_filename',
 
 
392
                          '01_directory/01_filename',
 
 
393
                          '01_directory/02_filename',
 
 
394
                          '01_directory/00_directory',
 
 
395
                          '01_directory/00_directory/00_filename',
 
 
396
                          '01_directory/00_directory/01_filename',
 
 
398
        names = self.time(self.create_path_names, [(10, 2), (10, 2), (10, 20)])
 
 
399
        # 20 files + 1 directory name, 10 times, plus 2 filenames and 1 dir, 10
 
 
400
        # times, and another 2 files + 1 dir, 10 times
 
 
401
        self.assertEqual(21330, 10*(3 + 10*(3 + 10*(1 + 20))))
 
 
402
        self.assertEqual(21330, len(names))
 
 
404
    def compareAllPaths(self, cmp_func, layout):
 
 
405
        """Compare N^2 paths.
 
 
407
        Basically, compare every path in the list against every other path.
 
 
409
        paths = self.create_path_names(layout)
 
 
413
                    cmp_func(path1, path2)
 
 
414
        self.time(compare_all)
 
 
416
    def test_cmp_by_dirs_py(self):
 
 
417
        """Benchmark 103041 comparisons."""
 
 
418
        from bzrlib._dirstate_helpers_py import cmp_by_dirs_py
 
 
419
        self.compareAllPaths(cmp_by_dirs_py,
 
 
420
                             [(3, 1), (3, 1), (3, 1), (3, 2)])
 
 
422
    def test_cmp_by_dirs_c(self):
 
 
423
        self.requireFeature(CompiledDirstateHelpersFeature)
 
 
424
        from bzrlib._dirstate_helpers_c import cmp_by_dirs_c
 
 
425
        self.compareAllPaths(cmp_by_dirs_c,
 
 
426
                             [(3, 1), (3, 1), (3, 1), (3, 2)])