bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2052.3.2
by John Arbash Meinel
 Change Copyright .. by Canonical to Copyright ... Canonical  | 
1  | 
# Copyright (C) 2006 Canonical Ltd
 | 
| 
1911.2.1
by John Arbash Meinel
 Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate  | 
2  | 
#
 | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
| 
2052.3.1
by John Arbash Meinel
 Add tests to cleanup the copyright of all source files  | 
4  | 
# it under the terms of the GNU General Public License as published by
 | 
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
| 
1911.2.1
by John Arbash Meinel
 Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate  | 
7  | 
#
 | 
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
| 
2052.3.1
by John Arbash Meinel
 Add tests to cleanup the copyright of all source files  | 
17  | 
|
| 
1911.2.1
by John Arbash Meinel
 Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate  | 
18  | 
"""Tests for encoding performance."""
 | 
19  | 
||
20  | 
from bzrlib import (  | 
|
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
21  | 
cache_utf8,  | 
| 
1911.2.1
by John Arbash Meinel
 Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate  | 
22  | 
osutils,  | 
23  | 
    )
 | 
|
24  | 
||
25  | 
from bzrlib.benchmarks import Benchmark  | 
|
26  | 
||
27  | 
||
28  | 
_normal_revision_id = (u'john@arbash-meinel.com-20060801200018'  | 
|
29  | 
u'-cafa6272d9b8cac4')  | 
|
30  | 
_unicode_revision_id = (u'\u062c\u0648\u062c\u0648@\xe5rbash-meinel.com-'  | 
|
31  | 
u'\xb5\xb5\xb5-20060801200018-cafa6272d9b8cac4')  | 
|
32  | 
||
33  | 
_normal_revision_id_utf8 = _normal_revision_id.encode('utf-8')  | 
|
34  | 
_unicode_revision_id_utf8 = _unicode_revision_id.encode('utf-8')  | 
|
35  | 
||
36  | 
||
class EncodingBenchmark(Benchmark):
    """Benchmark the time to encode strings.

    Each scenario compares plain ``unicode.encode('utf-8')`` against the
    ``cache_utf8.encode`` cached variant, for: one id repeated 1M times,
    1000 ids repeated 1000 times each, and 500K ids used once each
    (the worst case for a cache, since nothing repeats).
    """

    def setUp(self):
        super(EncodingBenchmark, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def encode_1M(self, revision_id):
        """Encode the given revision id 1 million times"""
        # In a real kernel tree there are 7.7M lines of code
        # so the initial import actually has to encode a revision
        # id to store annotated lines one time for every line.
        for i in xrange(1000000):
            revision_id.encode('utf8')

    def encode_cached_1M(self, revision_id):
        """Encode the given revision id 1 million times using the cache"""
        # Hoist the attribute lookup out of the loop so the timing
        # measures the cache itself, not repeated module lookups.
        encode = cache_utf8.encode
        for i in xrange(1000000):
            encode(revision_id)

    def encode_multi(self, revision_list, count):
        """Encode each entry in the list count times"""
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.encode('utf-8')

    def encode_cached_multi(self, revision_list, count):
        """Encode each entry in the list count times using the cache"""
        encode = cache_utf8.encode
        for i in xrange(count):
            for revision_id in revision_list:
                encode(revision_id)

    def test_encode_1_by_1M_ascii(self):
        """Test encoding a single ascii revision id 1 million times."""
        self.time(self.encode_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_cached(self):
        """Test encoding a single ascii revision id 1M times, cached."""
        self.time(self.encode_cached_1M, _normal_revision_id)

    def test_encode_1_by_1M_ascii_str(self):
        """Test encoding a plain-str revision id 1 million times."""
        # We have places that think they have a unicode revision id
        # but actually, they have a plain string. So .encode(utf8)
        # actually has to decode from ascii, and then encode into utf8
        self.time(self.encode_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_ascii_str_cached(self):
        """Test encoding a plain-str revision id 1M times, cached."""
        self.time(self.encode_cached_1M, str(_normal_revision_id))

    def test_encode_1_by_1M_unicode(self):
        """Test encoding a single non-ascii revision id 1 million times."""
        self.time(self.encode_1M, _unicode_revision_id)

    def test_encode_1_by_1M_unicode_cached(self):
        """Test encoding a single non-ascii revision id 1M times, cached."""
        self.time(self.encode_cached_1M, _unicode_revision_id)

    def test_encode_1k_by_1k_ascii(self):
        """Test encoding 1000 random ascii ids 1000 times each."""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_ascii_cached(self):
        """Test encoding 1000 random ascii ids 1000 times each, cached."""
        revisions = [unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode(self):
        """Test encoding 1000 random non-ascii ids 1000 times each."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_multi, revisions, 1000)

    def test_encode_1k_by_1k_unicode_cached(self):
        """Test encoding 1000 random non-ascii ids 1000 times, cached."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode(osutils.rand_chars(60)) for x in xrange(1000)]
        self.time(self.encode_cached_multi, revisions, 1000)

    def test_encode_500K_by_1_ascii(self):
        """Test encoding 500,000 unique ascii ids once each."""
        revisions = [unicode("test%07d" % x) for x in xrange(500000)]
        self.time(self.encode_multi, revisions, 1)

    def test_encode_500K_by_1_ascii_cached(self):
        """Test encoding 500,000 unique ascii ids once each, cached."""
        revisions = [unicode("test%07d" % x) for x in xrange(500000)]
        self.time(self.encode_cached_multi, revisions, 1)

    def test_encode_500K_by_1_unicode(self):
        """Test encoding 500,000 unique non-ascii ids once each."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode("%07d" % x) for x in xrange(500000)]
        self.time(self.encode_multi, revisions, 1)

    def test_encode_500K_by_1_unicode_cached(self):
        """Test encoding 500,000 unique non-ascii ids once each, cached."""
        revisions = [u'\u062c\u0648\u062c\u0648' +
                     unicode("%07d" % x) for x in xrange(500000)]
        self.time(self.encode_cached_multi, revisions, 1)
class DecodingBenchmarks(Benchmark):
    """Benchmark the time to decode strings.

    Mirror of EncodingBenchmark for the utf-8 -> unicode direction:
    plain ``str.decode('utf-8')`` versus the ``cache_utf8.decode``
    cached variant, over repeated, semi-repeated, and unique inputs.
    """

    def setUp(self):
        super(DecodingBenchmarks, self).setUp()
        # Make sure we start and end with a clean cache
        cache_utf8.clear_encoding_cache()
        self.addCleanup(cache_utf8.clear_encoding_cache)

    def decode_1M(self, revision_id):
        """Decode the given utf-8 revision id 1 million times"""
        for i in xrange(1000000):
            revision_id.decode('utf8')

    def decode_cached_1M(self, revision_id):
        """Decode the given revision id 1 million times using the cache"""
        # Hoist the attribute lookup out of the loop so the timing
        # measures the cache itself, not repeated module lookups.
        decode = cache_utf8.decode
        for i in xrange(1000000):
            decode(revision_id)

    def decode_multi(self, revision_list, count):
        """Decode each entry in the list count times"""
        for i in xrange(count):
            for revision_id in revision_list:
                revision_id.decode('utf-8')

    def decode_cached_multi(self, revision_list, count):
        """Decode each entry in the list count times using the cache"""
        decode = cache_utf8.decode
        for i in xrange(count):
            for revision_id in revision_list:
                decode(revision_id)

    def test_decode_1_by_1M_ascii(self):
        """Test decoding a single ascii revision id 1 million times."""
        self.time(self.decode_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_ascii_cached(self):
        """Test decoding a single ascii revision id 1M times, cached."""
        self.time(self.decode_cached_1M, _normal_revision_id_utf8)

    def test_decode_1_by_1M_unicode(self):
        """Test decoding a single non-ascii revision id 1 million times."""
        self.time(self.decode_1M, _unicode_revision_id_utf8)

    def test_decode_1_by_1M_unicode_cached(self):
        """Test decoding a single non-ascii revision id 1M times, cached."""
        self.time(self.decode_cached_1M, _unicode_revision_id_utf8)

    def test_decode_1k_by_1k_ascii(self):
        """Test decoding 1000 random ascii ids 1000 times each."""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_ascii_cached(self):
        """Test decoding 1000 random ascii ids 1000 times each, cached."""
        revisions = [osutils.rand_chars(60) for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode(self):
        """Test decoding 1000 random non-ascii ids 1000 times each."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_multi, revisions, 1000)

    def test_decode_1k_by_1k_unicode_cached(self):
        """Test decoding 1000 random non-ascii ids 1000 times, cached."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode(osutils.rand_chars(60))).encode('utf8')
                     for x in xrange(1000)]
        self.time(self.decode_cached_multi, revisions, 1000)

    def test_decode_500K_by_1_ascii(self):
        """Test decoding 500,000 unique ascii ids once each."""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_ascii_cached(self):
        """Test decoding 500,000 unique ascii ids once each, cached."""
        revisions = [("test%07d" % x) for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)

    def test_decode_500K_by_1_unicode(self):
        """Test decoding 500,000 unique non-ascii ids once each."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_multi, revisions, 1)

    def test_decode_500K_by_1_unicode_cached(self):
        """Test decoding 500,000 unique non-ascii ids once each, cached."""
        revisions = [(u'\u062c\u0648\u062c\u0648' +
                      unicode("%07d" % x)).encode('utf-8')
                     for x in xrange(500000)]
        self.time(self.decode_cached_multi, revisions, 1)