# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Direct tests of the btree serializer extension"""

import binascii
import bisect

from .. import tests
from ..sixish import (
    int2byte,
    )

from .test_btree_index import compiled_btreeparser_feature


class TestBtreeSerializer(tests.TestCase):

    _test_needs_features = [compiled_btreeparser_feature]

    @property
    def module(self):
        return compiled_btreeparser_feature.module
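

# The _py_* helpers exercised below are the Python-visible entry points of the
# compiled serializer; the hex ones are expected to mirror binascii.hexlify and
# binascii.unhexlify exactly (a summary of the assertions that follow).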
class TestHexAndUnhex(TestBtreeSerializer):

    def assertHexlify(self, as_binary):
        self.assertEqual(binascii.hexlify(as_binary),
                         self.module._py_hexlify(as_binary))

    def assertUnhexlify(self, as_hex):
        ba_unhex = binascii.unhexlify(as_hex)
        mod_unhex = self.module._py_unhexlify(as_hex)
        if ba_unhex != mod_unhex:
            if mod_unhex is None:
                mod_hex = '<None>'
            else:
                mod_hex = binascii.hexlify(mod_unhex)
            self.fail('_py_unhexlify returned a different answer'
                      ' from binascii:\n    %r\n != %r'
                      % (binascii.hexlify(ba_unhex), mod_hex))

    def assertFailUnhexlify(self, as_hex):
        # Invalid hex content should give back None rather than raising
        self.assertIs(None, self.module._py_unhexlify(as_hex))

    def test_to_hex(self):
        raw_bytes = b''.join(map(int2byte, range(256)))
        for i in range(0, 240, 20):
            self.assertHexlify(raw_bytes[i:i+20])
        self.assertHexlify(raw_bytes[240:]+raw_bytes[0:4])

    def test_from_hex(self):
        self.assertUnhexlify(b'0123456789abcdef0123456789abcdef01234567')
        self.assertUnhexlify(b'123456789abcdef0123456789abcdef012345678')
        self.assertUnhexlify(b'0123456789ABCDEF0123456789ABCDEF01234567')
        self.assertUnhexlify(b'123456789ABCDEF0123456789ABCDEF012345678')
        hex_chars = binascii.hexlify(b''.join(map(int2byte, range(256))))
        for i in range(0, 480, 40):
            self.assertUnhexlify(hex_chars[i:i+40])
        self.assertUnhexlify(hex_chars[480:]+hex_chars[0:8])

    def test_from_invalid_hex(self):
        self.assertFailUnhexlify(b'123456789012345678901234567890123456789X')
        self.assertFailUnhexlify(b'12345678901234567890123456789012345678X9')

    def test_bad_argument(self):
        self.assertRaises(ValueError, self.module._py_unhexlify, u'1a')
        self.assertRaises(ValueError, self.module._py_unhexlify, b'1b')
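

# A fixed 40-character hex digest, reused below as a syntactically valid
# stand-in for a real SHA1.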
_hex_form = b'123456789012345678901234567890abcdefabcd'


class Test_KeyToSha1(TestBtreeSerializer):

    def assertKeyToSha1(self, expected, key):
        if expected is None:
            expected_bin = None
        else:
            expected_bin = binascii.unhexlify(expected)
        actual_sha1 = self.module._py_key_to_sha1(key)
        if expected_bin != actual_sha1:
            actual_hex_sha1 = None
            if actual_sha1 is not None:
                actual_hex_sha1 = binascii.hexlify(actual_sha1)
            self.fail('_key_to_sha1 returned:\n    %s\n != %s'
                      % (actual_hex_sha1, expected))

    def test_simple(self):
        self.assertKeyToSha1(_hex_form, (b'sha1:' + _hex_form,))

    def test_invalid_not_tuple(self):
        self.assertKeyToSha1(None, _hex_form)
        self.assertKeyToSha1(None, b'sha1:' + _hex_form)

    def test_invalid_empty(self):
        self.assertKeyToSha1(None, ())

    def test_invalid_not_string(self):
        self.assertKeyToSha1(None, (None,))
        self.assertKeyToSha1(None, (list(_hex_form),))

    def test_invalid_not_sha1(self):
        self.assertKeyToSha1(None, (_hex_form,))
        self.assertKeyToSha1(None, (b'sha2:' + _hex_form,))

    def test_invalid_not_hex(self):
        self.assertKeyToSha1(None,
            (b'sha1:abcdefghijklmnopqrstuvwxyz12345678901234',))


class Test_Sha1ToKey(TestBtreeSerializer):

    def assertSha1ToKey(self, hex_sha1):
        bin_sha1 = binascii.unhexlify(hex_sha1)
        key = self.module._py_sha1_to_key(bin_sha1)
        self.assertEqual((b'sha1:' + hex_sha1,), key)

    def test_simple(self):
        self.assertSha1ToKey(_hex_form)
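

# Hand-written leaf pages for the parsing tests below. Each page starts with a
# 'type=leaf\n' header, followed by one entry per line of the form
# key\x00references\x00value; the reference section is empty here (hence the
# two consecutive NULs) and the value is the four space-separated integers the
# tests check for.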
_one_key_content = b"""type=leaf
sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
"""

_large_offsets = b"""type=leaf
sha1:123456789012345678901234567890abcdefabcd\x00\x0012345678901 1234567890 0 1
sha1:abcd123456789012345678901234567890abcdef\x00\x002147483648 2147483647 0 1
sha1:abcdefabcd123456789012345678901234567890\x00\x004294967296 4294967295 4294967294 1
"""

_multi_key_content = b"""type=leaf
sha1:c80c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
sha1:c8e240de74fb1ed08fa08d38063f6a6a91462a81\x00\x002 2 2 2
sha1:cda39a3ee5e6b4b0d3255bfef95601890afd8070\x00\x003 3 3 3
sha1:cdf51e37c269aa94d38f93e537bf6e2020b21406\x00\x004 4 4 4
sha1:ce0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x005 5 5 5
sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x006 6 6 6
sha1:cf7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x007 7 7 7
"""

_multi_key_same_offset = b"""type=leaf
sha1:080c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
sha1:cd0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x002 2 2 2
sha1:cda39a3ee5e6b4b0d3255bfef95601890afd8070\x00\x003 3 3 3
sha1:cde240de74fb1ed08fa08d38063f6a6a91462a81\x00\x004 4 4 4
sha1:cdf51e37c269aa94d38f93e537bf6e2020b21406\x00\x005 5 5 5
sha1:ce7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x006 6 6 6
sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x007 7 7 7
"""

_common_32_bits = b"""type=leaf
sha1:123456784a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
sha1:1234567874fb1ed08fa08d38063f6a6a91462a81\x00\x001 1 1 1
sha1:12345678777ec23212c54d7a350bc5bea5477fdb\x00\x002 2 2 2
sha1:123456787faa5a7fce15d1ddcb9eaeaea377667b\x00\x003 3 3 3
sha1:12345678898dd52fc65c41454cec9c4d2611bfb3\x00\x004 4 4 4
sha1:12345678c269aa94d38f93e537bf6e2020b21406\x00\x005 5 5 5
sha1:12345678c464ffd51732fbd6ded717e9efda28aa\x00\x006 6 6 6
sha1:12345678e5e6b4b0d3255bfef95601890afd8070\x00\x007 7 7 7
"""
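

# _parse_into_chk() parses one of the serialized leaf pages above into the
# compiled SHA1-keyed leaf node type exercised below; the trailing (1, 0)
# arguments are kept constant throughout these tests.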
class TestGCCKHSHA1LeafNode(TestBtreeSerializer):

    def assertInvalid(self, data):
        """Ensure that we get a proper error when trying to parse invalid bytes.

        (mostly this is testing that bad input doesn't cause us to segfault)
        """
        self.assertRaises(
            (ValueError, TypeError), self.module._parse_into_chk, data, 1, 0)

    def test_non_bytes(self):
        self.assertInvalid(u'type=leaf\n')

    def test_not_leaf(self):
        self.assertInvalid(b'type=internal\n')

    def test_empty_leaf(self):
        leaf = self.module._parse_into_chk(b'type=leaf\n', 1, 0)
        self.assertEqual(0, len(leaf))
        self.assertEqual([], leaf.all_items())
        self.assertEqual([], leaf.all_keys())
        # It should allow any key to be queried
        self.assertFalse(('key',) in leaf)

    def test_one_key_leaf(self):
        leaf = self.module._parse_into_chk(_one_key_content, 1, 0)
        self.assertEqual(1, len(leaf))
        sha_key = (b'sha1:' + _hex_form,)
        self.assertEqual([sha_key], leaf.all_keys())
        self.assertEqual([(sha_key, (b'1 2 3 4', ()))], leaf.all_items())
        self.assertTrue(sha_key in leaf)

    def test_large_offsets(self):
        leaf = self.module._parse_into_chk(_large_offsets, 1, 0)
        self.assertEqual([b'12345678901 1234567890 0 1',
                          b'2147483648 2147483647 0 1',
                          b'4294967296 4294967295 4294967294 1',
                          ], [x[1][0] for x in leaf.all_items()])

    def test_many_key_leaf(self):
        leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
        self.assertEqual(8, len(leaf))
        all_keys = leaf.all_keys()
        self.assertEqual(8, len(all_keys))
        for idx, key in enumerate(all_keys):
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])

    def test_common_shift(self):
        # The keys were deliberately chosen so that the first 5 bits all
        # overlapped; it also happens that a later bit overlaps.
        # Note that by 'overlap' we mean that a given bit is either on in all
        # keys, or off in all keys.
        leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
        self.assertEqual(19, leaf.common_shift)
        # The interesting byte for each key (defined as the 8 bits that come
        # after the common prefix) is:
        lst = [1, 13, 28, 180, 190, 193, 210, 239]
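        # For example (a worked check, assuming the interesting byte is the low
        # 8 bits of the first four key bytes shifted right by common_shift):
        # 0xc80c881d >> 19 == 0x1901, whose low byte is 0x01 == lst[0].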
        offsets = leaf._get_offsets()
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for idx, val in enumerate(lst):
            self.assertEqual(idx, offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])

    def test_multi_key_same_offset(self):
        # there is no common prefix, though there are some common bits
        leaf = self.module._parse_into_chk(_multi_key_same_offset, 1, 0)
        self.assertEqual(24, leaf.common_shift)
        offsets = leaf._get_offsets()
        # The interesting byte is just the first 8-bits of the key
        lst = [8, 200, 205, 205, 205, 205, 206, 206]
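        # (With a shift of 24 the interesting byte is simply the first byte of
        # each digest: 0x08, 0xc8, 0xcd, ... == 8, 200, 205, ... above.)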
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for val in lst:
            self.assertEqual(lst.index(val), offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])

    def test_all_common_prefix(self):
        # The first 32 bits of all hashes are the same. This is going to be
        # pretty much impossible, but I don't want to fail because of this
        leaf = self.module._parse_into_chk(_common_32_bits, 1, 0)
        self.assertEqual(0, leaf.common_shift)
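        # With a shift of 0 every key shares the same interesting byte: the low
        # byte of the common 0x12345678 prefix, i.e. 0x78 (assuming the same
        # shift arithmetic as in test_common_shift above).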
        lst = [0x78] * 8
        offsets = leaf._get_offsets()
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for val in lst:
            self.assertEqual(lst.index(val), offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])

    def test_many_entries(self):
        # Again, this is almost impossible, but we should still work
        # It would be hard to fit more than 120 entries in a 4k page, much less
        # more than 256 of them. But hey, weird stuff happens sometimes.
        lines = [b'type=leaf\n']
        for i in range(500):
            key_str = b'sha1:%04x%s' % (i, _hex_form[:36])
            lines.append(b'%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i))
        data = b''.join(lines)
        leaf = self.module._parse_into_chk(data, 1, 0)
        self.assertEqual(24 - 7, leaf.common_shift)
        offsets = leaf._get_offsets()
        # This is the interesting byte for each entry
        lst = [x // 2 for x in range(500)]
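        # (Entry i was built from b'sha1:%04x...', so its first 32 bits are
        # (i << 16) | 0x1234; shifted right by 17 that leaves i >> 1, which is
        # why the interesting byte for entry i works out to i // 2.)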
        expected_offsets = [x * 2 for x in range(128)] + [255]*129
        self.assertEqual(expected_offsets, offsets)
        # We truncate because offsets is an unsigned char. So the bisection
        # will just say 'greater than the last one' for all the rest
        lst = lst[:255]
        self.assertEqual([bisect.bisect_left(lst, x) for x in range(0, 257)],
                         offsets)
        for val in lst:
            self.assertEqual(lst.index(val), offsets[val])
        for idx, key in enumerate(leaf.all_keys()):
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])

    def test__sizeof__(self):
        # We can't use the exact numbers because of platform variations, etc.
        # But what we really care about is that it does get bigger with more
        # content.
        leaf0 = self.module._parse_into_chk(b'type=leaf\n', 1, 0)
        leaf1 = self.module._parse_into_chk(_one_key_content, 1, 0)
        leafN = self.module._parse_into_chk(_multi_key_content, 1, 0)
        sizeof_1 = leaf1.__sizeof__() - leaf0.__sizeof__()
        self.assertTrue(sizeof_1 > 0)
        sizeof_N = leafN.__sizeof__() - leaf0.__sizeof__()
        self.assertEqual(sizeof_1 * len(leafN), sizeof_N)