33
30
_test_needs_features = [compiled_btreeparser_feature]
37
return compiled_btreeparser_feature.module
33
super(TestBtreeSerializer, self).setUp()
34
self.module = compiled_btreeparser_feature.module
40
37
class TestHexAndUnhex(TestBtreeSerializer):
48
45
mod_unhex = self.module._py_unhexlify(as_hex)
49
46
if ba_unhex != mod_unhex:
50
47
if mod_unhex is None:
53
50
mod_hex = binascii.hexlify(mod_unhex)
54
51
self.fail('_py_unhexlify returned a different answer'
55
' from binascii:\n %r\n != %r'
52
' from binascii:\n %s\n != %s'
56
53
% (binascii.hexlify(ba_unhex), mod_hex))
58
55
def assertFailUnhexlify(self, as_hex):
60
57
self.assertIs(None, self.module._py_unhexlify(as_hex))
62
59
def test_to_hex(self):
    """Run ``assertHexlify`` over every byte value, 20 bytes at a time."""
    # A single bytes object holding 0x00..0xff in order.
    raw_bytes = b''.join(map(int2byte, range(256)))
    # Twelve consecutive 20-byte windows cover bytes 0..239.
    for i in range(0, 240, 20):
        self.assertHexlify(raw_bytes[i:i + 20])
    # The remaining 16 bytes are topped up with the first 4 so the final
    # chunk is 20 bytes long like the others.
    self.assertHexlify(raw_bytes[240:] + raw_bytes[0:4])
68
65
def test_from_hex(self):
    """Run ``assertUnhexlify`` over fixed digests and every hex byte pair."""
    # Lower-case and upper-case digits, each at two different alignments.
    self.assertUnhexlify(b'0123456789abcdef0123456789abcdef01234567')
    self.assertUnhexlify(b'123456789abcdef0123456789abcdef012345678')
    self.assertUnhexlify(b'0123456789ABCDEF0123456789ABCDEF01234567')
    self.assertUnhexlify(b'123456789ABCDEF0123456789ABCDEF012345678')
    # 512 hex characters: the two-digit form of every byte value in order.
    hex_chars = binascii.hexlify(b''.join(map(int2byte, range(256))))
    # Twelve consecutive 40-character windows cover characters 0..479.
    for i in range(0, 480, 40):
        self.assertUnhexlify(hex_chars[i:i + 40])
    # The last 32 characters plus the first 8 form one more 40-character
    # input.
    self.assertUnhexlify(hex_chars[480:] + hex_chars[0:8])
78
75
def test_from_invalid_hex(self):
    """Check 40-character inputs that contain one non-hex character."""
    # 'X' in the final position.
    self.assertFailUnhexlify(b'123456789012345678901234567890123456789X')
    # 'X' in the second-to-last position.
    self.assertFailUnhexlify(b'12345678901234567890123456789012345678X9')
82
def test_bad_argument(self):
    """``_py_unhexlify`` must raise ValueError for these two arguments."""
    # A text (unicode) string rather than bytes.
    self.assertRaises(ValueError, self.module._py_unhexlify, u'1a')
    # Bytes, but only 2 characters long where the other tests pass 40.
    # NOTE(review): presumably it is the length that is rejected -- confirm
    # against the implementation.
    self.assertRaises(ValueError, self.module._py_unhexlify, b'1b')
87
_hex_form = b'123456789012345678901234567890abcdefabcd'
76
self.assertFailUnhexlify('123456789012345678901234567890123456789X')
77
self.assertFailUnhexlify('12345678901234567890123456789012345678X9')
80
_hex_form = '123456789012345678901234567890abcdefabcd'
90
82
class Test_KeyToSha1(TestBtreeSerializer):
103
95
% (actual_sha1, expected))
105
97
def test_simple(self):
    """Pass a 1-tuple key of ``b'sha1:'`` + ``_hex_form`` with ``_hex_form``."""
    # NOTE(review): the first argument is presumably the expected result of
    # the conversion -- confirm against assertKeyToSha1.
    self.assertKeyToSha1(_hex_form, (b'sha1:' + _hex_form,))
108
100
def test_invalid_not_tuple(self):
    """Pass bare byte strings (not wrapped in a tuple) together with None."""
    # The digest on its own.
    self.assertKeyToSha1(None, _hex_form)
    # The digest with the 'sha1:' prefix, still not inside a tuple.
    self.assertKeyToSha1(None, b'sha1:' + _hex_form)
112
104
def test_invalid_empty(self):
    """Pass an empty tuple as the key together with None."""
    self.assertKeyToSha1(None, ())
119
111
def test_invalid_not_sha1(self):
    """Pass 1-tuple keys that lack the ``sha1:`` prefix together with None."""
    # No prefix at all.
    self.assertKeyToSha1(None, (_hex_form,))
    # A different prefix of the same length.
    self.assertKeyToSha1(None, (b'sha2:' + _hex_form,))
123
115
def test_invalid_not_hex(self):
    """Pass a ``sha1:`` key whose 40-character payload has non-hex letters."""
    self.assertKeyToSha1(
        None, (b'sha1:abcdefghijklmnopqrstuvwxyz12345678901234',))
128
120
class Test_Sha1ToKey(TestBtreeSerializer):
    """Tests for the module's ``_py_sha1_to_key`` function."""

    def assertSha1ToKey(self, hex_sha1):
        """Assert that a binary digest converts to its ``(b'sha1:<hex>',)`` key.

        :param hex_sha1: The digest as a hex byte string.  It is converted to
            binary with ``binascii.unhexlify`` before being handed to
            ``_py_sha1_to_key``.
        """
        bin_sha1 = binascii.unhexlify(hex_sha1)
        key = self.module._py_sha1_to_key(bin_sha1)
        # The expected key is a 1-tuple of the prefixed hex digest, as bytes.
        self.assertEqual((b'sha1:' + hex_sha1,), key)

    def test_simple(self):
        """Convert the module-level sample digest ``_hex_form``."""
        self.assertSha1ToKey(_hex_form)
139
_one_key_content = b"""type=leaf
131
_one_key_content = """type=leaf
140
132
sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
143
_large_offsets = b"""type=leaf
135
_large_offsets = """type=leaf
144
136
sha1:123456789012345678901234567890abcdefabcd\x00\x0012345678901 1234567890 0 1
145
137
sha1:abcd123456789012345678901234567890abcdef\x00\x002147483648 2147483647 0 1
146
138
sha1:abcdefabcd123456789012345678901234567890\x00\x004294967296 4294967295 4294967294 1
149
_multi_key_content = b"""type=leaf
141
_multi_key_content = """type=leaf
150
142
sha1:c80c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
151
143
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
152
144
sha1:c8e240de74fb1ed08fa08d38063f6a6a91462a81\x00\x002 2 2 2
157
149
sha1:cf7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x007 7 7 7
160
_multi_key_same_offset = b"""type=leaf
152
_multi_key_same_offset = """type=leaf
161
153
sha1:080c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
162
154
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
163
155
sha1:cd0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x002 2 2 2
168
160
sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x007 7 7 7
171
_common_32_bits = b"""type=leaf
163
_common_32_bits = """type=leaf
172
164
sha1:123456784a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
173
165
sha1:1234567874fb1ed08fa08d38063f6a6a91462a81\x00\x001 1 1 1
174
166
sha1:12345678777ec23212c54d7a350bc5bea5477fdb\x00\x002 2 2 2
183
175
class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
185
def assertInvalid(self, data):
    """Ensure that we get a proper error when trying to parse invalid bytes.

    (mostly this is testing that bad input doesn't cause us to segfault)

    :param data: The content handed to ``_parse_into_chk`` as its first
        argument; the remaining arguments are always ``1`` and ``0``.
    """
    # Either exception type is accepted.
    self.assertRaises(
        (ValueError, TypeError), self.module._parse_into_chk, data, 1, 0)
193
def test_non_bytes(self):
    """A text (unicode) string must be reported as invalid input."""
    self.assertInvalid(u'type=leaf\n')
196
188
def test_not_leaf(self):
    """Content whose header says ``type=internal`` must be reported invalid."""
    self.assertInvalid(b'type=internal\n')
199
191
def test_empty_leaf(self):
200
leaf = self.module._parse_into_chk(b'type=leaf\n', 1, 0)
192
leaf = self.module._parse_into_chk('type=leaf\n', 1, 0)
201
193
self.assertEqual(0, len(leaf))
202
194
self.assertEqual([], leaf.all_items())
203
195
self.assertEqual([], leaf.all_keys())
207
199
def test_one_key_leaf(self):
    """Parse ``_one_key_content`` and check every accessor sees one record."""
    leaf = self.module._parse_into_chk(_one_key_content, 1, 0)
    self.assertEqual(1, len(leaf))
    # Keys are 1-tuples of the prefixed hex digest, as bytes.
    sha_key = (b'sha1:' + _hex_form,)
    self.assertEqual([sha_key], leaf.all_keys())
    # Each item pairs the key with (value bytes, empty tuple).
    self.assertEqual([(sha_key, (b'1 2 3 4', ()))], leaf.all_items())
    self.assertTrue(sha_key in leaf)
215
207
def test_large_offsets(self):
    """Numeric fields at and beyond 2**31 / 2**32 must come back verbatim."""
    leaf = self.module._parse_into_chk(_large_offsets, 1, 0)
    # Compare only the value string of each item (x[1][0]).
    self.assertEqual([b'12345678901 1234567890 0 1',
                      b'2147483648 2147483647 0 1',
                      b'4294967296 4294967295 4294967294 1',
                      ], [x[1][0] for x in leaf.all_items()])
222
214
def test_many_key_leaf(self):
223
215
leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
225
217
all_keys = leaf.all_keys()
226
218
self.assertEqual(8, len(leaf.all_keys()))
227
219
for idx, key in enumerate(all_keys):
228
self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
220
self.assertEqual(str(idx), leaf[key][0].split()[0])
230
222
def test_common_shift(self):
231
223
# The keys were deliberately chosen so that the first 5 bits all
243
235
for idx, val in enumerate(lst):
244
236
self.assertEqual(idx, offsets[val])
245
237
for idx, key in enumerate(leaf.all_keys()):
246
self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
238
self.assertEqual(str(idx), leaf[key][0].split()[0])
248
240
def test_multi_key_same_offset(self):
249
241
# there is no common prefix, though there are some common bits
258
250
self.assertEqual(lst.index(val), offsets[val])
259
251
for idx, key in enumerate(leaf.all_keys()):
260
self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
252
self.assertEqual(str(idx), leaf[key][0].split()[0])
262
254
def test_all_common_prefix(self):
263
255
# The first 32 bits of all hashes are the same. This is going to be
272
264
self.assertEqual(lst.index(val), offsets[val])
273
265
for idx, key in enumerate(leaf.all_keys()):
274
self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
266
self.assertEqual(str(idx), leaf[key][0].split()[0])
276
268
def test_many_entries(self):
277
269
# Again, this is almost impossible, but we should still work
278
270
# It would be hard to fit more that 120 entries in a 4k page, much less
279
271
# more than 256 of them. but hey, weird stuff happens sometimes
280
lines = [b'type=leaf\n']
272
lines = ['type=leaf\n']
281
273
for i in range(500):
282
key_str = b'sha1:%04x%s' % (i, _hex_form[:36])
274
key_str = 'sha1:%04x%s' % (i, _hex_form[:36])
284
lines.append(b'%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i))
285
data = b''.join(lines)
286
leaf = self.module._parse_into_chk(data, 1, 0)
287
self.assertEqual(24 - 7, leaf.common_shift)
276
lines.append('%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i))
277
bytes = ''.join(lines)
278
leaf = self.module._parse_into_chk(bytes, 1, 0)
279
self.assertEqual(24-7, leaf.common_shift)
288
280
offsets = leaf._get_offsets()
289
281
# This is the interesting bits for each entry
290
282
lst = [x // 2 for x in range(500)]
291
expected_offsets = [x * 2 for x in range(128)] + [255] * 129
283
expected_offsets = [x * 2 for x in range(128)] + [255]*129
292
284
self.assertEqual(expected_offsets, offsets)
293
285
# We truncate because offsets is an unsigned char. So the bisection
294
286
# will just say 'greater than the last one' for all the rest
299
291
self.assertEqual(lst.index(val), offsets[val])
300
292
for idx, key in enumerate(leaf.all_keys()):
301
self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
293
self.assertEqual(str(idx), leaf[key][0].split()[0])
303
295
def test__sizeof__(self):
304
296
# We can't use the exact numbers because of platform variations, etc.
305
297
# But what we really care about is that it does get bigger with more
307
leaf0 = self.module._parse_into_chk(b'type=leaf\n', 1, 0)
299
leaf0 = self.module._parse_into_chk('type=leaf\n', 1, 0)
308
300
leaf1 = self.module._parse_into_chk(_one_key_content, 1, 0)
309
301
leafN = self.module._parse_into_chk(_multi_key_content, 1, 0)
310
302
sizeof_1 = leaf1.__sizeof__() - leaf0.__sizeof__()