/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/tests/test__btree_serializer.py

  • Committer: Jelmer Vernooij
  • Date: 2017-07-23 22:06:41 UTC
  • mfrom: (6738 trunk)
  • mto: This revision was merged to the branch mainline in revision 6739.
  • Revision ID: jelmer@jelmer.uk-20170723220641-69eczax9bmv8d6kk
Merge trunk, address review comments.

Show diffs side-by-side

added added

removed removed

Lines of Context:
20
20
import binascii
21
21
import bisect
22
22
 
23
 
from ... import tests
 
23
from .. import tests
24
24
 
25
25
from .test_btree_index import compiled_btreeparser_feature
26
26
 
29
29
 
30
30
    _test_needs_features = [compiled_btreeparser_feature]
31
31
 
32
 
    @property
33
 
    def module(self):
34
 
        return compiled_btreeparser_feature.module
 
32
    def setUp(self):
 
33
        super(TestBtreeSerializer, self).setUp()
 
34
        self.module = compiled_btreeparser_feature.module
35
35
 
36
36
 
37
37
class TestHexAndUnhex(TestBtreeSerializer):
45
45
        mod_unhex = self.module._py_unhexlify(as_hex)
46
46
        if ba_unhex != mod_unhex:
47
47
            if mod_unhex is None:
48
 
                mod_hex = b'<None>'
 
48
                mod_hex = '<None>'
49
49
            else:
50
50
                mod_hex = binascii.hexlify(mod_unhex)
51
51
            self.fail('_py_unhexlify returned a different answer'
52
 
                      ' from binascii:\n    %r\n != %r'
 
52
                      ' from binascii:\n    %s\n != %s'
53
53
                      % (binascii.hexlify(ba_unhex), mod_hex))
54
54
 
55
55
    def assertFailUnhexlify(self, as_hex):
57
57
        self.assertIs(None, self.module._py_unhexlify(as_hex))
58
58
 
59
59
    def test_to_hex(self):
60
 
        raw_bytes = bytes(range(256))
 
60
        raw_bytes = ''.join(map(chr, range(256)))
61
61
        for i in range(0, 240, 20):
62
 
            self.assertHexlify(raw_bytes[i:i + 20])
63
 
        self.assertHexlify(raw_bytes[240:] + raw_bytes[0:4])
 
62
            self.assertHexlify(raw_bytes[i:i+20])
 
63
        self.assertHexlify(raw_bytes[240:]+raw_bytes[0:4])
64
64
 
65
65
    def test_from_hex(self):
66
 
        self.assertUnhexlify(b'0123456789abcdef0123456789abcdef01234567')
67
 
        self.assertUnhexlify(b'123456789abcdef0123456789abcdef012345678')
68
 
        self.assertUnhexlify(b'0123456789ABCDEF0123456789ABCDEF01234567')
69
 
        self.assertUnhexlify(b'123456789ABCDEF0123456789ABCDEF012345678')
70
 
        hex_chars = binascii.hexlify(bytes(range(256)))
 
66
        self.assertUnhexlify('0123456789abcdef0123456789abcdef01234567')
 
67
        self.assertUnhexlify('123456789abcdef0123456789abcdef012345678')
 
68
        self.assertUnhexlify('0123456789ABCDEF0123456789ABCDEF01234567')
 
69
        self.assertUnhexlify('123456789ABCDEF0123456789ABCDEF012345678')
 
70
        hex_chars = binascii.hexlify(''.join(map(chr, range(256))))
71
71
        for i in range(0, 480, 40):
72
 
            self.assertUnhexlify(hex_chars[i:i + 40])
73
 
        self.assertUnhexlify(hex_chars[480:] + hex_chars[0:8])
 
72
            self.assertUnhexlify(hex_chars[i:i+40])
 
73
        self.assertUnhexlify(hex_chars[480:]+hex_chars[0:8])
74
74
 
75
75
    def test_from_invalid_hex(self):
76
 
        self.assertFailUnhexlify(b'123456789012345678901234567890123456789X')
77
 
        self.assertFailUnhexlify(b'12345678901234567890123456789012345678X9')
78
 
 
79
 
    def test_bad_argument(self):
80
 
        self.assertRaises(ValueError, self.module._py_unhexlify, u'1a')
81
 
        self.assertRaises(ValueError, self.module._py_unhexlify, b'1b')
82
 
 
83
 
 
84
 
_hex_form = b'123456789012345678901234567890abcdefabcd'
85
 
 
 
76
        self.assertFailUnhexlify('123456789012345678901234567890123456789X')
 
77
        self.assertFailUnhexlify('12345678901234567890123456789012345678X9')
 
78
 
 
79
 
 
80
_hex_form = '123456789012345678901234567890abcdefabcd'
86
81
 
87
82
class Test_KeyToSha1(TestBtreeSerializer):
88
83
 
100
95
                      % (actual_sha1, expected))
101
96
 
102
97
    def test_simple(self):
103
 
        self.assertKeyToSha1(_hex_form, (b'sha1:' + _hex_form,))
 
98
        self.assertKeyToSha1(_hex_form, ('sha1:' + _hex_form,))
104
99
 
105
100
    def test_invalid_not_tuple(self):
106
101
        self.assertKeyToSha1(None, _hex_form)
107
 
        self.assertKeyToSha1(None, b'sha1:' + _hex_form)
 
102
        self.assertKeyToSha1(None, 'sha1:' + _hex_form)
108
103
 
109
104
    def test_invalid_empty(self):
110
105
        self.assertKeyToSha1(None, ())
115
110
 
116
111
    def test_invalid_not_sha1(self):
117
112
        self.assertKeyToSha1(None, (_hex_form,))
118
 
        self.assertKeyToSha1(None, (b'sha2:' + _hex_form,))
 
113
        self.assertKeyToSha1(None, ('sha2:' + _hex_form,))
119
114
 
120
115
    def test_invalid_not_hex(self):
121
116
        self.assertKeyToSha1(None,
122
 
                             (b'sha1:abcdefghijklmnopqrstuvwxyz12345678901234',))
 
117
            ('sha1:abcdefghijklmnopqrstuvwxyz12345678901234',))
123
118
 
124
119
 
125
120
class Test_Sha1ToKey(TestBtreeSerializer):
127
122
    def assertSha1ToKey(self, hex_sha1):
128
123
        bin_sha1 = binascii.unhexlify(hex_sha1)
129
124
        key = self.module._py_sha1_to_key(bin_sha1)
130
 
        self.assertEqual((b'sha1:' + hex_sha1,), key)
 
125
        self.assertEqual(('sha1:' + hex_sha1,), key)
131
126
 
132
127
    def test_simple(self):
133
128
        self.assertSha1ToKey(_hex_form)
134
129
 
135
130
 
136
 
_one_key_content = b"""type=leaf
 
131
_one_key_content = """type=leaf
137
132
sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
138
133
"""
139
134
 
140
 
_large_offsets = b"""type=leaf
 
135
_large_offsets = """type=leaf
141
136
sha1:123456789012345678901234567890abcdefabcd\x00\x0012345678901 1234567890 0 1
142
137
sha1:abcd123456789012345678901234567890abcdef\x00\x002147483648 2147483647 0 1
143
138
sha1:abcdefabcd123456789012345678901234567890\x00\x004294967296 4294967295 4294967294 1
144
139
"""
145
140
 
146
 
_multi_key_content = b"""type=leaf
 
141
_multi_key_content = """type=leaf
147
142
sha1:c80c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
148
143
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
149
144
sha1:c8e240de74fb1ed08fa08d38063f6a6a91462a81\x00\x002 2 2 2
154
149
sha1:cf7a9e24777ec23212c54d7a350bc5bea5477fdb\x00\x007 7 7 7
155
150
"""
156
151
 
157
 
_multi_key_same_offset = b"""type=leaf
 
152
_multi_key_same_offset = """type=leaf
158
153
sha1:080c881d4a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
159
154
sha1:c86f7e437faa5a7fce15d1ddcb9eaeaea377667b\x00\x001 1 1 1
160
155
sha1:cd0c9035898dd52fc65c41454cec9c4d2611bfb3\x00\x002 2 2 2
165
160
sha1:ce93b4e3c464ffd51732fbd6ded717e9efda28aa\x00\x007 7 7 7
166
161
"""
167
162
 
168
 
_common_32_bits = b"""type=leaf
 
163
_common_32_bits = """type=leaf
169
164
sha1:123456784a26984ddce795f6f71817c9cf4480e7\x00\x000 0 0 0
170
165
sha1:1234567874fb1ed08fa08d38063f6a6a91462a81\x00\x001 1 1 1
171
166
sha1:12345678777ec23212c54d7a350bc5bea5477fdb\x00\x002 2 2 2
179
174
 
180
175
class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
181
176
 
182
 
    def assertInvalid(self, data):
 
177
    def assertInvalid(self, bytes):
183
178
        """Ensure that we get a proper error when trying to parse invalid bytes.
184
179
 
185
180
        (mostly this is testing that bad input doesn't cause us to segfault)
186
181
        """
187
 
        self.assertRaises(
188
 
            (ValueError, TypeError), self.module._parse_into_chk, data, 1, 0)
 
182
        self.assertRaises((ValueError, TypeError), 
 
183
                          self.module._parse_into_chk, bytes, 1, 0)
189
184
 
190
 
    def test_non_bytes(self):
 
185
    def test_non_str(self):
191
186
        self.assertInvalid(u'type=leaf\n')
192
187
 
193
188
    def test_not_leaf(self):
194
 
        self.assertInvalid(b'type=internal\n')
 
189
        self.assertInvalid('type=internal\n')
195
190
 
196
191
    def test_empty_leaf(self):
197
 
        leaf = self.module._parse_into_chk(b'type=leaf\n', 1, 0)
 
192
        leaf = self.module._parse_into_chk('type=leaf\n', 1, 0)
198
193
        self.assertEqual(0, len(leaf))
199
194
        self.assertEqual([], leaf.all_items())
200
195
        self.assertEqual([], leaf.all_keys())
204
199
    def test_one_key_leaf(self):
205
200
        leaf = self.module._parse_into_chk(_one_key_content, 1, 0)
206
201
        self.assertEqual(1, len(leaf))
207
 
        sha_key = (b'sha1:' + _hex_form,)
 
202
        sha_key = ('sha1:' + _hex_form,)
208
203
        self.assertEqual([sha_key], leaf.all_keys())
209
 
        self.assertEqual([(sha_key, (b'1 2 3 4', ()))], leaf.all_items())
 
204
        self.assertEqual([(sha_key, ('1 2 3 4', ()))], leaf.all_items())
210
205
        self.assertTrue(sha_key in leaf)
211
206
 
212
207
    def test_large_offsets(self):
213
208
        leaf = self.module._parse_into_chk(_large_offsets, 1, 0)
214
 
        self.assertEqual([b'12345678901 1234567890 0 1',
215
 
                          b'2147483648 2147483647 0 1',
216
 
                          b'4294967296 4294967295 4294967294 1',
217
 
                          ], [x[1][0] for x in leaf.all_items()])
 
209
        self.assertEqual(['12345678901 1234567890 0 1',
 
210
                          '2147483648 2147483647 0 1',
 
211
                          '4294967296 4294967295 4294967294 1',
 
212
                         ], [x[1][0] for x in leaf.all_items()])
218
213
 
219
214
    def test_many_key_leaf(self):
220
215
        leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
222
217
        all_keys = leaf.all_keys()
223
218
        self.assertEqual(8, len(leaf.all_keys()))
224
219
        for idx, key in enumerate(all_keys):
225
 
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
 
220
            self.assertEqual(str(idx), leaf[key][0].split()[0])
226
221
 
227
222
    def test_common_shift(self):
228
223
        # The keys were deliberately chosen so that the first 5 bits all
240
235
        for idx, val in enumerate(lst):
241
236
            self.assertEqual(idx, offsets[val])
242
237
        for idx, key in enumerate(leaf.all_keys()):
243
 
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
 
238
            self.assertEqual(str(idx), leaf[key][0].split()[0])
244
239
 
245
240
    def test_multi_key_same_offset(self):
246
241
        # there is no common prefix, though there are some common bits
254
249
        for val in lst:
255
250
            self.assertEqual(lst.index(val), offsets[val])
256
251
        for idx, key in enumerate(leaf.all_keys()):
257
 
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
 
252
            self.assertEqual(str(idx), leaf[key][0].split()[0])
258
253
 
259
254
    def test_all_common_prefix(self):
260
255
        # The first 32 bits of all hashes are the same. This is going to be
268
263
        for val in lst:
269
264
            self.assertEqual(lst.index(val), offsets[val])
270
265
        for idx, key in enumerate(leaf.all_keys()):
271
 
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
 
266
            self.assertEqual(str(idx), leaf[key][0].split()[0])
272
267
 
273
268
    def test_many_entries(self):
274
269
        # Again, this is almost impossible, but we should still work
275
270
        # It would be hard to fit more than 120 entries in a 4k page, much less
276
271
        # more than 256 of them. But hey, weird stuff happens sometimes.
277
 
        lines = [b'type=leaf\n']
 
272
        lines = ['type=leaf\n']
278
273
        for i in range(500):
279
 
            key_str = b'sha1:%04x%s' % (i, _hex_form[:36])
 
274
            key_str = 'sha1:%04x%s' % (i, _hex_form[:36])
280
275
            key = (key_str,)
281
 
            lines.append(b'%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i))
282
 
        data = b''.join(lines)
283
 
        leaf = self.module._parse_into_chk(data, 1, 0)
284
 
        self.assertEqual(24 - 7, leaf.common_shift)
 
276
            lines.append('%s\0\0%d %d %d %d\n' % (key_str, i, i, i, i))
 
277
        bytes = ''.join(lines)
 
278
        leaf = self.module._parse_into_chk(bytes, 1, 0)
 
279
        self.assertEqual(24-7, leaf.common_shift)
285
280
        offsets = leaf._get_offsets()
286
281
        # This is the interesting bits for each entry
287
282
        lst = [x // 2 for x in range(500)]
288
 
        expected_offsets = [x * 2 for x in range(128)] + [255] * 129
 
283
        expected_offsets = [x * 2 for x in range(128)] + [255]*129
289
284
        self.assertEqual(expected_offsets, offsets)
290
285
        # We truncate because offsets is an unsigned char. So the bisection
291
286
        # will just say 'greater than the last one' for all the rest
295
290
        for val in lst:
296
291
            self.assertEqual(lst.index(val), offsets[val])
297
292
        for idx, key in enumerate(leaf.all_keys()):
298
 
            self.assertEqual(b'%d' % idx, leaf[key][0].split()[0])
 
293
            self.assertEqual(str(idx), leaf[key][0].split()[0])
299
294
 
300
295
    def test__sizeof__(self):
301
296
        # We can't use the exact numbers because of platform variations, etc.
302
297
        # But what we really care about is that it does get bigger with more
303
298
        # content.
304
 
        leaf0 = self.module._parse_into_chk(b'type=leaf\n', 1, 0)
 
299
        leaf0 = self.module._parse_into_chk('type=leaf\n', 1, 0)
305
300
        leaf1 = self.module._parse_into_chk(_one_key_content, 1, 0)
306
301
        leafN = self.module._parse_into_chk(_multi_key_content, 1, 0)
307
302
        sizeof_1 = leaf1.__sizeof__() - leaf0.__sizeof__()