/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2694.5.8 by Jelmer Vernooij
Use standard infrastructure for testing python and pyrex bencode implementations.
1
# Copyright (C) 2007,2009 Canonical Ltd
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Pyrex implementation for bencode coder/decoder"""
18
19
20
cdef extern from "Python.h":
21
    ctypedef int  Py_ssize_t
22
    int PyInt_CheckExact(object o)
23
    int PyLong_CheckExact(object o)
24
    int PyString_CheckExact(object o)
25
    int PyTuple_CheckExact(object o)
26
    int PyList_CheckExact(object o)
27
    int PyDict_CheckExact(object o)
28
    int PyBool_Check(object o)
29
    object PyString_FromStringAndSize(char *v, Py_ssize_t len)
2694.5.12 by Jelmer Vernooij
Avoid using snprintf for strings, use memcpy instead.
30
    char *PyString_AS_STRING(object o) except NULL
31
    Py_ssize_t PyString_GET_SIZE(object o) except -1
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
32
    long PyInt_GetMax()
33
    object PyLong_FromString(char *str, char **pend, int base)
34
35
cdef extern from "stddef.h":
36
    ctypedef unsigned int size_t
37
38
cdef extern from "stdlib.h":
39
    void free(void *memblock)
40
    void *malloc(size_t size)
41
    void *realloc(void *memblock, size_t size)
42
43
cdef extern from "string.h":
44
    void *memcpy(void *dest, void *src, size_t count)
45
2694.5.11 by Jelmer Vernooij
Use global python-compat.
46
cdef extern from "python-compat.h":
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
47
    int snprintf(char* buffer, size_t nsize, char* fmt, ...)
48
49
50
cdef class Decoder:
    """Bencode decoder.

    Decodes a bencoded Python string in place: ``tail`` points at the next
    unread byte and ``size`` is the number of unread bytes left.
    """

    cdef readonly char *tail
    cdef readonly int   size

    # Largest value of a plain Python int, as reported by PyInt_GetMax().
    # It doubles as the sentinel returned by _decode_int_until() when the
    # decoded number was stored in _longint instead.
    cdef readonly long   _MAXINT
    # Number of decimal digits in _MAXINT; numbers with fewer digits are
    # guaranteed to fit in a C long.
    cdef readonly int    _MAXN
    # Last arbitrary-precision integer decoded by _decode_int_until().
    cdef readonly object _longint
    # Non-zero when bencoded lists should be returned as tuples.
    cdef readonly int    _yield_tuples
    # Reference to the string being decoded.  ``tail`` points into its
    # buffer, so the string must stay alive as long as the decoder does.
    cdef object _text

    def __init__(self, s, yield_tuples=0):
        """Initialize decoder engine.

        @param  s:  Python string holding the bencoded data.
        @param  yield_tuples:  if true, lists are decoded to tuples.
        @raise  TypeError:  if s is not exactly a str.
        """
        if not PyString_CheckExact(s):
            raise TypeError("String required")

        self._text = s
        self.tail = PyString_AS_STRING(s)
        self.size = PyString_GET_SIZE(s)
        self._yield_tuples = int(yield_tuples)

        self._MAXINT = PyInt_GetMax()
        self._MAXN = len(str(self._MAXINT))
        self._longint = long(0)

    def decode(self):
        """Decode the whole input and return the resulting object.

        @raise  ValueError:  if the data is malformed or if bytes remain
            after the first complete object.
        """
        result = self.decode_object()
        if self.size != 0:
            raise ValueError('junk in stream')
        return result

    def decode_object(self):
        """Decode and return the single object starting at ``tail``.

        The first unread byte selects the type: 'i' integer, a digit
        starts a string, 'l' list, 'd' dict.

        @raise  ValueError:  on empty input or an unknown type byte.
        """
        cdef char ch

        if 0 == self.size:
            raise ValueError('stream underflow')

        ch = self.tail[0]

        if ch == c'i':
            self._update_tail(1)
            return self._decode_int()
        elif c'0' <= ch <= c'9':
            return self._decode_string()
        elif ch == c'l':
            self._update_tail(1)
            return self._decode_list()
        elif ch == c'd':
            self._update_tail(1)
            return self._decode_dict()
        else:
            raise ValueError('unknown object type identifier %r' % ch)

    cdef void _update_tail(self, int n):
        """Update tail pointer and resulting size by n characters"""
        self.size = self.size - n
        self.tail = &self.tail[n]

    cdef object _decode_int(self):
        """Decode the digits of an 'i...e' integer (the 'i' is consumed)."""
        # A C long is required here: _MAXINT is a long, and on platforms
        # where long is wider than int an int would truncate both ordinary
        # values and the _MAXINT sentinel.
        cdef long result
        result = self._decode_int_until(c'e')
        if result != self._MAXINT:
            return result
        else:
            return self._longint

    cdef long _decode_int_until(self, char stop_char) except? -1:
        """Decode int from stream until stop_char encountered.

        Consumes the number and the stop character.

        @return: the decoded value, or _MAXINT when the number has too
            many digits for a C long; in that case the value is stored in
            self._longint as a Python long.
        @raise  ValueError:  if stop_char is missing, the number is empty,
            has leading zeros, is negative zero, or contains non-digits.
        """
        cdef long result
        cdef int i, n
        cdef int sign
        cdef char digit
        cdef char *longstr

        # Locate the terminator; n becomes the length of the number text.
        for n from 0 <= n < self.size:
            if self.tail[n] == stop_char:
                break
        else:
            raise ValueError('integer terminator not found')

        sign = 0
        if c'-' == self.tail[0]:
            sign = 1

        if n-sign == 0:
            raise ValueError('empty integer')    # ie / i-e

        # Validate every digit up front so the plain and the long path
        # accept exactly the same syntax (PyLong_FromString alone would
        # tolerate whitespace and a trailing 'L').
        for i from sign <= i < n:
            digit = self.tail[i]
            if digit < c'0' or digit > c'9':
                raise ValueError('invalid character in integer')

        if self.tail[sign] == c'0':   # special check for zero
            if sign:
                raise ValueError('negative zero')    # i-0e
            if n > 1:
                raise ValueError('leading zero in integer')  # i00e / i01e
            self._update_tail(n+1)
            return 0

        if n-sign < self._MAXN:
            # plain int: fewer digits than _MAXINT, so it cannot overflow
            result = 0
            for i from sign <= i < n:
                result = result * 10 + (self.tail[i] - c'0')
            if sign:
                result = -result
            self._update_tail(n+1)
        else:
            # long int: parse a NUL-terminated copy with PyLong_FromString
            result = self._MAXINT
            longstr = <char*>malloc(n+1)
            if NULL == longstr:
                raise MemoryError
            try:
                memcpy(longstr, self.tail, n)
                longstr[n] = 0
                self._longint = PyLong_FromString(longstr, NULL, 10)
            finally:
                # release the copy even when the conversion raises
                free(longstr)
            self._update_tail(n+1)

        return result

    cdef object _decode_string(self):
        """Decode a '<length>:<bytes>' string starting at ``tail``.

        @raise  ValueError:  if the length is malformed, too large, or
            exceeds the remaining input.
        """
        cdef long n

        n = self._decode_int_until(c':')
        if n == 0:
            return ''
        if n == self._MAXINT:
            # lengths with as many digits as _MAXINT are not supported
            raise ValueError('too long string')
        if n > self.size:
            raise ValueError('stream underflow')
        if n < 0:
            raise ValueError('string size below zero: %d' % n)

        # 0 < n <= self.size here, so n fits in an int.
        result = PyString_FromStringAndSize(self.tail, n)
        self._update_tail(<int>n)
        return result

    cdef object _decode_list(self):
        """Decode list items up to the closing 'e' (the 'l' is consumed).

        @return: a list, or a tuple when _yield_tuples is set.
        @raise  ValueError:  if the input ends before the closing 'e'.
        """
        result = []

        while self.size > 0:
            if self.tail[0] == c'e':
                self._update_tail(1)
                if self._yield_tuples:
                    return tuple(result)
                else:
                    return result
            else:
                result.append(self.decode_object())

        raise ValueError('malformed list')

    cdef object _decode_dict(self):
        """Decode key/value pairs up to the closing 'e' (the 'd' is consumed).

        Keys must be strings in strictly increasing order.

        @raise  ValueError:  on a non-string key, on disordered or repeated
            keys, or if the input ends before the closing 'e'.
        """
        cdef char ch

        result = {}
        lastkey = None

        while self.size > 0:
            ch = self.tail[0]
            if ch == c'e':
                self._update_tail(1)
                return result
            elif c'0' <= ch <= c'9':
                # keys should be strings only
                key = self._decode_string()
                if lastkey >= key:
                    raise ValueError('dict keys disordered')
                else:
                    lastkey = key
                value = self.decode_object()
                result[key] = value
            else:
                raise ValueError('keys in dict should be strings only')

        raise ValueError('malformed dict')
229
230
231
def bdecode(object s):
    """Decode bencoded string s to a Python object.

    The whole string must be one bencoded value; lists are returned as
    Python lists.
    """
    decoder = Decoder(s)
    return decoder.decode()
234
235
2694.5.5 by Jelmer Vernooij
Support bdecode_as_tuple.
236
def bdecode_as_tuple(object s):
    """Decode bencoded string s to a Python object, with lists as tuples."""
    decoder = Decoder(s, True)
    return decoder.decode()
239
240
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
241
class Bencached(object):
    """Holder for a string that is already bencoded.

    The encoder in this module copies the ``bencoded`` attribute to its
    output as-is instead of encoding it again.
    """
    __slots__ = ['bencoded']

    def __init__(self, s):
        """Remember s as the pre-encoded payload."""
        self.bencoded = s
246
247
248
cdef enum:
    # Default number of bytes allocated for a new encoder buffer.
    INITSIZE = 1024
250
251
252
cdef class Encoder:
    """Bencode encoder.

    Output is accumulated in a malloc'ed buffer: ``buffer`` is its start,
    ``maxsize`` its capacity, ``size`` the number of bytes written so far
    and ``tail`` the position of the next byte to write.  ``str(encoder)``
    returns the bytes written so far.
    """

    cdef readonly char *buffer
    cdef readonly int   maxsize
    cdef readonly char *tail
    cdef readonly int   size

    def __init__(self, int maxsize=INITSIZE):
        """Initialize encoder engine
        @param  maxsize:    initial size of internal char buffer
        @raise  MemoryError:  if the buffer cannot be allocated.
        """
        cdef char *p

        self.maxsize = 0
        self.size = 0
        self.tail = NULL

        # malloc(0) may legally return NULL, and a zero capacity could
        # never grow by doubling, so always allocate at least one byte.
        if maxsize < 1:
            maxsize = 1

        p = <char*>malloc(maxsize)
        if p == NULL:
            raise MemoryError('Not enough memory to allocate buffer '
                              'for encoder')
        self.buffer = p
        self.maxsize = maxsize
        self.tail = p

    def __dealloc__(self):
        # Extension types are finalized through __dealloc__; a __del__
        # method is not invoked for them, which leaked the buffer.
        free(self.buffer)
        self.buffer = NULL
        self.maxsize = 0

    def __str__(self):
        """Return the bytes encoded so far as a Python string."""
        if self.buffer != NULL and self.size != 0:
            return PyString_FromStringAndSize(self.buffer, self.size)
        else:
            return ''

    cdef int _ensure_buffer(self, int required) except 0:
        """Ensure that the buffer has room for `required` more bytes.

        If the buffer is not big enough its capacity is doubled (as often
        as needed) with realloc, and ``tail`` is re-pointed into the new
        block.

        @raise  MemoryError:  if the size would overflow a C int or the
            reallocation fails.
        """
        cdef char *new_buffer
        cdef int   new_size
        cdef int   needed

        # Reject requests whose total would overflow (assumes int holds
        # at least 31 value bits).
        if required < 0 or required > 0x7FFFFFFF - self.size:
            raise MemoryError('Cannot realloc buffer for encoder')

        needed = self.size + required
        if needed <= self.maxsize:
            return 1

        new_size = self.maxsize
        if new_size < 1:
            # doubling zero would never terminate
            new_size = 1
        while new_size < needed:
            if new_size > 0x3FFFFFFF:
                # another doubling would overflow
                raise MemoryError('Cannot realloc buffer for encoder')
            new_size = new_size * 2
        new_buffer = <char*>realloc(self.buffer, <size_t>new_size)
        if new_buffer == NULL:
            raise MemoryError('Cannot realloc buffer for encoder')

        self.buffer = new_buffer
        self.maxsize = new_size
        self.tail = &new_buffer[self.size]
        return 1

    cdef void _update_tail(self, int n):
        """Update tail pointer and resulting size by n characters"""
        self.size = self.size + n
        self.tail = &self.tail[n]

    cdef int _encode_int(self, long x) except 0:
        """Encode int to bencode string iNNNe
        @param  x:  value to encode
        """
        # x is a C long because that is what an exact Python int holds;
        # 32 bytes is enough for 'i', a sign, the digits of a 64-bit
        # value, 'e' and the terminating NUL.
        cdef int n
        self._ensure_buffer(32)
        n = snprintf(self.tail, 32, "i%lde", x)
        if n < 0:
            raise MemoryError('int %d too big to encode' % x)
        self._update_tail(n)
        return 1

    cdef int _encode_long(self, x) except 0:
        """Encode Python long x as iNNNe using its str() form."""
        return self._append_string(''.join(('i', str(x), 'e')))

    cdef int _append_string(self, s) except 0:
        """Copy the bytes of string s to the output unchanged.

        @raise  TypeError:  if s is not a str.
        """
        cdef Py_ssize_t s_len

        # The PyString_* macros below do no type checking, so refuse
        # anything that is not a string before touching it.
        if not isinstance(s, str):
            raise TypeError('string required, got %r' % (s,))
        s_len = PyString_GET_SIZE(s)
        self._ensure_buffer(s_len)
        memcpy(self.tail, PyString_AS_STRING(s), s_len)
        self._update_tail(s_len)
        return 1

    cdef int _encode_string(self, x) except 0:
        """Encode string x as '<length>:<bytes>'."""
        cdef int n
        cdef Py_ssize_t x_len

        x_len = PyString_GET_SIZE(x)
        # 32 extra bytes leave room for the length prefix and the colon.
        self._ensure_buffer(x_len + 32)
        # Cast explicitly: the argument travels through C varargs and
        # must match the %ld conversion.
        n = snprintf(self.tail, 32, '%ld:', <long>x_len)
        if n < 0:
            raise MemoryError('string %s too big to encode' % x)
        memcpy(<void *>(self.tail+n), PyString_AS_STRING(x), x_len)
        self._update_tail(n+x_len)
        return 1

    cdef int _encode_list(self, x) except 0:
        """Encode the items of list or tuple x as 'l...e'."""
        self._ensure_buffer(1)
        self.tail[0] = c'l'
        self._update_tail(1)

        for i in x:
            self.process(i)

        # The items may have filled the buffer exactly, so check again
        # before writing the terminator.
        self._ensure_buffer(1)
        self.tail[0] = c'e'
        self._update_tail(1)
        return 1

    cdef int _encode_dict(self, x) except 0:
        """Encode dict x as 'd...e' with its keys in sorted order.

        @raise  TypeError:  if a key is not exactly a str.
        """
        self._ensure_buffer(1)
        self.tail[0] = c'd'
        self._update_tail(1)

        keys = x.keys()
        keys.sort()
        for k in keys:
            if not PyString_CheckExact(k):
                raise TypeError('key in dict should be string')
            self._encode_string(k)
            self.process(x[k])

        # The values may have filled the buffer exactly, so check again
        # before writing the terminator.
        self._ensure_buffer(1)
        self.tail[0] = c'e'
        self._update_tail(1)
        return 1

    def process(self, object x):
        """Append the bencoded form of x to the buffer.

        Handles str, int, long, list, tuple, dict, bool and Bencached;
        exact type checks are used for everything except bool and
        Bencached.

        @raise  TypeError:  if x is of any other type.
        """
        if PyString_CheckExact(x):
            self._encode_string(x)
        elif PyInt_CheckExact(x):
            self._encode_int(x)
        elif PyLong_CheckExact(x):
            self._encode_long(x)
        elif PyList_CheckExact(x) or PyTuple_CheckExact(x):
            self._encode_list(x)
        elif PyDict_CheckExact(x):
            self._encode_dict(x)
        elif PyBool_Check(x):
            self._encode_int(int(x))
        elif isinstance(x, Bencached):
            self._append_string(x.bencoded)
        else:
            raise TypeError('unsupported type %r' % x)
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
395
396
397
def bencode(x):
    """Return the bencoded string for Python object x."""
    enc = Encoder()
    enc.process(x)
    return str(enc)