/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2694.5.1 by Alexander Belchenko
pyrex bencode (without benchmarks)
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Pyrex implementation for bencode coder/decoder"""
18
19
20
cdef extern from "Python.h":
21
    ctypedef int  Py_ssize_t
22
    object Py_BuildValue(char *format, ...)
23
    int PyInt_CheckExact(object o)
24
    int PyLong_CheckExact(object o)
25
    int PyString_CheckExact(object o)
26
    int PyTuple_CheckExact(object o)
27
    int PyList_CheckExact(object o)
28
    int PyDict_CheckExact(object o)
29
    int PyBool_Check(object o)
30
    object PyString_FromStringAndSize(char *v, Py_ssize_t len)
31
    int PyString_AsStringAndSize(object o, char **buffer, Py_ssize_t *length)
32
    long PyInt_GetMax()
33
    object PyLong_FromString(char *str, char **pend, int base)
34
35
cdef extern from "stddef.h":
36
    ctypedef unsigned int size_t
37
38
cdef extern from "stdlib.h":
39
    void free(void *memblock)
40
    void *malloc(size_t size)
41
    void *realloc(void *memblock, size_t size)
42
43
cdef extern from "string.h":
44
    void *memcpy(void *dest, void *src, size_t count)
45
46
cdef extern from "_bencode_c.h":
47
    int snprintf(char* buffer, size_t nsize, char* fmt, ...)
48
49
50
class NotEnoughMemory(Exception):
    """Memory allocation error.

    Raised by the coder classes in this module when a C buffer cannot be
    allocated or grown, or when a value does not fit into the scratch
    space reserved for formatting it.
    """
    pass
53
54
55
cdef enum:  # ASCII codes of the characters that are significant in bencode
    MINUS   = 0x2D      # '-'  sign of a negative integer
    CHAR_0  = 0x30      # '0'
    CHAR_1  = 0x31      # '1'
    CHAR_2  = 0x32      # '2'
    CHAR_3  = 0x33      # '3'
    CHAR_4  = 0x34      # '4'
    CHAR_5  = 0x35      # '5'
    CHAR_6  = 0x36      # '6'
    CHAR_7  = 0x37      # '7'
    CHAR_8  = 0x38      # '8'
    CHAR_9  = 0x39      # '9'
    COLON   = 0x3A      # ':'  separates a string length from its bytes
    SMALL_D = 0x64      # 'd'  opens a dict
    SMALL_E = 0x65      # 'e'  closes an int, list or dict
    SMALL_I = 0x69      # 'i'  opens an int
    SMALL_L = 0x6c      # 'l'  opens a list
72
73
74
cdef class Decoder:
    """Bencode decoder.

    Wraps an exact Python string and consumes it from the front: ``tail``
    points at the first byte not yet decoded and ``size`` is the number of
    bytes still available.  A reference to the string is kept in ``__s`` so
    the memory ``tail`` points into stays alive.
    """

    cdef readonly object __s
    cdef readonly char *tail
    cdef readonly int   size

    # _MAXINT is the largest value of a Python int (a C long); it doubles as
    # the sentinel returned by _decode_int_until when the number had to be
    # parsed as a Python long, which is then left in _longint.
    # _MAXN is the number of decimal digits of _MAXINT.
    cdef readonly long   _MAXINT
    cdef readonly int    _MAXN
    cdef readonly object _longint

    def __init__(self, s):
        """Initialize decoder engine.

        @param  s:  Python string.
        @raise TypeError: if s is not exactly a str.
        @raise ValueError: if the character buffer of s cannot be obtained.
        """
        cdef Py_ssize_t k
        cdef char *pstr

        if not PyString_CheckExact(s):
            raise TypeError

        # Start from NULL so the check below is meaningful even if the call
        # fails without touching the output pointer.
        pstr = NULL
        PyString_AsStringAndSize(s, &pstr, &k)

        if pstr == NULL:
            raise ValueError

        self.__s = s
        self.tail = pstr
        self.size = <int>k

        self._MAXINT = PyInt_GetMax()
        self._MAXN = len(str(self._MAXINT))
        self._longint = long(0)

    def __repr__(self):
        return 'Decoder(%s)' % repr(self.__s)

    def decode(self):
        """Decode the whole string as exactly one bencoded object.

        @return: the decoded Python object.
        @raise ValueError: on malformed input or if bytes remain after the
            first complete object.
        """
        result = self.decode_object()
        if self.size != 0:
            raise ValueError('junk in stream')
        return result

    def decode_object(self):
        """Decode the next object at the current position.

        The first byte selects the kind of object: 'i' int, a digit starts a
        length-prefixed string, 'l' list, 'd' dict.

        @raise ValueError: if no bytes are left or the first byte does not
            start any known object.
        """
        cdef char ch

        if 0 == self.size:
            raise ValueError('stream underflow')

        ch = self.tail[0]

        if ch == SMALL_I:
            self._update_tail(1)
            return self._decode_int()
        elif CHAR_0 <= ch <= CHAR_9:
            return self._decode_string()
        elif ch == SMALL_L:
            self._update_tail(1)
            return self._decode_list()
        elif ch == SMALL_D:
            self._update_tail(1)
            return self._decode_dict()

        raise ValueError('unknown object')

    cdef void _update_tail(self, int n):
        """Update tail pointer and resulting size by n characters"""
        self.size = self.size - n
        self.tail = &self.tail[n]

    cdef object _decode_int(self):
        """Decode the digits of an int up to and including the closing 'e'.

        @return: a Python int, or a Python long when the number has at least
            as many digits as the largest Python int.
        """
        # Must be a C long: the sentinel compared below is a long and would
        # be truncated by a narrower type on LP64 platforms.
        cdef long result
        result = self._decode_int_until(SMALL_E)
        if result != self._MAXINT:
            return result
        else:
            return self._longint

    cdef long _decode_int_until(self, char stop_char) except? -1:
        """Decode int from stream until stop_char encountered.

        The number and the stop character are consumed.

        @param  stop_char:  byte that terminates the number.
        @return: the value as a C long.  When the number has _MAXN digits or
            more, self._MAXINT is returned instead and the real value is
            stored in self._longint as a Python long.
        @raise ValueError: if stop_char is missing, the number is empty, has
            a leading zero, is a negative zero, or contains a non-digit.
        @raise NotEnoughMemory: if the scratch buffer for a long number
            cannot be allocated.
        """
        cdef long result
        cdef int i, n
        cdef int sign
        cdef char digit
        cdef char *longstr

        # Find the terminator; n ends up as the length of the number text.
        for n from 0 <= n < self.size:
            if self.tail[n] == stop_char:
                break
        else:
            raise ValueError

        sign = 0
        if MINUS == self.tail[0]:
            sign = 1

        if n-sign == 0:
            raise ValueError    # ie / i-e

        if self.tail[sign] == CHAR_0:   # special check for zero
            if sign:
                raise ValueError    # i-0e
            if n > 1:
                raise ValueError    # i00e / i01e
            self._update_tail(n+1)
            return 0

        # Validate every character up front so both branches below accept
        # exactly the same syntax; PyLong_FromString on its own would also
        # tolerate leading whitespace and a trailing 'L'.
        for i from sign <= i < n:
            digit = self.tail[i]
            if digit < CHAR_0 or digit > CHAR_9:
                raise ValueError

        if n-sign < self._MAXN:
            # plain int: fewer digits than _MAXINT, so it fits in a C long
            # and can never collide with the _MAXINT sentinel
            result = 0
            for i from sign <= i < n:
                result = result * 10 + (self.tail[i] - CHAR_0)
            if sign:
                result = -result
        else:
            # long int: parse a NUL-terminated copy with the Python runtime
            result = self._MAXINT
            longstr = <char*>malloc(n+1)
            if NULL == longstr:
                raise NotEnoughMemory
            try:
                memcpy(longstr, self.tail, n)
                longstr[n] = 0
                self._longint = PyLong_FromString(longstr, NULL, 10)
            finally:
                # release the copy even when the conversion raises
                free(longstr)

        self._update_tail(n+1)
        return result

    cdef object _decode_string(self):
        """Decode a length-prefixed string ``<len>:<bytes>``.

        @raise ValueError: if the length is malformed, too large, or exceeds
            the number of bytes left in the stream.
        """
        # C long to match the return type of _decode_int_until.
        cdef long n

        n = self._decode_int_until(COLON)
        if n == 0:
            return ''
        if n == self._MAXINT:
            # strings longer than 1GB is not supported
            raise ValueError('too long string')
        if n < 0 or n > self.size:
            raise ValueError('stream underflow')

        result = PyString_FromStringAndSize(self.tail, n)
        # n <= self.size here, so it fits in an int
        self._update_tail(<int>n)
        return result

    cdef object _decode_list(self):
        """Decode list items until the closing 'e'; the 'l' is already consumed.

        @raise ValueError: if the stream ends before the closing 'e'.
        """
        result = []

        while self.size > 0:
            if self.tail[0] == SMALL_E:
                self._update_tail(1)
                return result
            else:
                result.append(self.decode_object())

        raise ValueError('malformed list')

    cdef object _decode_dict(self):
        """Decode key/value pairs until the closing 'e'; the 'd' is already consumed.

        Keys must be strings and must appear in strictly increasing order.

        @raise ValueError: if a key is not a string, keys are repeated or
            out of order, or the stream ends before the closing 'e'.
        """
        cdef char ch

        result = {}
        # None compares lower than any str under Python 2 ordering, so the
        # first key always passes the ordering check below.
        lastkey = None

        while self.size > 0:
            ch = self.tail[0]
            if ch == SMALL_E:
                self._update_tail(1)
                return result
            elif CHAR_0 <= ch <= CHAR_9:
                # keys should be strings only
                key = self._decode_string()
                if lastkey >= key:
                    raise ValueError('dict keys disordered')
                else:
                    lastkey = key
                value = self.decode_object()
                result[key] = value
            else:
                raise ValueError('keys in dict should be strings only')

        raise ValueError('malformed dict')
259
260
261
def bdecode(object s):
    """Decode bencoded string s to a Python object.

    @param  s:  Python string holding exactly one bencoded object.
    @return: the decoded object.
    """
    return Decoder(s).decode()
264
265
266
class Bencached(object):
    """Holder for a string that is already bencoded.

    The wrapped string is kept in the ``bencoded`` attribute, the only
    attribute an instance can have.
    """
    __slots__ = ['bencoded']

    def __init__(self, s):
        """Remember s as the pre-encoded data.

        @param  s:  the already bencoded string.
        """
        self.bencoded = s
271
272
273
cdef enum:
    INITSIZE = 1024     # initial size (in bytes) for encoder buffer
275
276
277
cdef class Encoder:
    """Bencode encoder.

    Output is accumulated in a malloc'ed character buffer that is doubled
    in size whenever it runs out of room.  ``buffer`` is the start of the
    allocation, ``maxsize`` its capacity, ``size`` the number of bytes
    written so far and ``tail`` the position of the next byte to write.
    Call process() for each object to encode, then str() to get the result.
    """

    cdef readonly char *buffer
    cdef readonly int   maxsize
    cdef readonly char *tail
    cdef readonly int   size

    def __init__(self, int maxsize=INITSIZE):
        """Initialize encoder engine

        @param  maxsize:    initial size of internal char buffer; values
                            below 1 fall back to the default size.
        @raise NotEnoughMemory: if the buffer cannot be allocated.
        """
        cdef char *p

        self.maxsize = 0
        self.size = 0
        self.tail = NULL

        # A zero or negative capacity can never be doubled into a usable
        # size, so substitute the default instead.
        if maxsize < 1:
            maxsize = INITSIZE

        p = <char*>malloc(maxsize)
        if p == NULL:
            raise NotEnoughMemory('Not enough memory to allocate buffer '
                                  'for encoder')
        self.buffer = p
        self.maxsize = maxsize
        self.tail = p

    def __dealloc__(self):
        # Extension types are finalized through __dealloc__; a method named
        # __del__ is never called for them, which would leak the buffer.
        free(self.buffer)
        self.buffer = NULL
        self.maxsize = 0

    def __str__(self):
        """Return everything encoded so far as a Python string."""
        if self.buffer != NULL and self.size != 0:
            return PyString_FromStringAndSize(self.buffer, self.size)
        else:
            return ''

    cdef int _ensure_buffer(self, int required) except 0:
        """Ensure that tail of CharTail buffer has enough size.
        If buffer is not big enough then function try to
        realloc buffer.

        @param  required:   number of bytes about to be written at tail.
        @return: 1 on success.
        @raise NotEnoughMemory: if the buffer cannot be grown.
        """
        cdef char *new_buffer
        cdef int   new_size
        cdef int   needed

        needed = self.size + required
        if needed < self.maxsize:
            return 1

        new_size = self.maxsize
        if new_size < 1:
            # doubling zero would never terminate
            new_size = 1
        while new_size <= needed:
            if new_size > 0x3FFFFFFF:
                # doubling again would overflow a 32-bit int
                raise NotEnoughMemory('Cannot realloc buffer for encoder')
            new_size = new_size * 2
        new_buffer = <char*>realloc(self.buffer, <size_t>new_size)
        if new_buffer == NULL:
            raise NotEnoughMemory('Cannot realloc buffer for encoder')

        self.buffer = new_buffer
        self.maxsize = new_size
        # realloc may have moved the block, so recompute the write position
        self.tail = &new_buffer[self.size]
        return 1

    cdef void _update_tail(self, int n):
        """Update tail pointer and resulting size by n characters"""
        self.size = self.size + n
        self.tail = &self.tail[n]

    cdef int _encode_int(self, long x) except 0:
        """Encode int to bencode string iNNNe

        @param  x:  value to encode (a C long, the native width of a
                    Python int)
        @return: 1 on success.
        @raise NotEnoughMemory: if the number could not be formatted.
        """
        cdef int n
        self._ensure_buffer(32)
        n = snprintf(self.tail, 32, "i%lde", x)
        # a negative result is an error, 32 or more means truncated output
        if n < 0 or n >= 32:
            raise NotEnoughMemory('int %d too big to encode' % x)
        self._update_tail(n)
        return 1

    cdef int _encode_long(self, x) except 0:
        """Encode a Python long as iNNNe using its decimal str() form."""
        return self._append_string(''.join(('i', str(x), 'e')))

    cdef int _append_string(self, s) except 0:
        """Append the bytes of string s to the output verbatim.

        @param  s:  Python string; used for data that is already bencoded.
        @return: 1 on success.
        @raise TypeError: if s does not provide string data.
        """
        cdef Py_ssize_t k
        cdef char *pstr

        if PyString_AsStringAndSize(s, &pstr, &k) < 0:
            raise TypeError('bencoded data should be string')
        self._ensure_buffer(<int>k)
        # memcpy rather than snprintf("%s"): the data may contain NUL bytes
        memcpy(self.tail, pstr, k)
        self._update_tail(<int>k)
        return 1

    cdef int _encode_string(self, x) except 0:
        """Encode string x as ``<len>:<bytes>``.

        @param  x:  Python string to encode.
        @return: 1 on success.
        @raise TypeError: if x does not provide string data.
        @raise NotEnoughMemory: if the length prefix could not be formatted.
        """
        cdef Py_ssize_t k
        cdef int n
        cdef char *pstr

        if PyString_AsStringAndSize(x, &pstr, &k) < 0:
            raise TypeError('value to encode should be string')
        # 32 bytes of slack hold the decimal length and the colon
        self._ensure_buffer(<int>k+32)
        n = snprintf(self.tail, 32, '%d:', <int>k)
        if n < 0 or n >= 32:
            raise NotEnoughMemory('string %s too big to encode' % x)
        # copy the payload byte for byte so embedded NULs are preserved
        memcpy(&self.tail[n], pstr, k)
        self._update_tail(n + <int>k)
        return 1

    cdef int _encode_list(self, x) except 0:
        """Encode the items of list or tuple x between 'l' and 'e'."""
        self._ensure_buffer(2)
        self.tail[0] = SMALL_L
        self._update_tail(1)

        for i in x:
            self.process(i)

        # The items may have used up the room reserved above, so reserve
        # again before writing the terminator.
        self._ensure_buffer(1)
        self.tail[0] = SMALL_E
        self._update_tail(1)
        return 1

    cdef int _encode_dict(self, x) except 0:
        """Encode dict x between 'd' and 'e' with its keys in sorted order.

        @raise TypeError: if a key is not exactly a str.
        """
        self._ensure_buffer(2)
        self.tail[0] = SMALL_D
        self._update_tail(1)

        keys = x.keys()
        keys.sort()
        for k in keys:
            if not PyString_CheckExact(k):
                raise TypeError('key in dict should be string')
            self._encode_string(k)
            self.process(x[k])

        # The entries may have used up the room reserved above, so reserve
        # again before writing the terminator.
        self._ensure_buffer(1)
        self.tail[0] = SMALL_E
        self._update_tail(1)
        return 1

    def process(self, object x):
        """Encode x and append the result to the internal buffer.

        Supported: int, long, str, list, tuple, dict, bool (encoded as
        int) and Bencached (appended verbatim).

        @raise TypeError: if x is of any other type.
        """
        if PyInt_CheckExact(x):
            self._encode_int(x)
        elif PyLong_CheckExact(x):
            self._encode_long(x)
        elif PyString_CheckExact(x):
            self._encode_string(x)
        elif PyList_CheckExact(x) or PyTuple_CheckExact(x):
            self._encode_list(x)
        elif PyDict_CheckExact(x):
            self._encode_dict(x)
        elif PyBool_Check(x):
            self._encode_int(int(x))
        elif isinstance(x, Bencached):
            self._append_string(x.bencoded)
        else:
            raise TypeError('unsupported type')
430
431
432
def bencode(x):
    """Return the bencoded string for Python object x.

    A fresh Encoder is created for each call.
    """
    enc = Encoder()
    enc.process(x)
    return str(enc)