1
# Copyright (C) 2007, 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Pyrex implementation for bencode coder/decoder"""
19
from __future__ import absolute_import
21
from cpython.bytes cimport (
23
PyBytes_FromStringAndSize,
27
from cpython.long cimport (
30
from cpython.int cimport (
34
from cpython.tuple cimport (
37
from cpython.list cimport (
41
from cpython.dict cimport (
44
from cpython.bool cimport (
47
from cpython.mem cimport (
53
from libc.stdlib cimport (
56
from libc.string cimport (
60
# C-level declarations taken straight from Python.h.
# Py_EnterRecursiveCall / Py_LeaveRecursiveCall are used in this file to guard
# the recursive decode (_decode_object) and encode (process) entry points; a
# non-zero return from Py_EnterRecursiveCall is treated there as "too deeply
# nested".
cdef extern from "Python.h":
61
# There is no cython module for ceval.h for some reason
62
int Py_GetRecursionLimit()
63
int Py_EnterRecursiveCall(char *)
64
void Py_LeaveRecursiveCall()
66
# snprintf is declared via the project's "python-compat.h" header. In this
# file it is used by the encoder to format integers and string-length
# prefixes directly into its output buffer.
cdef extern from "python-compat.h":
67
int snprintf(char* buffer, size_t nsize, char* fmt, ...)
72
# Helpers from the project's "_bencode_pyx.h". They are invoked right after n
# bytes have been consumed from the decoder input (D_) or written to the
# encoder buffer (E_).
# NOTE(review): the header is not visible here; presumably each helper advances
# `tail` by n and adjusts `size` to match -- confirm against the header.
cdef extern from "_bencode_pyx.h":
73
void D_UPDATE_TAIL(Decoder, int n)
74
void E_UPDATE_TAIL(Encoder, int n)
76
from ._static_tuple_c cimport StaticTuple, StaticTuple_CheckExact, \
79
import_static_tuple_c()
85
# Decoder state. The enclosing `cdef class` header is not visible in this
# excerpt; presumably this is the Decoder class used by bdecode().
# tail: current read position; initialised from PyBytes_AS_STRING(s) and
#   inspected as tail[0] by the _decode_* methods.
# size: initialised from PyBytes_GET_SIZE(s); presumably the count of bytes
#   still unread once D_UPDATE_TAIL has been applied -- confirm.
# _yield_tuples: int(yield_tuples) from __init__; consulted by _decode_list.
cdef readonly char *tail
86
cdef readonly int size
87
cdef readonly int _yield_tuples
90
# Set up the decoder over the input `s`.
#   s: must be exactly `bytes` (PyBytes_CheckExact), otherwise TypeError is
#      raised. The docstring below still says "Python string".
#   yield_tuples: stored as an int flag in self._yield_tuples.
# NOTE(review): self.tail is a raw pointer into the buffer owned by `s`, so `s`
# has to stay alive for as long as the decoder is used. No visible line keeps
# a reference to it, but lines are missing from this excerpt -- confirm.
def __init__(self, s, yield_tuples=0):
91
"""Initialize decoder engine.
92
@param s: Python string.
94
if not PyBytes_CheckExact(s):
95
raise TypeError("bytes required")
98
self.tail = PyBytes_AS_STRING(s)
99
self.size = PyBytes_GET_SIZE(s)
100
self._yield_tuples = int(yield_tuples)
103
result = self._decode_object()
105
raise ValueError('junk in stream')
108
def decode_object(self):
    """Decode the next object from the input and return it.

    Python-visible wrapper around the cdef method ``_decode_object``; its
    result is handed back unchanged and any exception it raises propagates.
    """
    decoded = self._decode_object()
    return decoded
111
# Decode one bencoded object at the current position and return it.
# Dispatch is on the leading byte `ch`: an ASCII digit starts a string and is
# handed to _decode_string without consuming anything; each other branch
# consumes one type byte (D_UPDATE_TAIL(self, 1)) and delegates to
# _decode_list, _decode_int or _decode_dict.
# NOTE(review): the conditions guarding those three branches, the assignment
# of `ch` and the 'stream underflow' guard are not visible in this excerpt.
# Raises:
#   ValueError('stream underflow'), ValueError for an unknown type byte,
#   RuntimeError when Py_EnterRecursiveCall reports excessive nesting.
# NOTE(review): Py_LeaveRecursiveCall() has to run on every exit path that
# follows a successful Py_EnterRecursiveCall; the try/finally that would
# guarantee this is not visible here -- confirm.
cdef object _decode_object(self):
115
raise ValueError('stream underflow')
117
if Py_EnterRecursiveCall("_decode_object"):
118
raise RuntimeError("too deeply nested")
121
if c'0' <= ch <= c'9':
122
return self._decode_string()
124
D_UPDATE_TAIL(self, 1)
125
return self._decode_list()
127
D_UPDATE_TAIL(self, 1)
128
return self._decode_int()
130
D_UPDATE_TAIL(self, 1)
131
return self._decode_dict()
133
raise ValueError('unknown object type identifier %r' % ch)
135
Py_LeaveRecursiveCall()
137
# Scan the run of digit / '-' characters starting at self.tail and check that
# the character following the run is `stop_char`.
# Raises ValueError when the stop character is not found and a bare
# ValueError for a leading zero ("0..." or "-0..."). `except -1` is the Cython
# error-return declaration for this cdef function.
# NOTE(review): the declaration/initialisation/increment of `i`, the condition
# nested under the leading-zero test and the return statements are missing
# from this excerpt; the caller uses the result as the number of characters
# before the stop character (see _decode_int).
# NOTE(review): the loop condition reads self.tail[i] before testing
# i < self.size, so the bounds test runs after the access it is meant to
# guard. Testing `i < self.size` first would avoid reading at tail[size].
# The loop also accepts '-' at any position in the run, not only the first.
cdef int _read_digits(self, char stop_char) except -1:
140
while ((self.tail[i] >= c'0' and self.tail[i] <= c'9') or
141
self.tail[i] == c'-') and i < self.size:
144
if self.tail[i] != stop_char:
145
raise ValueError("Stop character %c not found: %c" %
146
(stop_char, self.tail[i]))
147
if (self.tail[0] == c'0' or
148
(self.tail[0] == c'-' and self.tail[1] == c'0')):
152
raise ValueError # leading zeroes are not allowed
155
# Decode the body of a bencoded integer; the leading type byte has already
# been consumed by _decode_object.
# Steps visible here: _read_digits(c'e') validates the digit run and yields
# its length `i`; PyInt_FromString parses the value in base 10 starting at
# self.tail; finally i+1 bytes (the digits plus the terminating 'e') are
# consumed.
# NOTE(review): local declarations, any error handling around the parse and
# the return of `ret` are missing from this excerpt.
cdef object _decode_int(self):
157
i = self._read_digits(c'e')
160
ret = PyInt_FromString(self.tail, NULL, 10)
163
D_UPDATE_TAIL(self, i+1)
166
# Decode a bencoded string of the form <length>:<bytes>.
# strtol parses the decimal length prefix; the prefix must be followed by ':'
# and must not carry leading zeros ("0:" itself is accepted). The prefix and
# the ':' are consumed, then n bytes are copied out of the input with
# PyBytes_FromStringAndSize and consumed as well.
# Raises ValueError for a missing ':', for leading zeros, for
# 'stream underflow' and for a negative size.
# NOTE(review): the conditions guarding the last two raises, the local
# declarations and the return of `result` are missing from this excerpt.
cdef object _decode_string(self):
169
# strtol allows leading whitespace, negatives, and leading zeros
170
# however, all callers have already checked that '0' <= tail[0] <= '9'
171
# or they wouldn't have called _decode_string
172
# strtol will stop at trailing whitespace, etc
173
n = strtol(self.tail, &next_tail, 10)
174
if next_tail == NULL or next_tail[0] != c':':
175
raise ValueError('string len not terminated by ":"')
176
# strtol allows leading zeros, so validate that we don't have that
177
if (self.tail[0] == c'0'
178
and (n != 0 or (next_tail - self.tail != 1))):
179
raise ValueError('leading zeros are not allowed')
180
D_UPDATE_TAIL(self, next_tail - self.tail + 1)
184
raise ValueError('stream underflow')
186
raise ValueError('string size below zero: %d' % n)
188
result = PyBytes_FromStringAndSize(self.tail, n)
189
D_UPDATE_TAIL(self, n)
192
# Decode the items of a bencoded list; the leading type byte has already been
# consumed by _decode_object.
# Each item is decoded with _decode_object and appended to `result`. When the
# terminating 'e' is seen it is consumed and self._yield_tuples is consulted.
# NOTE(review): the loop header, the creation of `result` and the return
# statements are missing from this excerpt; presumably a tuple form of
# `result` is returned when _yield_tuples is set -- confirm.
# Raises ValueError('malformed list'); presumably when the input ends before
# an 'e' is found -- the guarding condition is not visible.
cdef object _decode_list(self):
196
if self.tail[0] == c'e':
197
D_UPDATE_TAIL(self, 1)
198
if self._yield_tuples:
203
# As a quick shortcut, check to see if the next object is a
204
# string, since we know that won't be creating recursion
205
# if self.tail[0] >= c'0' and self.tail[0] <= c'9':
206
PyList_Append(result, self._decode_object())
208
raise ValueError('malformed list')
210
# Decode the entries of a bencoded dict; the leading type byte has already
# been consumed by _decode_object.
# Every key must start with an ASCII digit (i.e. be a bencoded string) and is
# decoded with _decode_string. Keys must be strictly increasing: a key that
# compares <= the previous key raises 'dict keys disordered'. Values are
# decoded with _decode_object.
# NOTE(review): the loop header, the end-of-dict test that precedes the first
# D_UPDATE_TAIL, the storing of key/value, the update of `lastkey` and the
# return are missing from this excerpt.
# Raises ValueError for a non-string key, for disordered keys and for
# 'malformed dict' (guarding condition not visible).
cdef object _decode_dict(self):
219
D_UPDATE_TAIL(self, 1)
222
# keys should be strings only
223
if self.tail[0] < c'0' or self.tail[0] > c'9':
224
raise ValueError('key was not a simple string.')
225
key = self._decode_string()
226
if lastkey is not None and lastkey >= key:
227
raise ValueError('dict keys disordered')
230
value = self._decode_object()
233
raise ValueError('malformed dict')
236
def bdecode(object s):
    """Decode the bencoded input ``s`` and return the resulting Python object.

    A fresh ``Decoder`` is created for ``s`` and the value of its
    ``decode()`` call is returned as-is; exceptions raised while decoding
    propagate to the caller.
    """
    decoder = Decoder(s)
    return decoder.decode()
241
def bdecode_as_tuple(object s):
    """Decode the bencoded input ``s``, using tuples rather than lists.

    Identical to constructing ``Decoder(s, True)`` -- i.e. with the
    ``yield_tuples`` flag switched on -- and returning its ``decode()``
    result.
    """
    decoder = Decoder(s, True)
    return decoder.decode()
246
# Wrapper for data that is already bencoded. The encoder's process() method
# appends an instance's `bencoded` attribute to the output verbatim, without
# re-encoding it.
# __slots__ restricts instances to the single `bencoded` attribute.
# NOTE(review): the body of __init__ is missing from this excerpt; presumably
# it stores `s` in self.bencoded -- confirm.
class Bencached(object):
247
__slots__ = ['bencoded']
249
def __init__(self, s):
254
# Default for the encoder __init__'s `maxsize` argument (bytes allocated with
# PyMem_Malloc); the buffer is grown on demand by _ensure_buffer.
INITSIZE = 1024 # initial size for encoder buffer
259
"""Bencode encoder"""
261
# Encoder state (the enclosing `cdef class` header is not visible here).
# tail:    write position inside `buffer`; _ensure_buffer re-bases it to
#          &buffer[size] after a realloc.
# size:    number of bytes written so far (to_bytes-style code builds the
#          result from buffer[:size]).
# buffer:  start of the PyMem_Malloc'ed output buffer.
# maxsize: current capacity of `buffer` in bytes.
cdef readonly char *tail
262
cdef readonly int size
263
cdef readonly char *buffer
264
cdef readonly int maxsize
266
# Allocate the encoder's output buffer.
#   maxsize: initial capacity in bytes, passed to PyMem_Malloc and recorded
#            in self.maxsize. Defaults to INITSIZE.
# Raises MemoryError when the allocation fails (the NULL test guarding the
# raise is not visible in this excerpt).
# NOTE(review): the initialisation of self.buffer, self.tail and self.size is
# on lines missing from this excerpt. No validation of `maxsize` is visible;
# a value of 0 would later make the doubling loop in _ensure_buffer spin
# forever -- confirm against the full source.
def __init__(self, int maxsize=INITSIZE):
267
"""Initialize encoder engine
268
@param maxsize: initial size of internal char buffer
276
p = <char*>PyMem_Malloc(maxsize)
278
raise MemoryError('Not enough memory to allocate buffer '
281
self.maxsize = maxsize
284
# Release the output buffer when the encoder object is destroyed. The buffer
# comes from PyMem_Malloc / PyMem_Realloc, so PyMem_Free is the matching
# deallocator.
# NOTE(review): further statements of this method may be missing from this
# excerpt.
def __dealloc__(self):
285
PyMem_Free(self.buffer)
290
if self.buffer != NULL and self.size != 0:
291
return PyBytes_FromStringAndSize(self.buffer, self.size)
294
# Make sure at least `required` more bytes can be written at self.tail,
# growing the buffer with PyMem_Realloc when necessary.
# Growth policy: start from the current capacity and double until it reaches
# size + required. After a successful realloc, buffer / maxsize are updated
# and tail is re-based to &new_buffer[size], since realloc may move the block.
# Raises MemoryError when the realloc fails; the old buffer is left untouched
# in that case because self.buffer is only reassigned on success.
# `except 0` is the Cython error-return declaration, so success must return a
# non-zero value (the return statements are missing from this excerpt, as is
# the declaration of new_size).
# NOTE(review): if self.maxsize is 0, new_size stays 0 under doubling and the
# while loop never terminates for required > 0. Doubling a C int can also
# overflow for very large buffers.
cdef int _ensure_buffer(self, int required) except 0:
295
"""Ensure that tail of CharTail buffer has enough size.
296
If buffer is not big enough then function try to
299
cdef char *new_buffer
302
if self.size + required < self.maxsize:
305
new_size = self.maxsize
306
while new_size < self.size + required:
307
new_size = new_size * 2
308
new_buffer = <char*>PyMem_Realloc(self.buffer, <size_t>new_size)
309
if new_buffer == NULL:
310
raise MemoryError('Cannot realloc buffer for encoder')
312
self.buffer = new_buffer
313
self.maxsize = new_size
314
self.tail = &new_buffer[self.size]
317
# Append the C int `x` to the output as i<digits>e.
# INT_BUF_SIZE bytes are reserved first, then snprintf formats straight into
# the buffer at self.tail and the tail is advanced by the number of bytes
# snprintf reports.
# Raises MemoryError('int ... too big to encode'); the test on `n` guarding
# that raise is not visible in this excerpt.
# NOTE(review): INT_BUF_SIZE is defined outside the visible lines; the
# declaration of `n` and the final return are missing too.
cdef int _encode_int(self, int x) except 0:
318
"""Encode int to bencode string iNNNe
319
@param x: value to encode
322
self._ensure_buffer(INT_BUF_SIZE)
323
n = snprintf(self.tail, INT_BUF_SIZE, b"i%de", x)
325
raise MemoryError('int %d too big to encode' % x)
326
E_UPDATE_TAIL(self, n)
329
cdef int _encode_long(self, x) except 0:
    """Append the Python integer ``x`` to the output as ``i<digits>e``.

    The text is produced at Python level with bytes %-formatting (so the
    value is not limited to a C int) and then copied into the buffer by
    ``_append_string``, whose return value is passed through.
    """
    encoded = b'i%de' % x
    return self._append_string(encoded)
332
# Copy the raw contents of the bytes object `s` to the output without adding
# any length prefix (used for pre-encoded data and for _encode_long).
# Reserves n bytes, memcpy's them to self.tail and advances the tail by n.
# NOTE(review): PyBytes_GET_SIZE / PyBytes_AS_STRING are unchecked macros, so
# `s` must really be a bytes object; no type check is visible in this method.
# The declaration of `n` and the final return are missing from this excerpt.
cdef int _append_string(self, s) except 0:
334
n = PyBytes_GET_SIZE(s)
335
self._ensure_buffer(n)
336
memcpy(self.tail, PyBytes_AS_STRING(s), n)
337
E_UPDATE_TAIL(self, n)
340
# Append the bytes object `x` to the output as <length>:<bytes>.
# Room for the payload plus INT_BUF_SIZE bytes of prefix is reserved in one
# go; snprintf writes the "<length>:" prefix at self.tail, the payload is
# memcpy'd directly after it, and the tail is advanced by prefix + payload.
# Raises MemoryError('string ... too big to encode'); the test on `n`
# guarding that raise is not visible in this excerpt.
# NOTE(review): x_len is a Py_ssize_t but is formatted with "%d", which
# expects an int. On platforms where Py_ssize_t is wider than int this is a
# varargs type mismatch; casting the argument or using a matching length
# modifier would make it well-defined. _ensure_buffer also takes a C int, so
# x_len + INT_BUF_SIZE is narrowed on the way in.
cdef int _encode_string(self, x) except 0:
342
cdef Py_ssize_t x_len
343
x_len = PyBytes_GET_SIZE(x)
344
self._ensure_buffer(x_len + INT_BUF_SIZE)
345
n = snprintf(self.tail, INT_BUF_SIZE, b'%d:', x_len)
347
raise MemoryError('string %s too big to encode' % x)
348
memcpy(<void *>(self.tail+n), PyBytes_AS_STRING(x), x_len)
349
E_UPDATE_TAIL(self, n + x_len)
352
# Append the sequence `x` to the output as a bencoded list.
# Visible here: one byte is reserved and accounted for at the start and one
# at the end.
# NOTE(review): the statements that store the delimiter bytes (presumably 'l'
# and 'e'), the loop that encodes each item and the final return are missing
# from this excerpt -- confirm against the full source.
cdef int _encode_list(self, x) except 0:
353
self._ensure_buffer(1)
355
E_UPDATE_TAIL(self, 1)
360
self._ensure_buffer(1)
362
E_UPDATE_TAIL(self, 1)
365
# Append the dict `x` to the output as a bencoded dict.
# Visible here: one byte is reserved and accounted for at the start and one
# at the end; every key `k` must be exactly `bytes` (PyBytes_CheckExact),
# otherwise TypeError is raised, and is written with _encode_string.
# NOTE(review): the statements that store the delimiter bytes (presumably 'd'
# and 'e'), the loop header (the decoder requires ascending keys, so
# presumably keys are iterated in sorted order), the encoding of each value
# and the final return are missing from this excerpt -- confirm.
cdef int _encode_dict(self, x) except 0:
366
self._ensure_buffer(1)
368
E_UPDATE_TAIL(self, 1)
371
if not PyBytes_CheckExact(k):
372
raise TypeError('key in dict should be string')
373
self._encode_string(k)
376
self._ensure_buffer(1)
378
E_UPDATE_TAIL(self, 1)
381
# Encode the Python object `x` and append the result to the output buffer.
# Type dispatch, in order:
#   exact bytes                          -> _encode_string
#   exact int with bit_length() < 32     -> (branch body not visible)
#   exact long                           -> (branch body not visible)
#   exact list / exact tuple / StaticTuple -> (branch body not visible)
#   exact dict                           -> (branch body not visible)
#   bool                                 -> _encode_int(int(x))
#   Bencached                            -> its .bencoded bytes are appended
#                                           verbatim via _append_string
#   anything else                        -> TypeError('unsupported type ...')
# NOTE(review): the missing branch bodies presumably call _encode_int,
# _encode_long, _encode_list and _encode_dict respectively -- confirm.
# Raises RuntimeError when Py_EnterRecursiveCall reports excessive nesting.
# NOTE(review): Py_LeaveRecursiveCall() has to run on every exit path that
# follows a successful Py_EnterRecursiveCall, including the TypeError path;
# the try/finally that would guarantee this is not visible here -- confirm.
def process(self, object x):
382
if Py_EnterRecursiveCall("encode"):
383
raise RuntimeError("too deeply nested")
385
if PyBytes_CheckExact(x):
386
self._encode_string(x)
387
elif PyInt_CheckExact(x) and x.bit_length() < 32:
389
elif PyLong_CheckExact(x):
391
elif (PyList_CheckExact(x) or PyTuple_CheckExact(x)
392
or isinstance(x, StaticTuple)):
394
elif PyDict_CheckExact(x):
396
elif PyBool_Check(x):
397
self._encode_int(int(x))
398
elif isinstance(x, Bencached):
399
self._append_string(x.bencoded)
401
raise TypeError('unsupported type %r' % x)
403
Py_LeaveRecursiveCall()
407
"""Encode Python object x to string"""
410
return encoder.to_bytes()