1
# Copyright (C) 2007, 2009, 2010 Canonical Ltd
1
# Copyright (C) 2007,2009 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
"""Pyrex implementation for bencode coder/decoder"""
37
37
int Py_EnterRecursiveCall(char *)
38
38
void Py_LeaveRecursiveCall()
40
int PyList_Append(object, object) except -1
42
40
cdef extern from "stdlib.h":
43
41
void free(void *memblock)
44
42
void *malloc(size_t size)
51
49
cdef extern from "python-compat.h":
52
50
int snprintf(char* buffer, size_t nsize, char* fmt, ...)
57
cdef extern from "_bencode_pyx.h":
58
void D_UPDATE_TAIL(Decoder, int n)
59
void E_UPDATE_TAIL(Encoder, int n)
61
# To maintain compatibility with older versions of pyrex, we have to use the
62
# relative import here, rather than 'bzrlib._static_tuple_c'
63
from _static_tuple_c cimport StaticTuple, StaticTuple_CheckExact, \
66
import_static_tuple_c()
69
53
cdef class Decoder:
70
54
"""Bencode decoder"""
87
71
self._yield_tuples = int(yield_tuples)
90
result = self._decode_object()
74
result = self.decode_object()
92
76
raise ValueError('junk in stream')
95
79
def decode_object(self):
96
return self._decode_object()
98
cdef object _decode_object(self):
101
82
if 0 == self.size:
102
83
raise ValueError('stream underflow')
104
if Py_EnterRecursiveCall("_decode_object"):
85
if Py_EnterRecursiveCall("decode_object"):
105
86
raise RuntimeError("too deeply nested")
108
if c'0' <= ch <= c'9':
91
return self._decode_int()
92
elif c'0' <= ch <= c'9':
109
93
return self._decode_string()
111
D_UPDATE_TAIL(self, 1)
112
96
return self._decode_list()
114
D_UPDATE_TAIL(self, 1)
115
return self._decode_int()
117
D_UPDATE_TAIL(self, 1)
118
99
return self._decode_dict()
120
101
raise ValueError('unknown object type identifier %r' % ch)
122
103
Py_LeaveRecursiveCall()
105
cdef void _update_tail(self, int n):
106
"""Update tail pointer and resulting size by n characters"""
107
self.size = self.size - n
108
self.tail = &self.tail[n]
124
110
cdef int _read_digits(self, char stop_char) except -1:
127
while ((self.tail[i] >= c'0' and self.tail[i] <= c'9') or
113
while ((self.tail[i] >= c'0' and self.tail[i] <= c'9') or
128
114
self.tail[i] == c'-') and i < self.size:
147
133
ret = PyInt_FromString(self.tail, NULL, 10)
149
135
self.tail[i] = c'e'
150
D_UPDATE_TAIL(self, i+1)
136
self._update_tail(i+1)
153
139
cdef object _decode_string(self):
156
# strtol allows leading whitespace, negatives, and leading zeros
157
# however, all callers have already checked that '0' <= tail[0] <= '9'
158
# or they wouldn't have called _decode_string
159
# strtol will stop at trailing whitespace, etc
160
n = strtol(self.tail, &next_tail, 10)
161
if next_tail == NULL or next_tail[0] != c':':
162
raise ValueError('string len not terminated by ":"')
163
# strtol allows leading zeros, so validate that we don't have that
164
if (self.tail[0] == c'0'
165
and (n != 0 or (next_tail - self.tail != 1))):
166
raise ValueError('leading zeros are not allowed')
167
D_UPDATE_TAIL(self, next_tail - self.tail + 1)
141
i = self._read_digits(c':')
142
n = strtol(self.tail, NULL, 10)
143
self._update_tail(i+1)
170
146
if n > self.size:
182
158
while self.size > 0:
183
159
if self.tail[0] == c'e':
184
D_UPDATE_TAIL(self, 1)
185
161
if self._yield_tuples:
186
162
return tuple(result)
190
# As a quick shortcut, check to see if the next object is a
191
# string, since we know that won't be creating recursion
192
# if self.tail[0] >= c'0' and self.tail[0] <= c'9':
193
PyList_Append(result, self._decode_object())
166
result.append(self.decode_object())
195
168
raise ValueError('malformed list')
203
176
while self.size > 0:
204
177
ch = self.tail[0]
206
D_UPDATE_TAIL(self, 1)
209
182
# keys should be strings only
210
if self.tail[0] < c'0' or self.tail[0] > c'9':
211
raise ValueError('key was not a simple string.')
212
183
key = self._decode_string()
213
184
if lastkey >= key:
214
185
raise ValueError('dict keys disordered')
217
value = self._decode_object()
188
value = self.decode_object()
218
189
result[key] = value
220
191
raise ValueError('malformed dict')
245
216
cdef class Encoder:
246
217
"""Bencode encoder"""
248
cdef readonly char *tail
249
cdef readonly int size
250
219
cdef readonly char *buffer
251
220
cdef readonly int maxsize
221
cdef readonly char *tail
222
cdef readonly int size
253
224
def __init__(self, int maxsize=INITSIZE):
254
225
"""Initialize encoder engine
302
273
self.tail = &new_buffer[self.size]
276
cdef void _update_tail(self, int n):
277
"""Update tail pointer and resulting size by n characters"""
278
self.size = self.size + n
279
self.tail = &self.tail[n]
305
281
cdef int _encode_int(self, int x) except 0:
306
282
"""Encode int to bencode string iNNNe
307
283
@param x: value to encode
311
287
n = snprintf(self.tail, INT_BUF_SIZE, "i%de", x)
313
289
raise MemoryError('int %d too big to encode' % x)
314
E_UPDATE_TAIL(self, n)
317
293
cdef int _encode_long(self, x) except 0:
318
294
return self._append_string(''.join(('i', str(x), 'e')))
320
296
cdef int _append_string(self, s) except 0:
322
n = PyString_GET_SIZE(s)
323
self._ensure_buffer(n)
324
memcpy(self.tail, PyString_AS_STRING(s), n)
325
E_UPDATE_TAIL(self, n)
297
self._ensure_buffer(PyString_GET_SIZE(s))
298
memcpy(self.tail, PyString_AS_STRING(s), PyString_GET_SIZE(s))
299
self._update_tail(PyString_GET_SIZE(s))
328
302
cdef int _encode_string(self, x) except 0:
330
cdef Py_ssize_t x_len
331
x_len = PyString_GET_SIZE(x)
332
self._ensure_buffer(x_len + INT_BUF_SIZE)
333
n = snprintf(self.tail, INT_BUF_SIZE, '%d:', x_len)
304
self._ensure_buffer(PyString_GET_SIZE(x) + 32)
305
n = snprintf(self.tail, 32, '%d:', PyString_GET_SIZE(x))
335
307
raise MemoryError('string %s too big to encode' % x)
336
memcpy(<void *>(self.tail+n), PyString_AS_STRING(x), x_len)
337
E_UPDATE_TAIL(self, n + x_len)
308
memcpy(<void *>(self.tail+n), PyString_AS_STRING(x),
309
PyString_GET_SIZE(x))
310
self._update_tail(n+PyString_GET_SIZE(x))
340
313
cdef int _encode_list(self, x) except 0:
341
self._ensure_buffer(1)
314
self._ensure_buffer(2)
342
315
self.tail[0] = c'l'
343
E_UPDATE_TAIL(self, 1)
348
self._ensure_buffer(1)
349
321
self.tail[0] = c'e'
350
E_UPDATE_TAIL(self, 1)
353
325
cdef int _encode_dict(self, x) except 0:
354
self._ensure_buffer(1)
326
self._ensure_buffer(2)
355
327
self.tail[0] = c'd'
356
E_UPDATE_TAIL(self, 1)
378
349
self._encode_int(x)
379
350
elif PyLong_CheckExact(x):
380
351
self._encode_long(x)
381
elif (PyList_CheckExact(x) or PyTuple_CheckExact(x)
382
or StaticTuple_CheckExact(x)):
352
elif PyList_CheckExact(x) or PyTuple_CheckExact(x):
383
353
self._encode_list(x)
384
354
elif PyDict_CheckExact(x):
385
355
self._encode_dict(x)