17
17
"""Pyrex implementation of _read_stanza_*."""
20
20
cdef extern from "python-compat.h":
23
cdef extern from "stdlib.h":
25
void *realloc(void *, int)
23
from cpython.bytes cimport (
25
PyBytes_FromStringAndSize,
29
from cpython.unicode cimport (
32
# Deprecated after PEP 393 changes
34
PyUnicode_FromUnicode,
37
from cpython.list cimport (
40
from cpython.mem cimport (
45
from cpython.version cimport (
28
49
cdef extern from "Python.h":
29
ctypedef int Py_ssize_t # Required for older pyrex versions
30
50
ctypedef int Py_UNICODE
31
char *PyString_AS_STRING(object s)
32
Py_ssize_t PyString_GET_SIZE(object t) except -1
33
object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)
34
object PyString_FromStringAndSize(char *s, Py_ssize_t len)
35
int PyString_CheckExact(object)
36
int PyUnicode_CheckExact(object)
37
object PyUnicode_Join(object, object)
38
51
object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
39
Py_UNICODE *PyUnicode_AS_UNICODE(object)
40
Py_UNICODE *PyUnicode_AsUnicode(object)
41
Py_ssize_t PyUnicode_GET_SIZE(object) except -1
42
int PyList_Append(object, object) except -1
43
52
int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
44
object PyUnicode_FromUnicode(Py_UNICODE *, int)
45
void *Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, int)
47
cdef extern from "string.h":
48
void *memcpy(void *, void *, int)
50
from bzrlib.rio import Stanza
54
# GZ 2017-09-11: Not sure why cython unicode module lacks this?
55
object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
57
# Python 3.3 or later unicode handling
58
char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size)
60
from libc.string cimport (
64
from .rio import Stanza
52
67
# Classify one byte of a candidate tag.
#
# :param c: the byte to test.
# :return: non-zero when c is '_', '-', an ASCII lower- or upper-case
#     letter, or an ASCII digit; zero for every other byte.
#
# The comparisons are plain C char range checks, so the function has no
# Python-level failure path (hence the "cannot_raise" marker).
cdef int _valid_tag_char(char c): # cannot_raise
    return (c == c'_' or c == c'-' or
            (c >= c'a' and c <= c'z') or
            (c >= c'A' and c <= c'Z') or
            (c >= c'0' and c <= c'9'))
61
76
cdef Py_ssize_t c_len
63
if not PyString_CheckExact(tag):
65
c_tag = PyString_AS_STRING(tag)
66
c_len = PyString_GET_SIZE(tag)
78
# GZ 2017-09-11: Encapsulate native string as ascii tag somewhere neater
79
if PY_MAJOR_VERSION >= 3:
80
if not PyUnicode_CheckExact(tag):
82
c_tag = PyUnicode_AsUTF8AndSize(tag, &c_len)
84
if not PyBytes_CheckExact(tag):
86
c_tag = PyBytes_AS_STRING(tag)
87
c_len = PyBytes_GET_SIZE(tag)
69
90
for i from 0 <= i < c_len:
81
102
raise ValueError("invalid tag in line %r" % line)
82
103
memcpy(value, line+i+2, len-i-2)
83
104
value_len[0] = len-i-2
84
return PyString_FromStringAndSize(line, i)
105
if PY_MAJOR_VERSION >= 3:
106
return PyUnicode_FromStringAndSize(line, i)
107
return PyBytes_FromStringAndSize(line, i)
85
108
raise ValueError('tag/value separator not found in line %r' % line)
88
# Split a Py_UNICODE line into its tag and value parts.
#
# The value part (everything from offset i+2 to the end of the line) is
# copied into the caller-supplied `value` buffer and its element count is
# stored in value_len[0]; the first i elements are returned as the tag.
# Raises ValueError when no tag/value separator is found.
#
# NOTE(review): this excerpt interleaves two revisions of the function and
# omits the loop's separator test, so `i` is presumably the index of the
# separator -- confirm against the full file.
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
111
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
89
112
Py_UNICODE *value, Py_ssize_t *value_len):
91
114
# Walk the line one Py_UNICODE element at a time.
for i from 0 <= i < len:
95
118
PyUnicode_FromUnicode(line, len))
96
119
# Copy the remainder of the line, starting two elements past i, into the
# caller's buffer; the byte count is scaled by sizeof(Py_UNICODE).
memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
97
120
# Tell the caller how many elements were copied.
value_len[0] = len-i-2
121
# On Python 3 the tag is returned as a unicode object built from the
# first i elements.
if PY_MAJOR_VERSION >= 3:
122
return PyUnicode_FromUnicode(line, i)
98
123
# Otherwise the tag is encoded to ASCII using the "strict" error handler.
return PyUnicode_EncodeASCII(line, i, "strict")
99
124
# Presumably reached only when the scan ends without returning a tag.
raise ValueError("tag/value separator not found in line %r" %
100
125
PyUnicode_FromUnicode(line, len))
103
128
# Read one stanza from an iterable of byte-string lines.
#
# Each value is accumulated in a C buffer, decoded with
# PyUnicode_DecodeUTF8 and paired with its tag; when a tag was seen the
# result is Stanza.from_pairs(pairs).
# Raises TypeError for a line that is not an exact byte string, ValueError
# for a tag rejected by _valid_tag, and MemoryError when the buffer cannot
# be allocated or grown.
#
# NOTE(review): this excerpt interleaves two revisions of the function
# (malloc/realloc/PyString_* next to PyMem_*/PyBytes_*) and omits several
# lines, so the comments below describe only what is visible here.
def _read_stanza_utf8(line_iter):
104
129
cdef char *c_line
105
130
cdef Py_ssize_t c_len
106
cdef char *accum_value, *new_accum_value
131
cdef char *accum_value
132
cdef char *new_accum_value
107
133
cdef Py_ssize_t accum_len, accum_size
111
137
# The accumulation buffer starts at 4096 bytes.
accum_size = 4096
112
accum_value = <char *>malloc(accum_size)
138
accum_value = <char *>PyMem_Malloc(accum_size)
113
139
# A NULL result means the allocation failed.
if accum_value == NULL:
114
140
raise MemoryError
116
142
for line in line_iter:
118
144
break # end of file
119
# Only exact byte strings are accepted as input lines.
if not PyString_CheckExact(line):
145
if not PyBytes_CheckExact(line):
120
146
raise TypeError("%r is not a plain string" % line)
121
# Fetch the line's raw char pointer and its length.
c_line = PyString_AS_STRING(line)
122
c_len = PyString_GET_SIZE(line)
147
c_line = PyBytes_AS_STRING(line)
148
c_len = PyBytes_GET_SIZE(line)
124
150
break # end of file
125
151
# A line consisting solely of "\n" terminates the stanza.
if c_len == 1 and c_line[0] == c"\n":
126
152
break # end of stanza
127
153
# Grow the buffer to exactly accum_len + c_len when this line would not fit.
if accum_len + c_len > accum_size:
128
154
accum_size = (accum_len + c_len)
129
new_accum_value = <char *>realloc(accum_value, accum_size)
155
new_accum_value = <char *>PyMem_Realloc(accum_value, accum_size)
130
156
if new_accum_value == NULL:
131
157
raise MemoryError
138
164
# Extend the accumulated length by c_len-1; the copy that precedes this
# is not shown in this excerpt.
accum_len = accum_len + c_len-1
139
165
else: # new tag:value line
140
166
# A previous tag is pending: its accumulated value is decoded and paired
# with it (presumably appended to pairs -- the call head is elided).
if tag is not None:
142
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
168
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
144
tag = _split_first_line_utf8(c_line, c_len, accum_value,
170
tag = _split_first_line_utf8(c_line, c_len, accum_value,
146
172
# Refuse tags that _valid_tag does not accept.
if not _valid_tag(tag):
147
173
raise ValueError("invalid rio tag %r" % (tag,))
148
174
if tag is not None: # add last tag-value
150
176
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1, "strict")))
151
177
return Stanza.from_pairs(pairs)
152
178
else: # didn't see any content
181
# Release the accumulation buffer.
PyMem_Free(accum_value)
158
184
def _read_stanza_unicode(unicode_iter):
159
185
cdef Py_UNICODE *c_line
161
cdef Py_UNICODE *accum_value, *new_accum_value
187
cdef Py_UNICODE *accum_value
188
cdef Py_UNICODE *new_accum_value
162
189
cdef Py_ssize_t accum_len, accum_size
166
193
accum_size = 4096
167
accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
194
accum_value = <Py_UNICODE *>PyMem_Malloc(accum_size*sizeof(Py_UNICODE))
168
195
if accum_value == NULL:
169
196
raise MemoryError
181
208
break # end of stanza
182
209
if accum_len + c_len > accum_size:
183
210
accum_size = accum_len + c_len
184
new_accum_value = <Py_UNICODE *>realloc(accum_value,
211
new_accum_value = <Py_UNICODE *>PyMem_Realloc(accum_value,
185
212
accum_size*sizeof(Py_UNICODE))
186
213
if new_accum_value == NULL:
187
214
raise MemoryError
195
222
accum_len = accum_len + (c_len-1)
196
223
else: # new tag:value line
197
224
if tag is not None:
199
226
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
200
tag = _split_first_line_unicode(c_line, c_len, accum_value,
227
tag = _split_first_line_unicode(c_line, c_len, accum_value,
202
229
if not _valid_tag(tag):
203
230
raise ValueError("invalid rio tag %r" % (tag,))