1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Pyrex implementation of _read_stanza_*."""
20
cdef extern from "python-compat.h":
23
from cpython.bytes cimport (
25
PyBytes_FromStringAndSize,
29
from cpython.unicode cimport (
32
# Deprecated after PEP 393 changes
34
PyUnicode_FromUnicode,
37
from cpython.list cimport (
40
from cpython.mem cimport (
45
from cpython.version cimport (
49
cdef extern from "Python.h":
50
ctypedef int Py_UNICODE
51
object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
52
int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
54
# GZ 2017-09-11: Not sure why cython unicode module lacks this?
55
object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
57
# Python 3.3 or later unicode handling
58
char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size)
60
from libc.string cimport (
64
from .rio import Stanza
67
cdef int _valid_tag_char(char c): # cannot_raise
    """Report whether the byte ``c`` is acceptable inside a tag.

    Accepted bytes are the ASCII letters, the ASCII digits, ``_`` and ``-``.
    Returns a non-zero value for an accepted byte and 0 for anything else.
    """
    # Letters, either case.
    if c'a' <= c <= c'z' or c'A' <= c <= c'Z':
        return 1
    # Decimal digits.
    if c'0' <= c <= c'9':
        return 1
    # The only punctuation allowed.
    return c == c'_' or c == c'-'
78
# GZ 2017-09-11: Encapsulate native string as ascii tag somewhere neater
# NOTE(review): the header of the enclosing definition and several body lines
# are missing from this extract. The call `_valid_tag(tag)` elsewhere in the
# file suggests this is the body of `_valid_tag` -- confirm against the full
# source before relying on these notes.
#
# Visible behaviour: obtain a C pointer (c_tag) and length (c_len) for `tag`,
# then run every byte through _valid_tag_char.
79
# On Python 3 the pointer/length pair comes from PyUnicode_AsUTF8AndSize; the
# statement executed when the exact-type check fails is not visible here.
if PY_MAJOR_VERSION >= 3:
80
if not PyUnicode_CheckExact(tag):
82
c_tag = PyUnicode_AsUTF8AndSize(tag, &c_len)
84
# Presumably the `else` (Python 2) branch: the tag is read as a bytes object.
if not PyBytes_CheckExact(tag):
86
c_tag = PyBytes_AS_STRING(tag)
87
c_len = PyBytes_GET_SIZE(tag)
90
# Scan all c_len bytes; the action taken on a rejected byte is not visible.
for i from 0 <= i < c_len:
91
if not _valid_tag_char(c_tag[i]):
96
cdef object _split_first_line_utf8(char *line, int len,
97
char *value, Py_ssize_t *value_len):
# Split the first line of a tag/value pair held in `line` (`len` bytes).
#
# On success the value portion is copied into `value`, its length is stored
# in value_len[0], and the tag (the first `i` bytes of `line`) is returned as
# a str on Python 3 or as bytes otherwise.
# Raises ValueError when the separator is not followed by a space, or when
# the loop finishes without finding a separator.
#
# `value` is written with len-i-2 bytes; presumably the caller guarantees
# the buffer is at least that large -- confirm at the call site.
# NOTE(review): the line that tests line[i] for the separator character is
# missing from this extract (judging by the error text below it sits between
# the `for` and the `line[i+1]` check).
99
for i from 0 <= i < len:
101
# The character right after the separator must be a single space.
if line[i+1] != c' ':
102
raise ValueError("invalid tag in line %r" % line)
103
# The value starts two positions past index i and runs to the end of the line.
memcpy(value, line+i+2, len-i-2)
104
value_len[0] = len-i-2
105
if PY_MAJOR_VERSION >= 3:
106
return PyUnicode_FromStringAndSize(line, i)
107
return PyBytes_FromStringAndSize(line, i)
108
# Reached only when the loop ends without returning.
raise ValueError('tag/value separator not found in line %r' % line)
111
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
112
Py_UNICODE *value, Py_ssize_t *value_len):
# Py_UNICODE counterpart of _split_first_line_utf8.
#
# On success the value portion is copied into `value` (len-i-2 Py_UNICODE
# units), that count is stored in value_len[0], and the tag (the first `i`
# units of `line`) is returned: a unicode object on Python 3, otherwise the
# result of a strict ASCII encode.
# Raises ValueError when the separator is not followed by a space, or when
# the loop finishes without finding a separator.
#
# NOTE(review): the line that tests line[i] for the separator character is
# missing from this extract.
# NOTE(review): PyUnicode_FromUnicode is flagged as deprecated by the import
# comment in this file ("Deprecated after PEP 393 changes").
114
for i from 0 <= i < len:
116
# The unit right after the separator must be a single space.
if line[i+1] != c' ':
117
raise ValueError("invalid tag in line %r" %
118
PyUnicode_FromUnicode(line, len))
119
# The copy size is expressed in bytes, hence the sizeof(Py_UNICODE) factor.
memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
120
value_len[0] = len-i-2
121
if PY_MAJOR_VERSION >= 3:
122
return PyUnicode_FromUnicode(line, i)
123
return PyUnicode_EncodeASCII(line, i, "strict")
124
# Reached only when the loop ends without returning.
raise ValueError("tag/value separator not found in line %r" %
125
PyUnicode_FromUnicode(line, len))
128
def _read_stanza_utf8(line_iter):
# Read one stanza from an iterable of bytes lines.
#
# Lines are consumed until a line that is exactly "\n". Each "tag: value"
# line starts a new pair; a line beginning with a tab continues the value of
# the previous pair. Values are decoded from UTF-8 with "strict" errors.
# Returns Stanza.from_pairs(pairs) when at least one tag was seen.
# Raises TypeError for a line that is not exactly a bytes object and
# ValueError for a bad continuation line or an invalid tag.
#
# NOTE(review): several original lines are missing from this extract
# (variable initialisation, the allocation-failure handling, the pair-append
# calls, the result when no content was seen, and presumably a try/finally
# around the loop) -- consult the full source before editing.
130
cdef Py_ssize_t c_len
131
cdef char *accum_value
132
cdef char *new_accum_value
133
cdef Py_ssize_t accum_len, accum_size
138
# Scratch buffer holding the value that is currently being accumulated.
accum_value = <char *>PyMem_Malloc(accum_size)
139
if accum_value == NULL:
142
for line in line_iter:
145
if not PyBytes_CheckExact(line):
146
raise TypeError("%r is not a plain string" % line)
147
c_line = PyBytes_AS_STRING(line)
148
c_len = PyBytes_GET_SIZE(line)
151
if c_len == 1 and c_line[0] == c"\n":
152
break # end of stanza
153
# Grow the buffer up front so that the whole incoming line fits after the
# data already accumulated.
if accum_len + c_len > accum_size:
154
accum_size = (accum_len + c_len)
155
new_accum_value = <char *>PyMem_Realloc(accum_value, accum_size)
156
if new_accum_value == NULL:
159
# Adopt the new pointer only after the NULL check above.
accum_value = new_accum_value
160
if c_line[0] == c'\t': # continues previous value
162
# NOTE(review): the condition guarding this raise is not visible here.
raise ValueError('invalid continuation line %r' % line)
163
# Append the line minus its leading tab to the current value.
memcpy(accum_value+accum_len, c_line+1, c_len-1)
164
accum_len = accum_len + c_len-1
165
else: # new tag:value line
168
# accum_len-1 leaves out the last accumulated byte (presumably the trailing
# newline -- confirm).
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
170
# The split writes the new value at the start of accum_value.
tag = _split_first_line_utf8(c_line, c_len, accum_value,
172
if not _valid_tag(tag):
173
raise ValueError("invalid rio tag %r" % (tag,))
174
if tag is not None: # add last tag-value
176
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1, "strict")))
177
return Stanza.from_pairs(pairs)
178
else: # didn't see any content
181
# Release the scratch buffer (presumably from a `finally` clause whose header
# is not visible in this extract).
PyMem_Free(accum_value)
184
def _read_stanza_unicode(unicode_iter):
# Read one stanza from an iterable of unicode lines.
#
# Lines are consumed until one whose first code unit is a line break. Each
# "tag: value" line starts a new pair; a line beginning with a tab continues
# the value of the previous pair.
# Returns Stanza.from_pairs(pairs) when at least one tag was seen.
# Raises TypeError for a line that is not exactly a unicode object and
# ValueError for a bad continuation line or an invalid tag.
#
# NOTE(review): several original lines are missing from this extract
# (variable initialisation, the allocation-failure handling, the pair-append
# calls, the result when no content was seen, and presumably a try/finally
# around the loop) -- consult the full source before editing.
# NOTE(review): this function relies on the legacy Py_UNICODE API
# (PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE, PyUnicode_FromUnicode), which
# the import comment in this file marks as deprecated after PEP 393 and
# which, as far as I know, was removed in CPython 3.12 -- verify before
# building against newer interpreters.
185
cdef Py_UNICODE *c_line
187
cdef Py_UNICODE *accum_value
188
cdef Py_UNICODE *new_accum_value
189
cdef Py_ssize_t accum_len, accum_size
194
# Scratch buffer, sized in Py_UNICODE units, for the value being accumulated.
accum_value = <Py_UNICODE *>PyMem_Malloc(accum_size*sizeof(Py_UNICODE))
195
if accum_value == NULL:
198
for line in unicode_iter:
201
if not PyUnicode_CheckExact(line):
202
raise TypeError("%r is not a unicode string" % line)
203
c_line = PyUnicode_AS_UNICODE(line)
204
c_len = PyUnicode_GET_SIZE(line)
207
# Only the first code unit is inspected; the line length is not checked here.
if Py_UNICODE_ISLINEBREAK(c_line[0]):
208
break # end of stanza
209
# Grow the buffer up front so that the whole incoming line fits after the
# data already accumulated.
if accum_len + c_len > accum_size:
210
accum_size = accum_len + c_len
211
new_accum_value = <Py_UNICODE *>PyMem_Realloc(accum_value,
212
accum_size*sizeof(Py_UNICODE))
213
if new_accum_value == NULL:
216
# Adopt the new pointer only after the NULL check above.
accum_value = new_accum_value
217
if c_line[0] == c'\t': # continues previous value,
219
# NOTE(review): the condition guarding this raise is not visible here.
raise ValueError('invalid continuation line %r' % line)
220
# Append the line minus its leading tab to the current value.
memcpy(&accum_value[accum_len], &c_line[1],
221
(c_len-1)*sizeof(Py_UNICODE))
222
accum_len = accum_len + (c_len-1)
223
else: # new tag:value line
226
# accum_len-1 leaves out the last accumulated unit (presumably the trailing
# newline -- confirm).
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
227
# The split writes the new value at the start of accum_value.
tag = _split_first_line_unicode(c_line, c_len, accum_value,
229
if not _valid_tag(tag):
230
raise ValueError("invalid rio tag %r" % (tag,))
231
if tag is not None: # add last tag-value
233
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
234
return Stanza.from_pairs(pairs)
235
else: # didn't see any content
238
# Release the scratch buffer (presumably from a `finally` clause whose header
# is not visible in this extract).
PyMem_Free(accum_value)