1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Pyrex implementation of _read_stanza_*."""
19
from __future__ import absolute_import
22
cdef extern from "python-compat.h":
25
from cpython.bytes cimport (
27
PyBytes_FromStringAndSize,
31
from cpython.unicode cimport (
34
# Deprecated after PEP 393 changes
36
PyUnicode_FromUnicode,
39
from cpython.list cimport (
42
from cpython.mem cimport (
47
from cpython.version cimport (
51
cdef extern from "Python.h":
52
ctypedef int Py_UNICODE
53
object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
54
int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
56
# GZ 2017-09-11: Not sure why cython unicode module lacks this?
57
object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
59
# Python 3.3 or later unicode handling
60
char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size)
62
from libc.string cimport (
66
from .rio import Stanza
69
cdef int _valid_tag_char(char c): # cannot_raise
    # Character-class test used when validating rio tags: yields a true
    # value only for ASCII digits, ASCII letters, '_' and '-'.
    if c >= c'0' and c <= c'9':
        return 1
    if c >= c'a' and c <= c'z':
        return 1
    if c >= c'A' and c <= c'Z':
        return 1
    return c == c'_' or c == c'-'
80
# NOTE(review): the header and local declarations of the enclosing function
# are not visible in this view. Elsewhere in this file it is called as
# `_valid_tag(tag)` and a false result is reported as "invalid rio tag".
# The visible statements obtain a C pointer and length for `tag` and pass
# each element of that buffer to _valid_tag_char().
# GZ 2017-09-11: Encapsulate native string as ascii tag somewhere neater
81
if PY_MAJOR_VERSION >= 3:
82
# Python 3 path: the tag is tested for being exactly a unicode object
# (what happens when the test fails is not visible in this view).
if not PyUnicode_CheckExact(tag):
84
# Get a char* to the tag's UTF-8 data; the size is written to c_len.
c_tag = PyUnicode_AsUTF8AndSize(tag, &c_len)
86
# Bytes path (presumably the non-Python-3 branch; the `else:` line is not
# visible here): the tag is tested for being exactly a bytes object.
if not PyBytes_CheckExact(tag):
88
c_tag = PyBytes_AS_STRING(tag)
89
c_len = PyBytes_GET_SIZE(tag)
92
# Examine every element of the tag buffer in order.
for i from 0 <= i < c_len:
93
if not _valid_tag_char(c_tag[i]):
98
# Split the first line of a tag/value pair held in a C byte buffer.
#
# line, len: the raw line and its length in bytes.
# value: destination buffer that receives the bytes following the
#     two-character separator found at positions i and i+1.
# value_len: out-parameter; value_len[0] is set to the number of bytes
#     copied into `value`.
# Returns the first i bytes of `line` as the tag: a unicode object when
# PY_MAJOR_VERSION >= 3, a bytes object otherwise.
# Raises ValueError when the character after position i is not a space,
# or when the loop finishes without returning.
# NOTE(review): the test that selects the separator position `i` and the
# declaration of `i` are not visible in this view.
cdef object _split_first_line_utf8(char *line, int len,
99
char *value, Py_ssize_t *value_len):
101
for i from 0 <= i < len:
103
# The separator position must be followed by a single space.
if line[i+1] != c' ':
104
raise ValueError("invalid tag in line %r" % line)
105
# Copy everything after the two separator characters. No bounds check on
# `value` is made here -- presumably the caller sizes it to hold the
# whole line; verify against the callers.
memcpy(value, line+i+2, len-i-2)
106
value_len[0] = len-i-2
107
# The tag is the text before position i; its Python type depends on the
# major Python version.
if PY_MAJOR_VERSION >= 3:
108
return PyUnicode_FromStringAndSize(line, i)
109
return PyBytes_FromStringAndSize(line, i)
110
# Reached only when no iteration of the loop returned.
raise ValueError('tag/value separator not found in line %r' % line)
113
# Split the first line of a tag/value pair held in a Py_UNICODE buffer.
#
# line, len: the raw line and its length in Py_UNICODE units.
# value: destination buffer that receives the units following the
#     two-unit separator found at positions i and i+1.
# value_len: out-parameter; value_len[0] is set to the number of units
#     copied into `value`.
# Returns the first i units of `line` as the tag: a unicode object when
# PY_MAJOR_VERSION >= 3, otherwise the result of a strict ASCII encode.
# Raises ValueError when the unit after position i is not a space, or when
# the loop finishes without returning.
# NOTE(review): the test that selects the separator position `i` and the
# declaration of `i` are not visible in this view.
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
114
Py_UNICODE *value, Py_ssize_t *value_len):
116
for i from 0 <= i < len:
118
# The separator position must be followed by a single space.
if line[i+1] != c' ':
119
raise ValueError("invalid tag in line %r" %
120
PyUnicode_FromUnicode(line, len))
121
# Copy everything after the two separator units; the byte count is scaled
# by sizeof(Py_UNICODE) because memcpy works in bytes. No bounds check on
# `value` is made here -- presumably the caller sizes it to hold the
# whole line; verify against the callers.
memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
122
value_len[0] = len-i-2
123
# The tag is the text before position i; its Python type depends on the
# major Python version.
if PY_MAJOR_VERSION >= 3:
124
return PyUnicode_FromUnicode(line, i)
125
return PyUnicode_EncodeASCII(line, i, "strict")
126
# Reached only when no iteration of the loop returned.
raise ValueError("tag/value separator not found in line %r" %
127
PyUnicode_FromUnicode(line, len))
130
# Read one stanza from an iterable of byte-string lines.
#
# Every line must be exactly a bytes object, otherwise TypeError is raised.
# A line consisting of a single "\n" ends the stanza. A line starting with
# a tab continues the previous value; any other line is split into a tag
# and the start of a value by _split_first_line_utf8(). A tag rejected by
# _valid_tag() raises ValueError. The value text is accumulated in a
# PyMem_Malloc'd buffer that is grown with PyMem_Realloc when a line would
# not fit, and the buffer is released with PyMem_Free.
# Returns Stanza.from_pairs(pairs) when a tag is pending after the loop.
# NOTE(review): several lines of this function are not visible in this view
# (initial values of the locals, handling of failed allocations, the
# statements that add pairs to `pairs`, the result of the final `else:`
# branch, and whatever structure guards the PyMem_Free call).
def _read_stanza_utf8(line_iter):
132
cdef Py_ssize_t c_len
133
cdef char *accum_value
134
cdef char *new_accum_value
135
# accum_len: bytes currently used in accum_value; accum_size: its capacity.
cdef Py_ssize_t accum_len, accum_size
140
accum_value = <char *>PyMem_Malloc(accum_size)
141
if accum_value == NULL:
144
for line in line_iter:
147
if not PyBytes_CheckExact(line):
148
raise TypeError("%r is not a plain string" % line)
149
c_line = PyBytes_AS_STRING(line)
150
c_len = PyBytes_GET_SIZE(line)
153
if c_len == 1 and c_line[0] == c"\n":
154
break # end of stanza
155
# Grow the buffer so that the whole of this line is guaranteed to fit
# after the data already accumulated.
if accum_len + c_len > accum_size:
156
accum_size = (accum_len + c_len)
157
# The result goes to a temporary so accum_value is only replaced once the
# NULL check below has been done.
new_accum_value = <char *>PyMem_Realloc(accum_value, accum_size)
158
if new_accum_value == NULL:
161
accum_value = new_accum_value
162
if c_line[0] == c'\t': # continues previous value
164
raise ValueError('invalid continuation line %r' % line)
165
# Append the line without its leading tab to the accumulated value.
memcpy(accum_value+accum_len, c_line+1, c_len-1)
166
accum_len = accum_len + c_len-1
167
else: # new tag:value line
170
# The accumulated value is decoded as UTF-8 without its last byte
# (presumably the trailing newline -- confirm against the rio format).
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
172
tag = _split_first_line_utf8(c_line, c_len, accum_value,
174
if not _valid_tag(tag):
175
raise ValueError("invalid rio tag %r" % (tag,))
176
if tag is not None: # add last tag-value
178
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1, "strict")))
179
return Stanza.from_pairs(pairs)
180
else: # didn't see any content
183
PyMem_Free(accum_value)
186
# Read one stanza from an iterable of unicode lines.
#
# Every line must be exactly a unicode object, otherwise TypeError is
# raised. A line whose first character is a line break (as reported by
# Py_UNICODE_ISLINEBREAK) ends the stanza. A line starting with a tab
# continues the previous value; any other line is split into a tag and the
# start of a value by _split_first_line_unicode(). A tag rejected by
# _valid_tag() raises ValueError. The value text is accumulated in a
# PyMem_Malloc'd Py_UNICODE buffer that is grown with PyMem_Realloc when a
# line would not fit, and the buffer is released with PyMem_Free.
# Returns Stanza.from_pairs(pairs) when a tag is pending after the loop.
# NOTE(review): several lines of this function are not visible in this view
# (initial values of the locals, handling of failed allocations, the
# statements that add pairs to `pairs`, the result of the final `else:`
# branch, and whatever structure guards the PyMem_Free call).
def _read_stanza_unicode(unicode_iter):
187
cdef Py_UNICODE *c_line
189
cdef Py_UNICODE *accum_value
190
cdef Py_UNICODE *new_accum_value
191
# accum_len: units currently used in accum_value; accum_size: its capacity,
# both counted in Py_UNICODE units rather than bytes.
cdef Py_ssize_t accum_len, accum_size
196
accum_value = <Py_UNICODE *>PyMem_Malloc(accum_size*sizeof(Py_UNICODE))
197
if accum_value == NULL:
200
for line in unicode_iter:
203
if not PyUnicode_CheckExact(line):
204
raise TypeError("%r is not a unicode string" % line)
205
c_line = PyUnicode_AS_UNICODE(line)
206
c_len = PyUnicode_GET_SIZE(line)
209
if Py_UNICODE_ISLINEBREAK(c_line[0]):
210
break # end of stanza
211
# Grow the buffer so that the whole of this line is guaranteed to fit
# after the data already accumulated.
if accum_len + c_len > accum_size:
212
accum_size = accum_len + c_len
213
# The result goes to a temporary so accum_value is only replaced once the
# NULL check below has been done.
new_accum_value = <Py_UNICODE *>PyMem_Realloc(accum_value,
214
accum_size*sizeof(Py_UNICODE))
215
if new_accum_value == NULL:
218
accum_value = new_accum_value
219
if c_line[0] == c'\t': # continues previous value,
221
raise ValueError('invalid continuation line %r' % line)
222
# Append the line without its leading tab to the accumulated value; the
# byte count is scaled by sizeof(Py_UNICODE) because memcpy works in bytes.
memcpy(&accum_value[accum_len], &c_line[1],
223
(c_len-1)*sizeof(Py_UNICODE))
224
accum_len = accum_len + (c_len-1)
225
else: # new tag:value line
228
# The accumulated value is converted without its last unit (presumably
# the trailing newline -- confirm against the rio format).
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
229
tag = _split_first_line_unicode(c_line, c_len, accum_value,
231
if not _valid_tag(tag):
232
raise ValueError("invalid rio tag %r" % (tag,))
233
if tag is not None: # add last tag-value
235
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
236
return Stanza.from_pairs(pairs)
237
else: # didn't see any content
240
PyMem_Free(accum_value)