1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Pyrex implementation of _read_stanza_*."""
19
from __future__ import absolute_import
22
cdef extern from "python-compat.h":
25
cdef extern from "stdlib.h":
27
void *realloc(void *, int)
30
cdef extern from "Python.h":
31
ctypedef int Py_UNICODE
32
char *PyString_AS_STRING(object s)
33
Py_ssize_t PyString_GET_SIZE(object t) except -1
34
object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)
35
object PyString_FromStringAndSize(char *s, Py_ssize_t len)
36
int PyString_CheckExact(object)
37
int PyUnicode_CheckExact(object)
38
object PyUnicode_Join(object, object)
39
object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
40
Py_UNICODE *PyUnicode_AS_UNICODE(object)
41
Py_UNICODE *PyUnicode_AsUnicode(object)
42
Py_ssize_t PyUnicode_GET_SIZE(object) except -1
43
int PyList_Append(object, object) except -1
44
int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
45
object PyUnicode_FromUnicode(Py_UNICODE *, int)
46
void *Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, int)
48
cdef extern from "string.h":
49
void *memcpy(void *, void *, int)
51
from .rio import Stanza
53
cdef int _valid_tag_char(char c): # cannot_raise
    # Tell whether the single byte `c` is allowed inside a tag name.
    # Accepted bytes are underscore, hyphen, ASCII letters and ASCII digits.
    # The answer is a C truth value: 1 when accepted, 0 otherwise.
    if c == c'_' or c == c'-':
        return 1
    if c >= c'a' and c <= c'z':
        return 1
    if c >= c'A' and c <= c'Z':
        return 1
    if c >= c'0' and c <= c'9':
        return 1
    return 0
64
# NOTE(review): this is a fragment of a tag-validation routine. Its `def`
# line, its local declarations and its return statements are not present in
# this view. It is presumably `_valid_tag`, which the stanza readers call --
# confirm against the complete file.
#
# Only exact byte strings are examined; what happens when this check fails is
# not visible here.
if not PyString_CheckExact(tag):
66
# Fetch the C buffer and length of `tag` so each character can be inspected.
c_tag = PyString_AS_STRING(tag)
67
c_len = PyString_GET_SIZE(tag)
70
# Run every character of the tag through _valid_tag_char.
for i from 0 <= i < c_len:
71
if not _valid_tag_char(c_tag[i]):
76
# Split the first line of a field, held in the C buffer `line` of `len` bytes,
# into its tag and its value.
#
# The value bytes are copied into the caller-supplied buffer `value`, and the
# number of bytes copied is stored in value_len[0]. The tag is returned as a
# Python byte string made of the first `i` bytes of `line`.
#
# Raises ValueError when the tag is invalid or when no tag/value separator is
# found before the end of the line.
#
# NOTE(review): the tests inside the loop that pick the split position `i` and
# reject bad tags are not present in this view -- confirm against the complete
# file before relying on the details below.
cdef object _split_first_line_utf8(char *line, int len,
77
char *value, Py_ssize_t *value_len):
79
for i from 0 <= i < len:
82
raise ValueError("invalid tag in line %r" % line)
83
# The value starts two bytes past index i (presumably skipping a ": "
# separator -- TODO confirm). No bounds check on `value` is visible here, so
# the caller apparently must supply at least len-i-2 bytes of room.
memcpy(value, line+i+2, len-i-2)
84
value_len[0] = len-i-2
85
return PyString_FromStringAndSize(line, i)
86
# Reached only when the loop finished without returning.
raise ValueError('tag/value separator not found in line %r' % line)
89
# Py_UNICODE counterpart of the byte-string splitter: split the first line of
# a field, held in the buffer `line` of `len` code units, into tag and value.
#
# The value code units are copied into the caller-supplied buffer `value`, and
# their count (in code units, not bytes) is stored in value_len[0]. The tag is
# returned as the first `i` code units of `line` encoded to ASCII with the
# "strict" error handler.
#
# Raises ValueError when the tag is invalid or when no tag/value separator is
# found before the end of the line.
#
# NOTE(review): the tests inside the loop that pick the split position `i` and
# reject bad tags are not present in this view -- confirm against the complete
# file before relying on the details below.
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
90
Py_UNICODE *value, Py_ssize_t *value_len):
92
for i from 0 <= i < len:
95
# The raw buffer is wrapped in a unicode object so it can be shown with %r.
raise ValueError("invalid tag in line %r" %
96
PyUnicode_FromUnicode(line, len))
97
# The value starts two code units past index i (presumably skipping a ": "
# separator -- TODO confirm). memcpy counts bytes, hence the multiplication by
# sizeof(Py_UNICODE). No bounds check on `value` is visible here.
memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
98
value_len[0] = len-i-2
99
return PyUnicode_EncodeASCII(line, i, "strict")
100
# Reached only when the loop finished without returning.
raise ValueError("tag/value separator not found in line %r" %
101
PyUnicode_FromUnicode(line, len))
104
# Read one stanza from `line_iter`, an iterable of byte-string lines.
#
# Lines are consumed until one consisting solely of "\n" is met or the
# iterable is exhausted. A line starting with a tab continues the value of the
# previous tag; any other line starts a new tag/value pair. Values are
# accumulated in a C buffer and decoded from UTF-8 when stored. When at least
# one tag was seen, the result is Stanza.from_pairs(pairs).
#
# Raises TypeError when a line is not an exact byte string, and ValueError for
# an invalid continuation line or an invalid tag.
#
# NOTE(review): several statements are not present in this view, including the
# initialisation of accum_size/accum_len/pairs/tag, the out-of-memory
# handling, the list appends, the result for an empty stanza and the release
# of accum_value -- confirm against the complete file.
def _read_stanza_utf8(line_iter):
106
cdef Py_ssize_t c_len
107
cdef char *accum_value, *new_accum_value
108
cdef Py_ssize_t accum_len, accum_size
113
# accum_value is raw malloc'ed memory holding the value being built. It has to
# be released on every exit path; that release is not visible here -- verify.
accum_value = <char *>malloc(accum_size)
114
if accum_value == NULL:
117
for line in line_iter:
120
if not PyString_CheckExact(line):
121
raise TypeError("%r is not a plain string" % line)
122
c_line = PyString_AS_STRING(line)
123
c_len = PyString_GET_SIZE(line)
126
if c_len == 1 and c_line[0] == c"\n":
127
break # end of stanza
128
# Make sure the buffer can take this whole line appended to what is already
# accumulated. It grows to exactly the size needed.
if accum_len + c_len > accum_size:
129
accum_size = (accum_len + c_len)
130
# The realloc result goes into a separate variable and is checked for NULL
# before it replaces accum_value, so the old pointer is not overwritten on
# failure.
new_accum_value = <char *>realloc(accum_value, accum_size)
131
if new_accum_value == NULL:
134
accum_value = new_accum_value
135
if c_line[0] == c'\t': # continues previous value
137
raise ValueError('invalid continuation line %r' % line)
138
# Append the line without its leading tab to the accumulated value.
memcpy(accum_value+accum_len, c_line+1, c_len-1)
139
accum_len = accum_len + c_len-1
140
else: # new tag:value line
143
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
145
tag = _split_first_line_utf8(c_line, c_len, accum_value,
147
if not _valid_tag(tag):
148
raise ValueError("invalid rio tag %r" % (tag,))
149
if tag is not None: # add last tag-value
151
# accum_len-1 leaves out the final accumulated byte (presumably the trailing
# newline -- TODO confirm). "strict" makes invalid UTF-8 raise.
(tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1, "strict")))
152
return Stanza.from_pairs(pairs)
153
else: # didn't see any content
159
# Read one stanza from `unicode_iter`, an iterable of unicode lines.
#
# Lines are consumed until one whose first character is a line break is met or
# the iterable is exhausted. A line starting with a tab continues the value of
# the previous tag; any other line starts a new tag/value pair. Values are
# accumulated in a Py_UNICODE buffer and turned into unicode objects when
# stored. When at least one tag was seen, the result is
# Stanza.from_pairs(pairs).
#
# Raises TypeError when a line is not an exact unicode object, and ValueError
# for an invalid continuation line or an invalid tag.
#
# NOTE(review): several statements are not present in this view, including the
# declaration of c_len, the initialisation of accum_size/accum_len/pairs/tag,
# the out-of-memory handling, the list appends, the result for an empty stanza
# and the release of accum_value -- confirm against the complete file.
def _read_stanza_unicode(unicode_iter):
160
cdef Py_UNICODE *c_line
162
cdef Py_UNICODE *accum_value, *new_accum_value
163
cdef Py_ssize_t accum_len, accum_size
168
# Sizes are tracked in code units, so allocations are scaled by
# sizeof(Py_UNICODE). The buffer is raw malloc'ed memory that has to be
# released on every exit path; that release is not visible here -- verify.
accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
169
if accum_value == NULL:
172
for line in unicode_iter:
175
if not PyUnicode_CheckExact(line):
176
raise TypeError("%r is not a unicode string" % line)
177
c_line = PyUnicode_AS_UNICODE(line)
178
c_len = PyUnicode_GET_SIZE(line)
181
# Only the first character is tested here, with no length test on this line.
# NOTE(review): no guard for an empty line is visible before c_line[0] is
# read -- confirm.
if Py_UNICODE_ISLINEBREAK(c_line[0]):
182
break # end of stanza
183
# Make sure the buffer can take this whole line appended to what is already
# accumulated. It grows to exactly the size needed.
if accum_len + c_len > accum_size:
184
accum_size = accum_len + c_len
185
# The realloc result goes into a separate variable and is checked for NULL
# before it replaces accum_value, so the old pointer is not overwritten on
# failure.
new_accum_value = <Py_UNICODE *>realloc(accum_value,
186
accum_size*sizeof(Py_UNICODE))
187
if new_accum_value == NULL:
190
accum_value = new_accum_value
191
if c_line[0] == c'\t': # continues previous value,
193
raise ValueError('invalid continuation line %r' % line)
194
# Append the line without its leading tab. memcpy counts bytes, hence the
# multiplication by sizeof(Py_UNICODE).
memcpy(&accum_value[accum_len], &c_line[1],
195
(c_len-1)*sizeof(Py_UNICODE))
196
accum_len = accum_len + (c_len-1)
197
else: # new tag:value line
200
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
201
tag = _split_first_line_unicode(c_line, c_len, accum_value,
203
if not _valid_tag(tag):
204
raise ValueError("invalid rio tag %r" % (tag,))
205
if tag is not None: # add last tag-value
207
# accum_len-1 leaves out the final accumulated code unit (presumably the
# trailing newline -- TODO confirm).
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
208
return Stanza.from_pairs(pairs)
209
else: # didn't see any content