/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7509.1.1 by Jelmer Vernooij
Set cython language level to Python 3.
16
#
17
# cython: language_level=3
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
18
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
19
"""Cython implementation of _read_stanza_*."""
20
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
21
4354.3.3 by Jelmer Vernooij
More performance tweaks.
22
cdef extern from "python-compat.h":
23
    pass
24
6753.2.1 by Martin
Make _rio_pyx compile and pass tests on Python 3
25
from cpython.bytes cimport (
26
    PyBytes_CheckExact,
27
    PyBytes_FromStringAndSize,
28
    PyBytes_AS_STRING,
29
    PyBytes_GET_SIZE,
30
    )
31
from cpython.unicode cimport (
32
    PyUnicode_CheckExact,
33
    PyUnicode_DecodeUTF8,
34
    # Deprecated after PEP 393 changes
35
    PyUnicode_AS_UNICODE,
36
    PyUnicode_FromUnicode,
37
    PyUnicode_GET_SIZE,
38
    )
39
from cpython.list cimport (
40
    PyList_Append,
41
    )
42
from cpython.mem cimport (
43
    PyMem_Free,
44
    PyMem_Malloc,
45
    PyMem_Realloc,
46
    )
47
from cpython.version cimport (
48
    PY_MAJOR_VERSION,
49
    )
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
50
4354.3.3 by Jelmer Vernooij
More performance tweaks.
51
cdef extern from "Python.h":
    ctypedef int Py_UNICODE
    object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
    int Py_UNICODE_ISLINEBREAK(Py_UNICODE)

    # GZ 2017-09-11: Not sure why cython unicode module lacks this?
    object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)

    # Python 3.3 or later unicode handling
    # This call returns NULL with an exception set when the string cannot
    # be encoded as UTF-8 (for example when it contains lone surrogates).
    # Declaring the error return value makes Cython check the result and
    # propagate the exception instead of handing a NULL pointer and an
    # unset size back to the caller.
    char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size) except NULL
61
62
from libc.string cimport (
63
    memcpy,
64
    )
4354.3.13 by Jelmer Vernooij
Add more RIO tests, fix bugs in pyrex implementation.
65
6656.2.2 by Jelmer Vernooij
Use absolute_import.
66
from .rio import Stanza
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
67
6753.2.1 by Martin
Make _rio_pyx compile and pass tests on Python 3
68
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
69
cdef int _valid_tag_char(char c): # cannot_raise
    # Returns non-zero when c is allowed inside a tag: '_', '-', an ASCII
    # letter or an ASCII digit; returns zero for every other byte.
    if c == c'_' or c == c'-':
        return 1
    if c'a' <= c <= c'z':
        return 1
    if c'A' <= c <= c'Z':
        return 1
    return c'0' <= c <= c'9'
74
75
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
76
def _valid_tag(tag):
    """Tell whether ``tag`` is a well-formed tag.

    :param tag: an exact ``str`` when running on Python 3 or later, an
        exact byte string otherwise.
    :return: True when the tag is non-empty and every byte of it passes
        ``_valid_tag_char``; False otherwise.
    :raises TypeError: if ``tag`` is not of the expected exact type.
    """
    cdef char *raw
    cdef Py_ssize_t raw_len
    cdef int pos
    # GZ 2017-09-11: Encapsulate native string as ascii tag somewhere neater
    if PY_MAJOR_VERSION < 3:
        if not PyBytes_CheckExact(tag):
            raise TypeError(tag)
        raw = PyBytes_AS_STRING(tag)
        raw_len = PyBytes_GET_SIZE(tag)
    else:
        if not PyUnicode_CheckExact(tag):
            raise TypeError(tag)
        # The tag is inspected through its UTF-8 representation.
        raw = PyUnicode_AsUTF8AndSize(tag, &raw_len)
    if raw_len < 1:
        # An empty tag is never valid.
        return False
    pos = 0
    while pos < raw_len:
        if not _valid_tag_char(raw[pos]):
            return False
        pos += 1
    return True
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
96
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
97
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
98
cdef object _split_first_line_utf8(char *line, int len,
                                   char *value, Py_ssize_t *value_len):
    # Split a "tag: value" line held in a byte buffer.
    #
    # line/len:  the line bytes and their count.
    # value:     destination buffer; receives everything after ": ".
    # value_len: receives the number of bytes copied into value.
    #
    # Returns the tag (the bytes before the first ':') as a native string.
    # Raises ValueError when there is no ':' in the line, or when the
    # first ':' is not followed by a space.
    cdef int colon
    cdef int rest
    colon = 0
    while colon < len and line[colon] != c':':
        colon += 1
    if colon >= len:
        raise ValueError('tag/value separator not found in line %r' % line)
    if line[colon+1] != c' ':
        raise ValueError("invalid tag in line %r" % line)
    # Skip the ':' and the space that follows it.
    rest = len - colon - 2
    memcpy(value, line + colon + 2, rest)
    value_len[0] = rest
    if PY_MAJOR_VERSION >= 3:
        return PyUnicode_FromStringAndSize(line, colon)
    return PyBytes_FromStringAndSize(line, colon)
111
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
112
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
113
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
                                      Py_UNICODE *value, Py_ssize_t *value_len):
    # Split a "tag: value" line held in a Py_UNICODE buffer.
    #
    # line/len:  the line's code units and their count.
    # value:     destination buffer; receives everything after ": ".
    # value_len: receives the number of code units copied into value.
    #
    # Returns the tag (the text before the first ':') as a native string.
    # Raises ValueError when there is no ':' in the line, or when the
    # first ':' is not followed by a space.
    cdef int colon
    cdef int rest
    colon = 0
    while colon < len and line[colon] != c':':
        colon += 1
    if colon >= len:
        raise ValueError("tag/value separator not found in line %r" %
                         PyUnicode_FromUnicode(line, len))
    if line[colon+1] != c' ':
        raise ValueError("invalid tag in line %r" %
                         PyUnicode_FromUnicode(line, len))
    # Skip the ':' and the space that follows it.
    rest = len - colon - 2
    memcpy(value, &line[colon+2], rest * sizeof(Py_UNICODE))
    value_len[0] = rest
    if PY_MAJOR_VERSION >= 3:
        return PyUnicode_FromUnicode(line, colon)
    return PyUnicode_EncodeASCII(line, colon, "strict")
128
129
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
130
def _read_stanza_utf8(line_iter):
    """Read one stanza from an iterator of UTF-8 encoded lines.

    :param line_iter: iterable yielding exact ``bytes`` objects, one per
        line.  A ``None`` item or an empty line is treated as end of file;
        a line consisting of a single newline ends the stanza.
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, value) pairs that
        were read, or None if no tag line was seen.
    :raises TypeError: if an item is not an exact ``bytes`` object.
    :raises ValueError: on a continuation line before any tag, a line
        without a valid ``tag: `` prefix, or a tag rejected by
        ``_valid_tag``.
    :raises MemoryError: if the accumulation buffer cannot be allocated
        or grown.

    Every value is decoded as UTF-8 with the ``surrogateescape`` error
    handler.  Previously only the last value of a stanza used it and all
    earlier values were decoded strictly, so the same bytes could be
    accepted or rejected depending on where they appeared.
    """
    cdef char *c_line
    cdef Py_ssize_t c_len
    cdef char *accum_value
    cdef char *new_accum_value
    cdef Py_ssize_t accum_len, accum_size
    pairs = []
    tag = None
    # accum_value is a scratch buffer holding the raw bytes of the value
    # currently being assembled; accum_len is how much of it is in use.
    accum_len = 0
    accum_size = 4096
    accum_value = <char *>PyMem_Malloc(accum_size)
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in line_iter:
            if line is None:
                break # end of file
            if not PyBytes_CheckExact(line):
                raise TypeError("%r is not a plain string" % line)
            c_line = PyBytes_AS_STRING(line)
            c_len = PyBytes_GET_SIZE(line)
            if c_len < 1:
                break       # end of file
            if c_len == 1 and c_line[0] == c"\n":
                break       # end of stanza
            # Make sure the buffer can take this whole line on top of what
            # is already accumulated, whichever branch below is taken.
            if accum_len + c_len > accum_size:
                accum_size = (accum_len + c_len)
                new_accum_value = <char *>PyMem_Realloc(accum_value, accum_size)
                if new_accum_value == NULL:
                    # The old buffer is still valid; it is released in the
                    # finally clause.
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Append the line without its leading tab.
                memcpy(accum_value+accum_len, c_line+1, c_len-1)
                accum_len = accum_len + c_len-1
            else: # new tag:value line
                if tag is not None:
                    # Flush the previous pair.  accum_len-1 leaves out the
                    # final accumulated byte (presumably the trailing
                    # newline of the last line).
                    PyList_Append(pairs,
                        (tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
                                                   "surrogateescape")))
                # Restarts the accumulation: the value part of the line is
                # copied to the start of the buffer and accum_len is reset.
                tag = _split_first_line_utf8(c_line, c_len, accum_value,
                                             &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            PyList_Append(pairs,
                (tag, PyUnicode_DecodeUTF8(accum_value, accum_len-1,
                                           "surrogateescape")))
            return Stanza.from_pairs(pairs)
        else:     # didn't see any content
            return None
    finally:
        PyMem_Free(accum_value)
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
184
185
186
def _read_stanza_unicode(unicode_iter):
    """Read one stanza from an iterator of unicode lines.

    :param unicode_iter: iterable yielding exact unicode strings, one per
        line.  A ``None`` item or an empty line is treated as end of file;
        a line whose first character is a line break ends the stanza.
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, value) pairs that
        were read, or None if no tag line was seen.
    :raises TypeError: if an item is not an exact unicode string.
    :raises ValueError: on a continuation line before any tag, a line
        without a valid ``tag: `` prefix, or a tag rejected by
        ``_valid_tag``.
    :raises MemoryError: if the accumulation buffer cannot be allocated
        or grown.
    """
    cdef Py_UNICODE *c_line
    cdef int c_len
    cdef Py_UNICODE *accum_value
    cdef Py_UNICODE *grown
    cdef Py_ssize_t accum_len, accum_size, needed
    pairs = []
    tag = None
    # accum_value is a scratch buffer for the value being assembled;
    # accum_len counts the code units in use, accum_size the capacity.
    accum_len = 0
    accum_size = 4096
    accum_value = <Py_UNICODE *>PyMem_Malloc(accum_size*sizeof(Py_UNICODE))
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in unicode_iter:
            if line is None:
                break       # end of file
            if not PyUnicode_CheckExact(line):
                raise TypeError("%r is not a unicode string" % line)
            c_line = PyUnicode_AS_UNICODE(line)
            c_len = PyUnicode_GET_SIZE(line)
            if c_len < 1:
                break        # end of file
            if Py_UNICODE_ISLINEBREAK(c_line[0]):
                break       # end of stanza
            # Grow the buffer so it can hold this whole line on top of
            # what has been accumulated so far.
            needed = accum_len + c_len
            if needed > accum_size:
                grown = <Py_UNICODE *>PyMem_Realloc(accum_value,
                    needed*sizeof(Py_UNICODE))
                if grown == NULL:
                    raise MemoryError
                accum_value = grown
                accum_size = needed
            if c_line[0] == c'\t': # continues previous value,
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Append the line without its leading tab.
                memcpy(&accum_value[accum_len], &c_line[1],
                    (c_len-1)*sizeof(Py_UNICODE))
                accum_len += c_len - 1
                continue
            # Otherwise this is a new tag:value line; flush the pair
            # collected so far, leaving out its final code unit.
            if tag is not None:
                PyList_Append(pairs,
                    (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
            tag = _split_first_line_unicode(c_line, c_len, accum_value,
                                            &accum_len)
            if not _valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
        if tag is None:
            # didn't see any content
            return None
        # add last tag-value
        PyList_Append(pairs,
                (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
        return Stanza.from_pairs(pairs)
    finally:
        PyMem_Free(accum_value)