/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
17
"""Pyrex implementation of _read_stanza_*."""
18
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
19
4354.3.3 by Jelmer Vernooij
More performance tweaks.
20
cdef extern from "python-compat.h":
21
    pass
22
6753.2.1 by Martin
Make _rio_pyx compile and pass tests on Python 3
23
from cpython.bytes cimport (
24
    PyBytes_CheckExact,
25
    PyBytes_FromStringAndSize,
26
    PyBytes_AS_STRING,
27
    PyBytes_GET_SIZE,
28
    )
29
from cpython.unicode cimport (
30
    PyUnicode_CheckExact,
31
    PyUnicode_DecodeUTF8,
32
    # Deprecated after PEP 393 changes
33
    PyUnicode_AS_UNICODE,
34
    PyUnicode_FromUnicode,
35
    PyUnicode_GET_SIZE,
36
    )
37
from cpython.list cimport (
38
    PyList_Append,
39
    )
40
from cpython.mem cimport (
41
    PyMem_Free,
42
    PyMem_Malloc,
43
    PyMem_Realloc,
44
    )
45
from cpython.version cimport (
46
    PY_MAJOR_VERSION,
47
    )
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
48
4354.3.3 by Jelmer Vernooij
More performance tweaks.
49
cdef extern from "Python.h":
    # C-API names that are used below but are not cimported from the cpython
    # package at the top of this file.
    ctypedef int Py_UNICODE
    object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
    int Py_UNICODE_ISLINEBREAK(Py_UNICODE)

    # GZ 2017-09-11: Not sure why cython unicode module lacks this?
    object PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)

    # Python 3.3 or later unicode handling
    # This returns NULL with an exception set on failure (for example when
    # the string cannot be encoded as UTF-8) and leaves *size untouched.
    # Declaring the error return value makes Cython propagate that exception
    # at every call site instead of letting callers dereference NULL.
    char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size) except NULL
59
60
from libc.string cimport (
61
    memcpy,
62
    )
4354.3.13 by Jelmer Vernooij
Add more RIO tests, fix bugs in pyrex implementation.
63
6656.2.2 by Jelmer Vernooij
Use absolute_import.
64
from .rio import Stanza
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
65
6753.2.1 by Martin
Make _rio_pyx compile and pass tests on Python 3
66
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
67
cdef int _valid_tag_char(char c): # cannot_raise
    """Return 1 when ``c`` is allowed inside a tag, otherwise 0.

    Allowed characters are ASCII letters, ASCII digits, '_' and '-'.
    """
    if c == c'_':
        return 1
    if c == c'-':
        return 1
    if c >= c'a' and c <= c'z':
        return 1
    if c >= c'A' and c <= c'Z':
        return 1
    if c >= c'0' and c <= c'9':
        return 1
    return 0
72
73
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
74
def _valid_tag(tag):
    """Tell whether ``tag`` is a well-formed tag.

    :param tag: The tag to check. When PY_MAJOR_VERSION >= 3 it must be an
        exact unicode string, otherwise an exact bytes string.
    :return: True if the tag is non-empty and every byte of it is accepted
        by _valid_tag_char, False otherwise.
    :raises TypeError: If ``tag`` is not exactly the expected string type.
    """
    cdef char *raw
    cdef Py_ssize_t raw_len
    cdef int pos
    # GZ 2017-09-11: Encapsulate native string as ascii tag somewhere neater
    if PY_MAJOR_VERSION >= 3:
        if not PyUnicode_CheckExact(tag):
            raise TypeError(tag)
        # Validate the UTF-8 bytes of the native string.
        raw = PyUnicode_AsUTF8AndSize(tag, &raw_len)
    else:
        if not PyBytes_CheckExact(tag):
            raise TypeError(tag)
        raw = PyBytes_AS_STRING(tag)
        raw_len = PyBytes_GET_SIZE(tag)
    # An empty tag is never valid.
    if raw_len < 1:
        return False
    pos = 0
    while pos < raw_len:
        if not _valid_tag_char(raw[pos]):
            return False
        pos = pos + 1
    return True
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
94
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
95
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
96
cdef object _split_first_line_utf8(char *line, int len,
                                   char *value, Py_ssize_t *value_len):
    """Split a ``tag: value`` line of bytes at its first ':'.

    :param line: Pointer to the first byte of the line.
    :param len: Number of bytes in ``line``.
    :param value: Output buffer; everything after the ": " separator is
        copied into it. The caller must make it large enough.
    :param value_len: Output; set to the number of bytes copied to ``value``.
    :return: The text before the ':' -- a unicode string when
        PY_MAJOR_VERSION >= 3, a bytes string otherwise.
    :raises ValueError: If the line contains no ':' or the first ':' is not
        immediately followed by a space.
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] != c':':
            continue
        # Check the bound explicitly so a ':' in the last position does not
        # read line[len], which only works if the buffer is NUL-terminated.
        if i + 1 >= len or line[i+1] != c' ':
            # Build the message from (line, len) so an embedded NUL byte
            # does not cut the reported line short.
            raise ValueError("invalid tag in line %r" %
                             (PyBytes_FromStringAndSize(line, len),))
        memcpy(value, line+i+2, len-i-2)
        value_len[0] = len-i-2
        if PY_MAJOR_VERSION >= 3:
            return PyUnicode_FromStringAndSize(line, i)
        return PyBytes_FromStringAndSize(line, i)
    raise ValueError('tag/value separator not found in line %r' %
                     (PyBytes_FromStringAndSize(line, len),))
109
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
110
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
111
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
                                      Py_UNICODE *value, Py_ssize_t *value_len):
    """Split a ``tag: value`` line of unicode characters at its first ':'.

    :param line: Pointer to the first character of the line.
    :param len: Number of Py_UNICODE characters in ``line``.
    :param value: Output buffer; everything after the ": " separator is
        copied into it. The caller must make it large enough.
    :param value_len: Output; set to the number of characters copied to
        ``value``.
    :return: The text before the ':' -- a unicode string when
        PY_MAJOR_VERSION >= 3, otherwise the result of encoding it as ASCII.
    :raises ValueError: If the line contains no ':' or the first ':' is not
        immediately followed by a space.
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] != c':':
            continue
        # Check the bound explicitly so a ':' in the last position does not
        # read line[len], which lies outside the range the caller described.
        if i + 1 >= len or line[i+1] != c' ':
            raise ValueError("invalid tag in line %r" %
                             PyUnicode_FromUnicode(line, len))
        memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
        value_len[0] = len-i-2
        if PY_MAJOR_VERSION >= 3:
            return PyUnicode_FromUnicode(line, i)
        return PyUnicode_EncodeASCII(line, i, "strict")
    raise ValueError("tag/value separator not found in line %r" %
                     PyUnicode_FromUnicode(line, len))
126
127
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
128
def _read_stanza_utf8(line_iter):
    """Read one stanza from an iterator of UTF-8 encoded byte lines.

    Lines are consumed until the iterator is exhausted or it yields None, an
    empty string, or a line consisting of a single newline. A line starting
    with a tab continues the value of the preceding tag; any other line must
    have the form ``tag: value``. The final character of each accumulated
    value (normally the line's trailing newline) is dropped before the value
    is decoded as UTF-8.

    :param line_iter: Iterable yielding exact bytes strings (or None).
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, value) pairs read, or
        None if no tag line was seen.
    :raises TypeError: If a line is not an exact bytes string.
    :raises ValueError: On a continuation line before any tag, a line without
        a valid ": " separator, or a tag rejected by _valid_tag.
    :raises MemoryError: If the value buffer cannot be allocated or grown.
    """
    cdef char *c_line
    cdef Py_ssize_t c_len
    cdef char *accum_value
    cdef char *new_accum_value
    cdef Py_ssize_t accum_len, accum_size
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    # One scratch buffer is shared by every value in the stanza.
    accum_value = <char *>PyMem_Malloc(accum_size)
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in line_iter:
            if line is None:
                break # end of file
            if not PyBytes_CheckExact(line):
                raise TypeError("%r is not a plain string" % line)
            c_line = PyBytes_AS_STRING(line)
            c_len = PyBytes_GET_SIZE(line)
            if c_len < 1:
                break       # end of file
            if c_len == 1 and c_line[0] == c"\n":
                break       # end of stanza
            # Grow the buffer so it can hold this line on top of whatever
            # has been accumulated so far.
            if accum_len + c_len > accum_size:
                accum_size = (accum_len + c_len)
                new_accum_value = <char *>PyMem_Realloc(accum_value, accum_size)
                if new_accum_value == NULL:
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Skip the leading tab; the previous line's newline is still
                # in the buffer and joins the two pieces.
                memcpy(accum_value+accum_len, c_line+1, c_len-1)
                accum_len = accum_len + c_len-1
            else: # new tag:value line
                if tag is not None:
                    # Drop the final character, but never pass a negative
                    # length (accum_len is 0 for an empty value that had no
                    # trailing newline).
                    PyList_Append(pairs,
                        (tag, PyUnicode_DecodeUTF8(accum_value,
                                                   max(accum_len-1, 0),
                                                   "strict")))
                tag = _split_first_line_utf8(c_line, c_len, accum_value,
                                             &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            PyList_Append(pairs,
                (tag, PyUnicode_DecodeUTF8(accum_value, max(accum_len-1, 0),
                                           "strict")))
            return Stanza.from_pairs(pairs)
        else:     # didn't see any content
            return None
    finally:
        PyMem_Free(accum_value)
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
182
183
184
def _read_stanza_unicode(unicode_iter):
    """Read one stanza from an iterator of unicode lines.

    Lines are consumed until the iterator is exhausted or it yields None, an
    empty string, or a line whose first character is a line break. A line
    starting with a tab continues the value of the preceding tag; any other
    line must have the form ``tag: value``. The final character of each
    accumulated value (normally the line's trailing newline) is dropped.

    :param unicode_iter: Iterable yielding exact unicode strings (or None).
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, value) pairs read, or
        None if no tag line was seen.
    :raises TypeError: If a line is not an exact unicode string.
    :raises ValueError: On a continuation line before any tag, a line without
        a valid ": " separator, or a tag rejected by _valid_tag.
    :raises MemoryError: If the value buffer cannot be allocated or grown.
    """
    cdef Py_UNICODE *c_line
    # Py_ssize_t matches what PyUnicode_GET_SIZE returns and what the UTF-8
    # reader uses for the same purpose.
    cdef Py_ssize_t c_len
    cdef Py_UNICODE *accum_value
    cdef Py_UNICODE *new_accum_value
    cdef Py_ssize_t accum_len, accum_size
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    # One scratch buffer is shared by every value in the stanza; sizes are
    # counted in Py_UNICODE units, not bytes.
    accum_value = <Py_UNICODE *>PyMem_Malloc(accum_size*sizeof(Py_UNICODE))
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in unicode_iter:
            if line is None:
                break       # end of file
            if not PyUnicode_CheckExact(line):
                raise TypeError("%r is not a unicode string" % line)
            c_line = PyUnicode_AS_UNICODE(line)
            c_len = PyUnicode_GET_SIZE(line)
            if c_len < 1:
                break        # end of file
            if Py_UNICODE_ISLINEBREAK(c_line[0]):
                break       # end of stanza
            # Grow the buffer so it can hold this line on top of whatever
            # has been accumulated so far.
            if accum_len + c_len > accum_size:
                accum_size = accum_len + c_len
                new_accum_value = <Py_UNICODE *>PyMem_Realloc(accum_value,
                    accum_size*sizeof(Py_UNICODE))
                if new_accum_value == NULL:
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value,
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Skip the leading tab; the previous line's newline is still
                # in the buffer and joins the two pieces.
                memcpy(&accum_value[accum_len], &c_line[1],
                    (c_len-1)*sizeof(Py_UNICODE))
                accum_len = accum_len + (c_len-1)
            else: # new tag:value line
                if tag is not None:
                    # Drop the final character, but never pass a negative
                    # length (accum_len is 0 for an empty value that had no
                    # trailing newline).
                    PyList_Append(pairs,
                        (tag, PyUnicode_FromUnicode(accum_value,
                                                    max(accum_len-1, 0))))
                tag = _split_first_line_unicode(c_line, c_len, accum_value,
                                                &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            PyList_Append(pairs,
                    (tag, PyUnicode_FromUnicode(accum_value,
                                                max(accum_len-1, 0))))
            return Stanza.from_pairs(pairs)
        else:     # didn't see any content
            return None
    finally:
        PyMem_Free(accum_value)