/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
17
"""Pyrex implementation of _read_stanza_*."""
18
6656.2.2 by Jelmer Vernooij
Use absolute_import.
19
from __future__ import absolute_import
20
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
21
4354.3.3 by Jelmer Vernooij
More performance tweaks.
22
cdef extern from "python-compat.h":
23
    pass
24
4368.1.1 by Jelmer Vernooij
Import malloc and friends from stdlib.h rather than from (deprecated) malloc.h.
25
cdef extern from "stdlib.h":
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
26
    void *malloc(int)
27
    void *realloc(void *, int)
28
    void free(void *)
29
4354.3.3 by Jelmer Vernooij
More performance tweaks.
30
cdef extern from "Python.h":
4354.3.10 by Jelmer Vernooij
Use Py_UNICODE in unicode RIO parser.
31
    ctypedef int Py_UNICODE
4354.3.3 by Jelmer Vernooij
More performance tweaks.
32
    char *PyString_AS_STRING(object s)
4354.3.4 by Jelmer Vernooij
More work using C API's rather than Python objects.
33
    Py_ssize_t PyString_GET_SIZE(object t) except -1
34
    object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)
35
    object PyString_FromStringAndSize(char *s, Py_ssize_t len)
36
    int PyString_CheckExact(object)
37
    int PyUnicode_CheckExact(object)
38
    object PyUnicode_Join(object, object)
4354.3.10 by Jelmer Vernooij
Use Py_UNICODE in unicode RIO parser.
39
    object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
40
    Py_UNICODE *PyUnicode_AS_UNICODE(object)
41
    Py_UNICODE *PyUnicode_AsUnicode(object)
42
    Py_ssize_t PyUnicode_GET_SIZE(object) except -1
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
43
    int PyList_Append(object, object) except -1
4354.3.10 by Jelmer Vernooij
Use Py_UNICODE in unicode RIO parser.
44
    int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
45
    object PyUnicode_FromUnicode(Py_UNICODE *, int)
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
46
    void *Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, int)
4354.3.4 by Jelmer Vernooij
More work using C API's rather than Python objects.
47
4354.3.13 by Jelmer Vernooij
Add more RIO tests, fix bugs in pyrex implementation.
48
cdef extern from "string.h":
49
    void *memcpy(void *, void *, int)
50
6656.2.2 by Jelmer Vernooij
Use absolute_import.
51
from .rio import Stanza
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
52
4634.117.10 by John Arbash Meinel
Change 'no except' to 'cannot_raise'
53
cdef int _valid_tag_char(char c): # cannot_raise
    # Tell whether the byte ``c`` may appear in a tag name.  Accepted bytes
    # are ASCII letters, ASCII digits, underscore and hyphen; the result is
    # 1 for an accepted byte and 0 for anything else.
    if c >= c'a' and c <= c'z':
        return 1
    if c >= c'A' and c <= c'Z':
        return 1
    if c >= c'0' and c <= c'9':
        return 1
    if c == c'_' or c == c'-':
        return 1
    return 0
58
59
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
60
def _valid_tag(tag):
    """Check whether ``tag`` is usable as a tag name.

    :param tag: Candidate tag; must be exactly a plain (byte) string.
    :return: True if the tag is non-empty and every byte in it is accepted
        by ``_valid_tag_char``; False otherwise.
    :raises TypeError: if ``tag`` is not an exact plain string object.
    """
    cdef char *raw
    cdef Py_ssize_t length
    cdef int pos
    if not PyString_CheckExact(tag):
        raise TypeError(tag)
    length = PyString_GET_SIZE(tag)
    if length < 1:
        # An empty tag is never valid.
        return False
    raw = PyString_AS_STRING(tag)
    pos = 0
    while pos < length:
        if not _valid_tag_char(raw[pos]):
            return False
        pos = pos + 1
    return True
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
74
4354.3.11 by Jelmer Vernooij
Use shared data area when parsing pairs in stanza.
75
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
76
cdef object _split_first_line_utf8(char *line, int len,
                                   char *value, Py_ssize_t *value_len):
    """Split a ``tag: value`` line held in a byte buffer.

    The first ``len`` bytes of ``line`` are scanned for the first ':'.  The
    bytes before it are returned as the tag; the bytes following the ": "
    separator are copied to the start of ``value`` and their count is
    stored in ``value_len[0]``.

    :param line: Buffer holding the line.
    :param len: Number of bytes of ``line`` to consider.
    :param value: Destination buffer for the value bytes.  No size check is
        done here, so the caller has to provide room for up to ``len``
        bytes.
    :param value_len: Output slot receiving the number of bytes copied.
    :return: The tag as a plain string.
    :raises ValueError: if there is no ':' in the line, or the first ':' is
        not immediately followed by a space.
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] == c':':
            # Test the bound before looking at the next byte, so a ':' in
            # the final position never reads past the ``len`` bytes given.
            if i + 1 >= len or line[i+1] != c' ':
                # Build the message from (line, len) rather than coercing
                # the raw pointer, which would stop at the first NUL byte.
                raise ValueError("invalid tag in line %r" %
                                 PyString_FromStringAndSize(line, len))
            memcpy(value, line+i+2, len-i-2)
            value_len[0] = len-i-2
            return PyString_FromStringAndSize(line, i)
    raise ValueError('tag/value separator not found in line %r' %
                     PyString_FromStringAndSize(line, len))
87
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
88
6656.2.4 by Jelmer Vernooij
Merge cython-only branch.
89
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
                                      Py_UNICODE *value, Py_ssize_t *value_len):
    """Split a ``tag: value`` line held in a Py_UNICODE buffer.

    The first ``len`` code units of ``line`` are scanned for the first ':'.
    The units before it are encoded as ASCII and returned as the tag; the
    units following the ": " separator are copied to the start of ``value``
    and their count is stored in ``value_len[0]``.

    :param line: Buffer holding the line.
    :param len: Number of code units of ``line`` to consider.
    :param value: Destination buffer for the value.  No size check is done
        here, so the caller has to provide room for up to ``len`` units.
    :param value_len: Output slot receiving the number of units copied.
    :return: The tag, as produced by ``PyUnicode_EncodeASCII`` in "strict"
        mode (so a non-ASCII tag makes the encoding step fail).
    :raises ValueError: if there is no ':' in the line, or the first ':' is
        not immediately followed by a space.
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] == c':':
            # Test the bound before looking at the next unit, so a ':' in
            # the final position never reads past the ``len`` units given.
            if i + 1 >= len or line[i+1] != c' ':
                raise ValueError("invalid tag in line %r" %
                                 PyUnicode_FromUnicode(line, len))
            memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
            value_len[0] = len-i-2
            return PyUnicode_EncodeASCII(line, i, "strict")
    raise ValueError("tag/value separator not found in line %r" %
                     PyUnicode_FromUnicode(line, len))
102
103
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
104
def _read_stanza_utf8(line_iter):
    """Read a single stanza from an iterable of UTF-8 encoded lines.

    Lines are consumed until the iterable is exhausted, or until it yields
    None, an empty string, or a line consisting of just a newline.  A line
    starting with a tab continues the value of the preceding tag; any other
    line must have the form ``tag: value``.  The final byte of each
    accumulated value (expected to be the line's trailing newline) is
    dropped and the remainder is decoded as UTF-8 in "strict" mode.

    :param line_iter: Iterable yielding plain (byte) strings.
    :return: ``Stanza.from_pairs`` applied to the collected (tag, value)
        pairs, or None if no tag line was seen.
    :raises TypeError: if a line is not exactly a plain string.
    :raises ValueError: on a continuation line with no preceding tag, a
        line lacking the ``tag: `` prefix, or an invalid tag.
    :raises MemoryError: if the value buffer cannot be allocated or grown.
    """
    cdef char *c_line
    cdef Py_ssize_t c_len
    cdef char *accum_value, *new_accum_value
    cdef Py_ssize_t accum_len, accum_size, value_len
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <char *>malloc(accum_size)
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in line_iter:
            if line is None:
                break # end of file
            if not PyString_CheckExact(line):
                # Wrap in a tuple so a tuple-valued ``line`` is formatted
                # as one object instead of being unpacked by ``%``.
                raise TypeError("%r is not a plain string" % (line,))
            c_line = PyString_AS_STRING(line)
            c_len = PyString_GET_SIZE(line)
            if c_len < 1:
                break       # end of file
            if c_len == 1 and c_line[0] == c"\n":
                break       # end of stanza
            # Make sure the shared buffer can take this whole line, whether
            # it is appended (continuation) or copied to the start (new tag).
            if accum_len + c_len > accum_size:
                accum_size = (accum_len + c_len)
                new_accum_value = <char *>realloc(accum_value, accum_size)
                if new_accum_value == NULL:
                    # accum_value still points at the old block, which the
                    # finally clause releases.
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                memcpy(accum_value+accum_len, c_line+1, c_len-1)
                accum_len = accum_len + c_len-1
            else: # new tag:value line
                if tag is not None:
                    # Drop the trailing byte, but never hand a negative
                    # length to the decoder when the value is empty.
                    value_len = accum_len - 1
                    if value_len < 0:
                        value_len = 0
                    PyList_Append(pairs,
                        (tag, PyUnicode_DecodeUTF8(accum_value, value_len,
                                                   "strict")))
                tag = _split_first_line_utf8(c_line, c_len, accum_value,
                                             &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            # Same clamping as above for the final pending value.
            value_len = accum_len - 1
            if value_len < 0:
                value_len = 0
            PyList_Append(pairs,
                (tag, PyUnicode_DecodeUTF8(accum_value, value_len, "strict")))
            return Stanza.from_pairs(pairs)
        else:     # didn't see any content
            return None
    finally:
        free(accum_value)
4354.3.2 by Jelmer Vernooij
Provide custom implementation of _read_stanza_utf8 in Pyrex.
157
158
159
def _read_stanza_unicode(unicode_iter):
    """Read a single stanza from an iterable of unicode lines.

    Lines are consumed until the iterable is exhausted, or until it yields
    None, an empty string, or a line whose first character is a line break.
    A line starting with a tab continues the value of the preceding tag;
    any other line must have the form ``tag: value``.  The final character
    of each accumulated value (expected to be the line's trailing newline)
    is dropped.

    :param unicode_iter: Iterable yielding unicode strings.
    :return: ``Stanza.from_pairs`` applied to the collected (tag, value)
        pairs, or None if no tag line was seen.
    :raises TypeError: if a line is not exactly a unicode string.
    :raises ValueError: on a continuation line with no preceding tag, a
        line lacking the ``tag: `` prefix, or an invalid tag.
    :raises MemoryError: if the value buffer cannot be allocated or grown.
    """
    cdef Py_UNICODE *c_line
    cdef int c_len
    cdef Py_UNICODE *accum_value, *new_accum_value
    cdef Py_ssize_t accum_len, accum_size, value_len
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in unicode_iter:
            if line is None:
                break       # end of file
            if not PyUnicode_CheckExact(line):
                # Wrap in a tuple so a tuple-valued ``line`` is formatted
                # as one object instead of being unpacked by ``%``.
                raise TypeError("%r is not a unicode string" % (line,))
            c_line = PyUnicode_AS_UNICODE(line)
            c_len = PyUnicode_GET_SIZE(line)
            if c_len < 1:
                break        # end of file
            if Py_UNICODE_ISLINEBREAK(c_line[0]):
                break       # end of stanza
            # Make sure the shared buffer can take this whole line, whether
            # it is appended (continuation) or copied to the start (new tag).
            if accum_len + c_len > accum_size:
                accum_size = accum_len + c_len
                new_accum_value = <Py_UNICODE *>realloc(accum_value,
                    accum_size*sizeof(Py_UNICODE))
                if new_accum_value == NULL:
                    # accum_value still points at the old block, which the
                    # finally clause releases.
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value,
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                memcpy(&accum_value[accum_len], &c_line[1],
                    (c_len-1)*sizeof(Py_UNICODE))
                accum_len = accum_len + (c_len-1)
            else: # new tag:value line
                if tag is not None:
                    # Drop the trailing character, but never pass a
                    # negative length when the value is empty.
                    value_len = accum_len - 1
                    if value_len < 0:
                        value_len = 0
                    PyList_Append(pairs,
                        (tag, PyUnicode_FromUnicode(accum_value, value_len)))
                tag = _split_first_line_unicode(c_line, c_len, accum_value,
                                                &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            # Same clamping as above for the final pending value.
            value_len = accum_len - 1
            if value_len < 0:
                value_len = 0
            PyList_Append(pairs,
                    (tag, PyUnicode_FromUnicode(accum_value, value_len)))
            return Stanza.from_pairs(pairs)
        else:     # didn't see any content
            return None
    finally:
        free(accum_value)