/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
19
from cpython.object cimport (
20
    hashfunc,
21
    Py_EQ,
22
    PyObject_Hash,
23
    PyTypeObject,
24
    Py_TYPE,
25
    richcmpfunc,
26
    traverseproc,
27
    visitproc,
28
    )
29
from cpython.mem cimport (
30
    PyMem_Malloc,
31
    PyMem_Free,
32
    )
33
from cpython.ref cimport (
34
    Py_INCREF,
35
    Py_DECREF,
36
    )
37
from libc.string cimport memset
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
38
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
39
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
40
# Dummy is an object used to mark nodes that have been deleted. Since
41
# collisions require us to move a node to an alternative location, if we just
42
# set an entry to NULL on delete, we won't find any relocated nodes.
43
# We have to use _dummy_obj because we need to keep a refcount to it, but we
44
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
45
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
46
cdef object _dummy_obj
47
cdef PyObject *_dummy
48
_dummy_obj = object()
49
_dummy = <PyObject *>_dummy_obj
50
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
51
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
52
cdef object _NotImplemented
53
_NotImplemented = NotImplemented
54
55
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
56
cdef int _is_equal(object this, long this_hash, object other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
57
    cdef long other_hash
58
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
59
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
60
    if other_hash != this_hash:
61
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
62
63
    # This implements a subset of the PyObject_RichCompareBool functionality.
64
    # Namely it:
65
    #   1) Doesn't try to do anything with old-style classes
66
    #   2) Assumes that both objects have a tp_richcompare implementation, and
67
    #      that if that is not enough to compare equal, then they are not
68
    #      equal. (It doesn't try to cast them both to some intermediate form
69
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
70
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
71
    if res is _NotImplemented:
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
72
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
73
        if res is _NotImplemented:
74
            return 0
75
    if res:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
76
        return 1
77
    return 0
78
79
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
80
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
81
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
82
83
    It is similar in function to the interned dictionary that is used by
84
    strings. However:
85
86
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
87
         of it
88
      2) It only stores one reference to the object, rather than 2 (key vs
89
         key:value)
90
91
    As such, it uses 1/3rd the amount of memory to store a pointer to the
92
    interned object.
93
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
94
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
95
    DEF DEFAULT_SIZE=1024
96
97
    def __init__(self):
98
        cdef Py_ssize_t size, n_bytes
99
100
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
101
        self._mask = size - 1
102
        self._used = 0
103
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
104
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
105
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
106
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
107
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
108
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
109
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
110
    def __sizeof__(self):
5361.2.6 by John Arbash Meinel
We also have to re-implement it for _simple_set_pyx.pyx
111
        # Note: Pyrex doesn't allow sizeof(class) so we re-implement it here.
112
        # Bits are:
113
        #   1: PyObject
114
        #   2: vtable *
115
        #   3: 3 Py_ssize_t
116
        #   4: PyObject**
117
        # Note that we might get alignment, etc, wrong, but at least this is
118
        # better than no estimate at all
119
        # return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*))
120
        return (sizeof(PyObject) + sizeof(void*)
121
                + 3*sizeof(Py_ssize_t) + sizeof(PyObject**)
122
                + (self._mask + 1) * sizeof(PyObject*))
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
123
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
124
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
125
        if self._table != NULL:
126
            PyMem_Free(self._table)
127
            self._table = NULL
128
129
    property used:
130
        def __get__(self):
131
            return self._used
132
133
    property fill:
134
        def __get__(self):
135
            return self._fill
136
137
    property mask:
138
        def __get__(self):
139
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
140
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
141
    def _memory_size(self):
142
        """Return the number of bytes of memory consumed by this class."""
143
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
144
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
145
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
146
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
147
148
    def _test_lookup(self, key):
149
        cdef PyObject **slot
150
4679.3.61 by John Arbash Meinel
Invert the logic.
151
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
152
        if slot[0] == NULL:
153
            res = '<null>'
154
        elif slot[0] == _dummy:
155
            res = '<dummy>'
156
        else:
157
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
158
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
159
160
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
161
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
162
        cdef PyObject **slot
163
4679.3.61 by John Arbash Meinel
Invert the logic.
164
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
165
        if slot[0] == NULL or slot[0] == _dummy:
166
            return False
167
        return True
168
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
169
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
170
        """Return the object (or nothing) define at the given location."""
171
        cdef PyObject **slot
172
173
        slot = _lookup(self, key)
174
        if slot[0] == NULL or slot[0] == _dummy:
175
            return NULL
176
        return slot[0]
177
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
178
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
179
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
180
        cdef PyObject *py_val
181
182
        py_val = self._get(key)
183
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
184
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
185
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
186
        return val
187
4679.3.63 by John Arbash Meinel
Implement resizing.
188
    cdef int _insert_clean(self, PyObject *key) except -1:
189
        """Insert a key into self.table.
190
191
        This is only meant to be used during times like '_resize',
192
        as it makes a lot of assuptions about keys not already being present,
193
        and there being no dummy entries.
194
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
195
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
196
        cdef long the_hash
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
197
        cdef PyObject **table
198
        cdef PyObject **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
199
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
200
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
201
        mask = self._mask
202
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
203
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
204
        the_hash = PyObject_Hash(<object>key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
205
        i = the_hash
206
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
207
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
208
            if slot[0] == NULL:
209
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
210
                self._fill = self._fill + 1
211
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
212
                return 1
213
            i = i + 1 + n_lookup
214
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
215
216
    def _py_resize(self, min_used):
217
        """Do not use this directly, it is only exposed for testing."""
218
        return self._resize(min_used)
219
220
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
221
        """Resize the internal table.
222
223
        The final table will be big enough to hold at least min_used entries.
224
        We will copy the data from the existing table over, leaving out dummy
225
        entries.
226
227
        :return: The new size of the internal table
228
        """
229
        cdef Py_ssize_t new_size, n_bytes, remaining
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
230
        cdef PyObject **new_table
231
        cdef PyObject **old_table
232
        cdef PyObject **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
233
234
        new_size = DEFAULT_SIZE
235
        while new_size <= min_used and new_size > 0:
236
            new_size = new_size << 1
237
        # We rolled over our signed size field
238
        if new_size <= 0:
239
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
240
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
241
        # size, we will still run the algorithm so that dummy entries are
242
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
243
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
244
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
245
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
246
        #                        ' smaller than the number of used slots.')
247
        n_bytes = sizeof(PyObject*) * new_size;
248
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
249
        if new_table == NULL:
250
            raise MemoryError()
251
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
252
        old_table = self._table
253
        self._table = new_table
254
        memset(self._table, 0, n_bytes)
255
        self._mask = new_size - 1
256
        self._used = 0
257
        remaining = self._fill
258
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
259
260
        # Moving everything to the other table is refcount neutral, so we don't
261
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
262
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
263
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
264
            if slot[0] == NULL: # unused slot
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
265
                pass
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
266
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
267
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
268
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
269
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
270
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
271
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
272
        PyMem_Free(old_table)
273
        return new_size
274
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
275
    cpdef object add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
276
        """Similar to set.add(), start tracking this key.
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
277
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
278
        There is one small difference, which is that we return the object that
279
        is stored at the given location. (which is closer to the
280
        dict.setdefault() functionality.)
281
        """
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
282
        cdef PyObject **slot
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
283
        cdef bint added
4679.3.61 by John Arbash Meinel
Invert the logic.
284
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
285
        if (Py_TYPE(key).tp_richcompare == NULL
286
            or Py_TYPE(key).tp_hash == NULL):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
287
            raise TypeError('Types added to SimpleSet must implement'
288
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
289
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
290
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
291
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
292
        slot = _lookup(self, key)
293
        if (slot[0] == NULL):
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
294
            Py_INCREF(key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
295
            self._fill = self._fill + 1
296
            self._used = self._used + 1
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
297
            slot[0] = <PyObject *>key
4679.3.63 by John Arbash Meinel
Implement resizing.
298
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
299
        elif (slot[0] == _dummy):
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
300
            Py_INCREF(key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
301
            self._used = self._used + 1
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
302
            slot[0] = <PyObject *>key
4679.3.63 by John Arbash Meinel
Implement resizing.
303
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
304
        # No else: clause. If _lookup returns a pointer to
305
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
306
        retval = <object>(slot[0])
307
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
308
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
309
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
310
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
311
        # Even if we resized and ended up moving retval into a different slot,
312
        # it is still the value that is held at the slot equivalent to 'key',
313
        # so we can still return it
314
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
315
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
316
    cpdef bint discard(self, key) except -1:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
317
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
318
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
319
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
320
        """
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
321
        cdef PyObject **slot
4679.3.61 by John Arbash Meinel
Invert the logic.
322
323
        slot = _lookup(self, key)
324
        if slot[0] == NULL or slot[0] == _dummy:
325
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
326
        self._used = self._used - 1
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
327
        Py_DECREF(<object>slot[0])
4679.3.61 by John Arbash Meinel
Invert the logic.
328
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
329
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
330
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
331
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
332
        # However, we are planning on using this as an interning structure, in
333
        # which we will be putting a lot of objects. And we expect that large
334
        # groups of them are going to have the same lifetime.
335
        # Dummy entries hurt a little bit because they cause the lookup to keep
336
        # searching, but resizing is also rather expensive
337
        # For now, we'll just use their algorithm, but we may want to revisit
338
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
339
        if ((self._fill - self._used) * 5 > self._mask):
340
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
341
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
342
4679.3.65 by John Arbash Meinel
Add __iter__ support.
343
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
344
        return _SimpleSet_iterator(self)
345
346
347
cdef class _SimpleSet_iterator:
348
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
349
350
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
351
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
352
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
353
    cdef Py_ssize_t len # number of entries left
354
355
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
356
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
357
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
358
        self._used = self.set._used
359
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
360
361
    def __iter__(self):
362
        return self
363
364
    def __next__(self):
365
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
366
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
367
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
368
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
369
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
370
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
371
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
372
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
373
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
374
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
375
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
376
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
377
        # we found something
378
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
379
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
380
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
381
382
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
383
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
384
            return self.len
385
        return 0
386
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
387
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
388
cdef api SimpleSet SimpleSet_New():
389
    """Create a new SimpleSet object."""
390
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
391
392
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
393
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
394
    """Check that the parameter is not None.
395
396
    Pyrex/Cython will do type checking, but only to ensure that an object is
397
    either the right type or None. You can say "object foo not None" for pure
398
    python functions, but not for C functions.
399
    So this is just a helper for all the apis that need to do the check.
400
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
401
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
402
    if self is None:
403
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
404
    true_self = self
405
    return true_self
406
407
408
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
409
    """Find the slot where 'key' would fit.
410
411
    This is the same as a dicts 'lookup' function.
412
413
    :param key: An object we are looking up
414
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
415
    :return: The location in self.table where key should be put.
416
        location == NULL is an exception, but (*location) == NULL just
417
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
418
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
419
    # This uses Quadratic Probing:
420
    #  http://en.wikipedia.org/wiki/Quadratic_probing
421
    # with c1 = c2 = 1/2
422
    # This leads to probe locations at:
423
    #  h0 = hash(k1)
424
    #  h1 = h0 + 1
425
    #  h2 = h0 + 3 = h1 + 1 + 1
426
    #  h3 = h0 + 6 = h2 + 1 + 2
427
    #  h4 = h0 + 10 = h2 + 1 + 3
428
    # Note that all of these are '& mask', but that is computed *after* the
429
    # offset.
430
    # This differs from the algorithm used by Set and Dict. Which, effectively,
431
    # use double-hashing, and a step size that starts large, but dwindles to
432
    # stepping one-by-one.
433
    # This gives more 'locality' in that if you have a collision at offset X,
434
    # the first fallback is X+1, which is fast to check. However, that means
435
    # that an object w/ hash X+1 will also check there, and then X+2 next.
436
    # However, for objects with differing hashes, their chains are different.
437
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
438
    # So different hashes diverge quickly.
439
    # A bigger problem is that we *only* ever use the lowest bits of the hash
440
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
441
    # use the same collision resolution. We may want to try to find a way to
442
    # incorporate the upper bits of the hash with quadratic probing. (For
443
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
444
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
445
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
446
    cdef long key_hash
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
447
    cdef PyObject **table
448
    cdef PyObject **slot
449
    cdef PyObject *cur
450
    cdef PyObject **free_slot
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
451
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
452
    key_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
453
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
454
    mask = self._mask
455
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
456
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
457
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
458
        slot = &table[i & mask]
459
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
460
        if cur == NULL:
461
            # Found a blank spot
462
            if free_slot != NULL:
463
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
464
                return free_slot
465
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
466
                return slot
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
467
        if cur == <PyObject *>key:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
468
            # Found an exact pointer to the key
469
            return slot
470
        if cur == _dummy:
471
            if free_slot == NULL:
472
                free_slot = slot
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
473
        elif _is_equal(key, key_hash, <object>cur):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
474
            # Both py_key and cur belong in this slot, return it
475
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
476
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
477
    raise AssertionError('should never get here')
478
479
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
480
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
481
    """Find the slot where 'key' would fit.
482
483
    This is the same as a dicts 'lookup' function. This is a private
484
    api because mutating what you get without maintaing the other invariants
485
    is a 'bad thing'.
486
487
    :param key: An object we are looking up
488
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
489
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
490
        should never be NULL, but may reference a NULL (PyObject*)
491
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
492
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
493
494
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
495
cdef api object SimpleSet_Add(object self, object key):
496
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
497
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
498
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
499
    :param key: The key to be added. If the key is already present,
500
        self will not be modified
501
    :return: The current key stored at the location defined by 'key'.
502
        This may be the same object, or it may be an equivalent object.
503
        (consider dict.setdefault(key, key))
504
    """
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
505
    return _check_self(self).add(key)
506
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
507
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
508
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
509
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
510
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
511
512
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
513
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
514
    """Remove the object referenced at location 'key'.
515
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
516
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
517
    :param key: The key we are checking on
518
    :return: 1 if there was an object present, 0 if there was not, and -1 on
519
        error.
520
    """
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
521
    return _check_self(self).discard(key)
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
522
523
524
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
525
    """Get a pointer to the object present at location 'key'.
526
527
    This returns an object which is equal to key which was previously added to
528
    self. This returns a borrowed reference, as it may also return NULL if no
529
    value is present at that location.
530
531
    :param key: The value we are looking for
532
    :return: The object present at that location
533
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
534
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
535
536
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
537
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
538
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
539
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
540
541
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
542
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
543
                            PyObject **key) except -1:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
544
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
545
546
    :param pos: should be initialized to 0 by the caller, and will be updated
547
        by this function
548
    :param key: Will return a borrowed reference to key
549
    :return: 0 if nothing left, 1 if we are returning a new value
550
    """
551
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
552
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
553
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
554
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
555
    i = pos[0]
556
    if (i < 0):
557
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
558
    mask = true_self._mask
559
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
560
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
561
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
562
    pos[0] = i + 1
563
    if (i > mask):
564
        return 0 # All done
565
    if (key != NULL):
566
        key[0] = table[i]
567
    return 1
568
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
569
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
570
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
571
                            void *arg) except -1:
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
572
    """This is an implementation of 'tp_traverse' that hits the whole table.
573
574
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
575
    define one for you if you have an 'object' attribute. Since they don't
576
    support C arrays of objects, we access the PyObject * directly.
577
    """
578
    cdef Py_ssize_t pos
579
    cdef PyObject *next_key
580
    cdef int ret
581
582
    pos = 0
583
    while SimpleSet_Next(self, &pos, &next_key):
584
        ret = visit(next_key, arg)
585
        if ret:
586
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
587
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
588
589
# It is a little bit ugly to do this, but it works, and means that Meliae can
590
# dump the total memory consumed by all child objects.
591
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse