/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7509.1.1 by Jelmer Vernooij
Set cython language level to Python 3.
16
#
17
# cython: language_level=3
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
19
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
20
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
21
from cpython.object cimport (
22
    hashfunc,
23
    Py_EQ,
24
    PyObject_Hash,
25
    PyTypeObject,
26
    Py_TYPE,
27
    richcmpfunc,
28
    traverseproc,
29
    visitproc,
30
    )
31
from cpython.mem cimport (
32
    PyMem_Malloc,
33
    PyMem_Free,
34
    )
35
from cpython.ref cimport (
36
    Py_INCREF,
37
    Py_DECREF,
38
    )
39
from libc.string cimport memset
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
40
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
41
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
42
# Dummy is an object used to mark nodes that have been deleted. Since
43
# collisions require us to move a node to an alternative location, if we just
44
# set an entry to NULL on delete, we won't find any relocated nodes.
45
# We have to use _dummy_obj because we need to keep a refcount to it, but we
46
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
47
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
48
cdef object _dummy_obj
49
cdef PyObject *_dummy
50
_dummy_obj = object()
51
_dummy = <PyObject *>_dummy_obj
52
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
53
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
54
cdef object _NotImplemented
55
_NotImplemented = NotImplemented
56
57
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
58
cdef int _is_equal(object this, long this_hash, object other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
59
    cdef long other_hash
60
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
61
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
62
    if other_hash != this_hash:
63
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
64
65
    # This implements a subset of the PyObject_RichCompareBool functionality.
66
    # Namely it:
67
    #   1) Doesn't try to do anything with old-style classes
68
    #   2) Assumes that both objects have a tp_richcompare implementation, and
69
    #      that if that is not enough to compare equal, then they are not
70
    #      equal. (It doesn't try to cast them both to some intermediate form
71
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
72
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
73
    if res is _NotImplemented:
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
74
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
75
        if res is _NotImplemented:
76
            return 0
77
    if res:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
78
        return 1
79
    return 0
80
81
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
82
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
83
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
84
85
    It is similar in function to the interned dictionary that is used by
86
    strings. However:
87
88
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
89
         of it
90
      2) It only stores one reference to the object, rather than 2 (key vs
91
         key:value)
92
93
    As such, it uses 1/3rd the amount of memory to store a pointer to the
94
    interned object.
95
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
96
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
97
    DEF DEFAULT_SIZE=1024
98
99
    def __init__(self):
100
        cdef Py_ssize_t size, n_bytes
101
102
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
103
        self._mask = size - 1
104
        self._used = 0
105
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
106
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
107
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
108
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
109
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
110
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
111
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
112
    def __sizeof__(self):
5361.2.6 by John Arbash Meinel
We also have to re-implement it for _simple_set_pyx.pyx
113
        # Note: Pyrex doesn't allow sizeof(class) so we re-implement it here.
114
        # Bits are:
115
        #   1: PyObject
116
        #   2: vtable *
117
        #   3: 3 Py_ssize_t
118
        #   4: PyObject**
119
        # Note that we might get alignment, etc, wrong, but at least this is
120
        # better than no estimate at all
121
        # return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*))
122
        return (sizeof(PyObject) + sizeof(void*)
123
                + 3*sizeof(Py_ssize_t) + sizeof(PyObject**)
124
                + (self._mask + 1) * sizeof(PyObject*))
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
125
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
126
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
127
        if self._table != NULL:
128
            PyMem_Free(self._table)
129
            self._table = NULL
130
131
    property used:
132
        def __get__(self):
133
            return self._used
134
135
    property fill:
136
        def __get__(self):
137
            return self._fill
138
139
    property mask:
140
        def __get__(self):
141
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
142
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
143
    def _memory_size(self):
144
        """Return the number of bytes of memory consumed by this class."""
145
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
146
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
147
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
148
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
149
150
    def _test_lookup(self, key):
151
        cdef PyObject **slot
152
4679.3.61 by John Arbash Meinel
Invert the logic.
153
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
154
        if slot[0] == NULL:
155
            res = '<null>'
156
        elif slot[0] == _dummy:
157
            res = '<dummy>'
158
        else:
159
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
160
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
161
162
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
163
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
164
        cdef PyObject **slot
165
4679.3.61 by John Arbash Meinel
Invert the logic.
166
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
167
        if slot[0] == NULL or slot[0] == _dummy:
168
            return False
169
        return True
170
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
171
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
172
        """Return the object (or nothing) define at the given location."""
173
        cdef PyObject **slot
174
175
        slot = _lookup(self, key)
176
        if slot[0] == NULL or slot[0] == _dummy:
177
            return NULL
178
        return slot[0]
179
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
180
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
181
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
182
        cdef PyObject *py_val
183
184
        py_val = self._get(key)
185
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
186
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
187
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
188
        return val
189
4679.3.63 by John Arbash Meinel
Implement resizing.
190
    cdef int _insert_clean(self, PyObject *key) except -1:
191
        """Insert a key into self.table.
192
193
        This is only meant to be used during times like '_resize',
194
        as it makes a lot of assuptions about keys not already being present,
195
        and there being no dummy entries.
196
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
197
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
198
        cdef long the_hash
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
199
        cdef PyObject **table
200
        cdef PyObject **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
201
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
202
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
203
        mask = self._mask
204
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
205
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
206
        the_hash = PyObject_Hash(<object>key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
207
        i = the_hash
208
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
209
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
210
            if slot[0] == NULL:
211
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
212
                self._fill = self._fill + 1
213
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
214
                return 1
215
            i = i + 1 + n_lookup
216
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
217
218
    def _py_resize(self, min_used):
219
        """Do not use this directly, it is only exposed for testing."""
220
        return self._resize(min_used)
221
222
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
223
        """Resize the internal table.
224
225
        The final table will be big enough to hold at least min_used entries.
226
        We will copy the data from the existing table over, leaving out dummy
227
        entries.
228
229
        :return: The new size of the internal table
230
        """
231
        cdef Py_ssize_t new_size, n_bytes, remaining
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
232
        cdef PyObject **new_table
233
        cdef PyObject **old_table
234
        cdef PyObject **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
235
236
        new_size = DEFAULT_SIZE
237
        while new_size <= min_used and new_size > 0:
238
            new_size = new_size << 1
239
        # We rolled over our signed size field
240
        if new_size <= 0:
241
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
242
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
243
        # size, we will still run the algorithm so that dummy entries are
244
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
245
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
246
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
247
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
248
        #                        ' smaller than the number of used slots.')
249
        n_bytes = sizeof(PyObject*) * new_size;
250
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
251
        if new_table == NULL:
252
            raise MemoryError()
253
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
254
        old_table = self._table
255
        self._table = new_table
256
        memset(self._table, 0, n_bytes)
257
        self._mask = new_size - 1
258
        self._used = 0
259
        remaining = self._fill
260
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
261
262
        # Moving everything to the other table is refcount neutral, so we don't
263
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
264
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
265
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
266
            if slot[0] == NULL: # unused slot
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
267
                pass
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
268
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
269
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
270
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
271
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
272
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
273
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
274
        PyMem_Free(old_table)
275
        return new_size
276
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
277
    cpdef object add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
278
        """Similar to set.add(), start tracking this key.
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
279
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
280
        There is one small difference, which is that we return the object that
281
        is stored at the given location. (which is closer to the
282
        dict.setdefault() functionality.)
283
        """
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
284
        cdef PyObject **slot
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
285
        cdef bint added
4679.3.61 by John Arbash Meinel
Invert the logic.
286
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
287
        if (Py_TYPE(key).tp_richcompare == NULL
288
            or Py_TYPE(key).tp_hash == NULL):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
289
            raise TypeError('Types added to SimpleSet must implement'
290
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
291
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
292
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
293
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
294
        slot = _lookup(self, key)
295
        if (slot[0] == NULL):
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
296
            Py_INCREF(key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
297
            self._fill = self._fill + 1
298
            self._used = self._used + 1
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
299
            slot[0] = <PyObject *>key
4679.3.63 by John Arbash Meinel
Implement resizing.
300
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
301
        elif (slot[0] == _dummy):
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
302
            Py_INCREF(key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
303
            self._used = self._used + 1
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
304
            slot[0] = <PyObject *>key
4679.3.63 by John Arbash Meinel
Implement resizing.
305
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
306
        # No else: clause. If _lookup returns a pointer to
307
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
308
        retval = <object>(slot[0])
309
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
310
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
311
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
312
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
313
        # Even if we resized and ended up moving retval into a different slot,
314
        # it is still the value that is held at the slot equivalent to 'key',
315
        # so we can still return it
316
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
317
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
318
    cpdef bint discard(self, key) except -1:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
319
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
320
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
321
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
322
        """
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
323
        cdef PyObject **slot
4679.3.61 by John Arbash Meinel
Invert the logic.
324
325
        slot = _lookup(self, key)
326
        if slot[0] == NULL or slot[0] == _dummy:
327
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
328
        self._used = self._used - 1
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
329
        Py_DECREF(<object>slot[0])
4679.3.61 by John Arbash Meinel
Invert the logic.
330
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
331
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
332
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
333
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
334
        # However, we are planning on using this as an interning structure, in
335
        # which we will be putting a lot of objects. And we expect that large
336
        # groups of them are going to have the same lifetime.
337
        # Dummy entries hurt a little bit because they cause the lookup to keep
338
        # searching, but resizing is also rather expensive
339
        # For now, we'll just use their algorithm, but we may want to revisit
340
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
341
        if ((self._fill - self._used) * 5 > self._mask):
342
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
343
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
344
4679.3.65 by John Arbash Meinel
Add __iter__ support.
345
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
346
        return _SimpleSet_iterator(self)
347
348
349
cdef class _SimpleSet_iterator:
350
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
351
352
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
353
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
354
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
355
    cdef Py_ssize_t len # number of entries left
356
357
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
358
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
359
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
360
        self._used = self.set._used
361
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
362
363
    def __iter__(self):
364
        return self
365
366
    def __next__(self):
367
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
368
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
369
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
370
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
371
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
372
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
373
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
374
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
375
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
376
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
377
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
378
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
379
        # we found something
380
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
381
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
382
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
383
384
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
385
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
386
            return self.len
387
        return 0
388
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
389
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
390
cdef api SimpleSet SimpleSet_New():
391
    """Create a new SimpleSet object."""
392
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
393
394
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
395
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
396
    """Check that the parameter is not None.
397
398
    Pyrex/Cython will do type checking, but only to ensure that an object is
399
    either the right type or None. You can say "object foo not None" for pure
400
    python functions, but not for C functions.
401
    So this is just a helper for all the apis that need to do the check.
402
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
403
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
404
    if self is None:
405
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
406
    true_self = self
407
    return true_self
408
409
410
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
411
    """Find the slot where 'key' would fit.
412
413
    This is the same as a dicts 'lookup' function.
414
415
    :param key: An object we are looking up
416
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
417
    :return: The location in self.table where key should be put.
418
        location == NULL is an exception, but (*location) == NULL just
419
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
420
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
421
    # This uses Quadratic Probing:
422
    #  http://en.wikipedia.org/wiki/Quadratic_probing
423
    # with c1 = c2 = 1/2
424
    # This leads to probe locations at:
425
    #  h0 = hash(k1)
426
    #  h1 = h0 + 1
427
    #  h2 = h0 + 3 = h1 + 1 + 1
428
    #  h3 = h0 + 6 = h2 + 1 + 2
429
    #  h4 = h0 + 10 = h2 + 1 + 3
430
    # Note that all of these are '& mask', but that is computed *after* the
431
    # offset.
432
    # This differs from the algorithm used by Set and Dict. Which, effectively,
433
    # use double-hashing, and a step size that starts large, but dwindles to
434
    # stepping one-by-one.
435
    # This gives more 'locality' in that if you have a collision at offset X,
436
    # the first fallback is X+1, which is fast to check. However, that means
437
    # that an object w/ hash X+1 will also check there, and then X+2 next.
438
    # However, for objects with differing hashes, their chains are different.
439
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
440
    # So different hashes diverge quickly.
441
    # A bigger problem is that we *only* ever use the lowest bits of the hash
442
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
443
    # use the same collision resolution. We may want to try to find a way to
444
    # incorporate the upper bits of the hash with quadratic probing. (For
445
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
446
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
447
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
448
    cdef long key_hash
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
449
    cdef PyObject **table
450
    cdef PyObject **slot
451
    cdef PyObject *cur
452
    cdef PyObject **free_slot
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
453
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
454
    key_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
455
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
456
    mask = self._mask
457
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
458
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
459
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
460
        slot = &table[i & mask]
461
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
462
        if cur == NULL:
463
            # Found a blank spot
464
            if free_slot != NULL:
465
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
466
                return free_slot
467
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
468
                return slot
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
469
        if cur == <PyObject *>key:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
470
            # Found an exact pointer to the key
471
            return slot
472
        if cur == _dummy:
473
            if free_slot == NULL:
474
                free_slot = slot
6705.1.2 by Martin
Start refactoring _simple_set to modern Cython style
475
        elif _is_equal(key, key_hash, <object>cur):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
476
            # Both py_key and cur belong in this slot, return it
477
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
478
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
479
    raise AssertionError('should never get here')
480
481
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
482
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
483
    """Find the slot where 'key' would fit.
484
485
    This is the same as a dicts 'lookup' function. This is a private
486
    api because mutating what you get without maintaing the other invariants
487
    is a 'bad thing'.
488
489
    :param key: An object we are looking up
490
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
491
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
492
        should never be NULL, but may reference a NULL (PyObject*)
493
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
494
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
495
496
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
497
cdef api object SimpleSet_Add(object self, object key):
498
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
499
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
500
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
501
    :param key: The key to be added. If the key is already present,
502
        self will not be modified
503
    :return: The current key stored at the location defined by 'key'.
504
        This may be the same object, or it may be an equivalent object.
505
        (consider dict.setdefault(key, key))
506
    """
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
507
    return _check_self(self).add(key)
508
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
509
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
510
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
511
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
512
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
513
514
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
515
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
516
    """Remove the object referenced at location 'key'.
517
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
518
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
519
    :param key: The key we are checking on
520
    :return: 1 if there was an object present, 0 if there was not, and -1 on
521
        error.
522
    """
6705.1.3 by Martin
Final tweaks to _simple_set_pyx without major rewrite
523
    return _check_self(self).discard(key)
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
524
525
526
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
527
    """Get a pointer to the object present at location 'key'.
528
529
    This returns an object which is equal to key which was previously added to
530
    self. This returns a borrowed reference, as it may also return NULL if no
531
    value is present at that location.
532
533
    :param key: The value we are looking for
534
    :return: The object present at that location
535
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
536
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
537
538
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
539
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
540
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
541
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
542
543
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
544
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
545
                            PyObject **key) except -1:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
546
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
547
548
    :param pos: should be initialized to 0 by the caller, and will be updated
549
        by this function
550
    :param key: Will return a borrowed reference to key
551
    :return: 0 if nothing left, 1 if we are returning a new value
552
    """
553
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
554
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
555
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
556
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
557
    i = pos[0]
558
    if (i < 0):
559
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
560
    mask = true_self._mask
561
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
562
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
563
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
564
    pos[0] = i + 1
565
    if (i > mask):
566
        return 0 # All done
567
    if (key != NULL):
568
        key[0] = table[i]
569
    return 1
570
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
571
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
572
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
573
                            void *arg) except -1:
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
574
    """This is an implementation of 'tp_traverse' that hits the whole table.
575
576
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
577
    define one for you if you have an 'object' attribute. Since they don't
578
    support C arrays of objects, we access the PyObject * directly.
579
    """
580
    cdef Py_ssize_t pos
581
    cdef PyObject *next_key
582
    cdef int ret
583
584
    pos = 0
585
    while SimpleSet_Next(self, &pos, &next_key):
586
        ret = visit(next_key, arg)
587
        if ret:
588
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
589
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
590
591
# It is a little bit ugly to do this, but it works, and means that Meliae can
592
# dump the total memory consumed by all child objects.
593
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse