/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
19
cdef extern from "python-compat.h":
20
    pass
21
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
22
cdef extern from "Python.h":
23
    ctypedef unsigned long size_t
24
    ctypedef long (*hashfunc)(PyObject*)
25
    ctypedef PyObject *(*richcmpfunc)(PyObject *, PyObject *, int)
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
26
    ctypedef int (*visitproc)(PyObject *, void *)
27
    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
28
    int Py_EQ
29
    PyObject *Py_True
30
    PyObject *Py_NotImplemented
31
    void Py_INCREF(PyObject *)
32
    void Py_DECREF(PyObject *)
33
    ctypedef struct PyTypeObject:
34
        hashfunc tp_hash
35
        richcmpfunc tp_richcompare
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
36
        traverseproc tp_traverse
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
37
38
    PyTypeObject *Py_TYPE(PyObject *)
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
39
    int PyObject_IsTrue(PyObject *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
40
        
41
    void *PyMem_Malloc(size_t nbytes)
42
    void PyMem_Free(void *)
43
    void memset(void *, int, size_t)
44
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
45
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
46
# Dummy is an object used to mark nodes that have been deleted. Since
47
# collisions require us to move a node to an alternative location, if we just
48
# set an entry to NULL on delete, we won't find any relocated nodes.
49
# We have to use _dummy_obj because we need to keep a refcount to it, but we
50
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
51
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
52
cdef object _dummy_obj
53
cdef PyObject *_dummy
54
_dummy_obj = object()
55
_dummy = <PyObject *>_dummy_obj
56
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
57
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
58
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
59
    cdef long other_hash
60
    cdef PyObject *res
61
62
    if this == other:
63
        return 1
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
64
    other_hash = Py_TYPE(other).tp_hash(other)
4679.3.90 by John Arbash Meinel
A bit more error checking in _is_equal.
65
    if other_hash == -1:
66
        # Even though other successfully hashed in the past, it seems to have
67
        # changed its mind, and failed this time, so propogate the failure.
68
        return -1
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
69
    if other_hash != this_hash:
70
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
71
72
    # This implements a subset of the PyObject_RichCompareBool functionality.
73
    # Namely it:
74
    #   1) Doesn't try to do anything with old-style classes
75
    #   2) Assumes that both objects have a tp_richcompare implementation, and
76
    #      that if that is not enough to compare equal, then they are not
77
    #      equal. (It doesn't try to cast them both to some intermediate form
78
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
79
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
80
    if res == NULL: # Exception
81
        return -1
82
    if PyObject_IsTrue(res):
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
83
        Py_DECREF(res)
84
        return 1
85
    if res == Py_NotImplemented:
86
        Py_DECREF(res)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
87
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
88
    if res == NULL:
89
        return -1
90
    if PyObject_IsTrue(res):
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
91
        Py_DECREF(res)
92
        return 1
93
    Py_DECREF(res)
94
    return 0
95
96
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
97
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
98
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
99
100
    It is similar in function to the interned dictionary that is used by
101
    strings. However:
102
103
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
104
         of it
105
      2) It only stores one reference to the object, rather than 2 (key vs
106
         key:value)
107
108
    As such, it uses 1/3rd the amount of memory to store a pointer to the
109
    interned object.
110
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
111
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
112
    DEF DEFAULT_SIZE=1024
113
114
    def __init__(self):
115
        cdef Py_ssize_t size, n_bytes
116
117
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
118
        self._mask = size - 1
119
        self._used = 0
120
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
121
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
122
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
123
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
124
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
125
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
126
127
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
128
        if self._table != NULL:
129
            PyMem_Free(self._table)
130
            self._table = NULL
131
132
    property used:
133
        def __get__(self):
134
            return self._used
135
136
    property fill:
137
        def __get__(self):
138
            return self._fill
139
140
    property mask:
141
        def __get__(self):
142
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
143
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
144
    def _memory_size(self):
145
        """Return the number of bytes of memory consumed by this class."""
146
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
147
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
148
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
149
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
150
151
    def _test_lookup(self, key):
152
        cdef PyObject **slot
153
4679.3.61 by John Arbash Meinel
Invert the logic.
154
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
155
        if slot[0] == NULL:
156
            res = '<null>'
157
        elif slot[0] == _dummy:
158
            res = '<dummy>'
159
        else:
160
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
161
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
162
163
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
164
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
165
        cdef PyObject **slot
166
4679.3.61 by John Arbash Meinel
Invert the logic.
167
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
168
        if slot[0] == NULL or slot[0] == _dummy:
169
            return False
170
        return True
171
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
172
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
173
        """Return the object (or nothing) define at the given location."""
174
        cdef PyObject **slot
175
176
        slot = _lookup(self, key)
177
        if slot[0] == NULL or slot[0] == _dummy:
178
            return NULL
179
        return slot[0]
180
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
181
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
182
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
183
        cdef PyObject *py_val
184
185
        py_val = self._get(key)
186
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
187
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
188
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
189
        return val
190
4679.3.63 by John Arbash Meinel
Implement resizing.
191
    cdef int _insert_clean(self, PyObject *key) except -1:
192
        """Insert a key into self.table.
193
194
        This is only meant to be used during times like '_resize',
195
        as it makes a lot of assuptions about keys not already being present,
196
        and there being no dummy entries.
197
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
198
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
199
        cdef long the_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
200
        cdef PyObject **table, **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
201
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
202
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
203
        mask = self._mask
204
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
205
206
        the_hash = Py_TYPE(key).tp_hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
207
        if the_hash == -1:
208
            return -1
209
        i = the_hash
210
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
211
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
212
            if slot[0] == NULL:
213
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
214
                self._fill = self._fill + 1
215
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
216
                return 1
217
            i = i + 1 + n_lookup
218
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
219
220
    def _py_resize(self, min_used):
221
        """Do not use this directly, it is only exposed for testing."""
222
        return self._resize(min_used)
223
224
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
225
        """Resize the internal table.
226
227
        The final table will be big enough to hold at least min_used entries.
228
        We will copy the data from the existing table over, leaving out dummy
229
        entries.
230
231
        :return: The new size of the internal table
232
        """
233
        cdef Py_ssize_t new_size, n_bytes, remaining
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
234
        cdef PyObject **new_table, **old_table, **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
235
236
        new_size = DEFAULT_SIZE
237
        while new_size <= min_used and new_size > 0:
238
            new_size = new_size << 1
239
        # We rolled over our signed size field
240
        if new_size <= 0:
241
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
242
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
243
        # size, we will still run the algorithm so that dummy entries are
244
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
245
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
246
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
247
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
248
        #                        ' smaller than the number of used slots.')
249
        n_bytes = sizeof(PyObject*) * new_size;
250
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
251
        if new_table == NULL:
252
            raise MemoryError()
253
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
254
        old_table = self._table
255
        self._table = new_table
256
        memset(self._table, 0, n_bytes)
257
        self._mask = new_size - 1
258
        self._used = 0
259
        remaining = self._fill
260
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
261
262
        # Moving everything to the other table is refcount neutral, so we don't
263
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
264
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
265
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
266
            if slot[0] == NULL: # unused slot
4679.3.63 by John Arbash Meinel
Implement resizing.
267
                pass 
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
268
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
269
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
270
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
271
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
272
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
273
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
274
        PyMem_Free(old_table)
275
        return new_size
276
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
277
    def add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
278
        """Similar to set.add(), start tracking this key.
279
        
280
        There is one small difference, which is that we return the object that
281
        is stored at the given location. (which is closer to the
282
        dict.setdefault() functionality.)
283
        """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
284
        return self._add(key)
285
286
    cdef object _add(self, key):
4679.3.61 by John Arbash Meinel
Invert the logic.
287
        cdef PyObject **slot, *py_key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
288
        cdef int added
4679.3.61 by John Arbash Meinel
Invert the logic.
289
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
290
        py_key = <PyObject *>key
291
        if (Py_TYPE(py_key).tp_richcompare == NULL
292
            or Py_TYPE(py_key).tp_hash == NULL):
293
            raise TypeError('Types added to SimpleSet must implement'
294
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
295
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
296
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
297
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
298
        slot = _lookup(self, key)
299
        if (slot[0] == NULL):
300
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
301
            self._fill = self._fill + 1
302
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
303
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
304
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
305
        elif (slot[0] == _dummy):
306
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
307
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
308
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
309
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
310
        # No else: clause. If _lookup returns a pointer to
311
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
312
        retval = <object>(slot[0])
313
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
314
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
315
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
316
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
317
        # Even if we resized and ended up moving retval into a different slot,
318
        # it is still the value that is held at the slot equivalent to 'key',
319
        # so we can still return it
320
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
321
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
322
    def discard(self, key):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
323
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
324
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
325
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
326
        """
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
327
        if self._discard(key):
328
            return True
329
        return False
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
330
331
    cdef int _discard(self, key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
332
        cdef PyObject **slot, *py_key
333
334
        slot = _lookup(self, key)
335
        if slot[0] == NULL or slot[0] == _dummy:
336
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
337
        self._used = self._used - 1
4679.3.61 by John Arbash Meinel
Invert the logic.
338
        Py_DECREF(slot[0])
339
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
340
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
341
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
342
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
343
        # However, we are planning on using this as an interning structure, in
344
        # which we will be putting a lot of objects. And we expect that large
345
        # groups of them are going to have the same lifetime.
346
        # Dummy entries hurt a little bit because they cause the lookup to keep
347
        # searching, but resizing is also rather expensive
348
        # For now, we'll just use their algorithm, but we may want to revisit
349
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
350
        if ((self._fill - self._used) * 5 > self._mask):
351
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
352
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
353
4679.3.65 by John Arbash Meinel
Add __iter__ support.
354
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
355
        return _SimpleSet_iterator(self)
356
357
358
cdef class _SimpleSet_iterator:
359
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
360
361
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
362
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
363
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
364
    cdef Py_ssize_t len # number of entries left
365
366
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
367
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
368
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
369
        self._used = self.set._used
370
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
371
372
    def __iter__(self):
373
        return self
374
375
    def __next__(self):
376
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
377
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
378
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
379
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
380
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
381
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
382
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
383
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
384
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
385
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
386
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
387
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
388
        # we found something
389
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
390
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
391
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
392
393
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
394
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
395
            return self.len
396
        return 0
397
    
398
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
399
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
400
cdef api SimpleSet SimpleSet_New():
401
    """Create a new SimpleSet object."""
402
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
403
404
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
405
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
406
    """Check that the parameter is not None.
407
408
    Pyrex/Cython will do type checking, but only to ensure that an object is
409
    either the right type or None. You can say "object foo not None" for pure
410
    python functions, but not for C functions.
411
    So this is just a helper for all the apis that need to do the check.
412
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
413
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
414
    if self is None:
415
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
416
    true_self = self
417
    return true_self
418
419
420
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
421
    """Find the slot where 'key' would fit.
422
423
    This is the same as a dicts 'lookup' function.
424
425
    :param key: An object we are looking up
426
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
427
    :return: The location in self.table where key should be put.
428
        location == NULL is an exception, but (*location) == NULL just
429
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
430
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
431
    # This uses Quadratic Probing:
432
    #  http://en.wikipedia.org/wiki/Quadratic_probing
433
    # with c1 = c2 = 1/2
434
    # This leads to probe locations at:
435
    #  h0 = hash(k1)
436
    #  h1 = h0 + 1
437
    #  h2 = h0 + 3 = h1 + 1 + 1
438
    #  h3 = h0 + 6 = h2 + 1 + 2
439
    #  h4 = h0 + 10 = h2 + 1 + 3
440
    # Note that all of these are '& mask', but that is computed *after* the
441
    # offset.
442
    # This differs from the algorithm used by Set and Dict. Which, effectively,
443
    # use double-hashing, and a step size that starts large, but dwindles to
444
    # stepping one-by-one.
445
    # This gives more 'locality' in that if you have a collision at offset X,
446
    # the first fallback is X+1, which is fast to check. However, that means
447
    # that an object w/ hash X+1 will also check there, and then X+2 next.
448
    # However, for objects with differing hashes, their chains are different.
449
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
450
    # So different hashes diverge quickly.
451
    # A bigger problem is that we *only* ever use the lowest bits of the hash
452
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
453
    # use the same collision resolution. We may want to try to find a way to
454
    # incorporate the upper bits of the hash with quadratic probing. (For
455
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
456
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
457
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
458
    cdef long key_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
459
    cdef PyObject **table, **slot, *cur, **free_slot, *py_key
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
460
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
461
    # hash is a signed long(), we are using an offset at unsigned size_t
4679.3.61 by John Arbash Meinel
Invert the logic.
462
    key_hash = hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
463
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
464
    mask = self._mask
465
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
466
    free_slot = NULL
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
467
    py_key = <PyObject *>key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
468
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
469
        slot = &table[i & mask]
470
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
471
        if cur == NULL:
472
            # Found a blank spot
473
            if free_slot != NULL:
474
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
475
                return free_slot
476
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
477
                return slot
478
        if cur == py_key:
479
            # Found an exact pointer to the key
480
            return slot
481
        if cur == _dummy:
482
            if free_slot == NULL:
483
                free_slot = slot
484
        elif _is_equal(py_key, key_hash, cur):
485
            # Both py_key and cur belong in this slot, return it
486
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
487
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
488
    raise AssertionError('should never get here')
489
490
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
491
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
492
    """Find the slot where 'key' would fit.
493
494
    This is the same as a dicts 'lookup' function. This is a private
495
    api because mutating what you get without maintaing the other invariants
496
    is a 'bad thing'.
497
498
    :param key: An object we are looking up
499
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
500
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
501
        should never be NULL, but may reference a NULL (PyObject*)
502
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
503
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
504
505
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
506
cdef api object SimpleSet_Add(object self, object key):
507
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
508
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
509
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
510
    :param key: The key to be added. If the key is already present,
511
        self will not be modified
512
    :return: The current key stored at the location defined by 'key'.
513
        This may be the same object, or it may be an equivalent object.
514
        (consider dict.setdefault(key, key))
515
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
516
    return _check_self(self)._add(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
517
    
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
518
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
519
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
520
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
521
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
522
523
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
524
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
525
    """Remove the object referenced at location 'key'.
526
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
527
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
528
    :param key: The key we are checking on
529
    :return: 1 if there was an object present, 0 if there was not, and -1 on
530
        error.
531
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
532
    return _check_self(self)._discard(key)
533
534
535
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
536
    """Get a pointer to the object present at location 'key'.
537
538
    This returns an object which is equal to key which was previously added to
539
    self. This returns a borrowed reference, as it may also return NULL if no
540
    value is present at that location.
541
542
    :param key: The value we are looking for
543
    :return: The object present at that location
544
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
545
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
546
547
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
548
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
549
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
550
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
551
552
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
553
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
554
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
555
556
    :param pos: should be initialized to 0 by the caller, and will be updated
557
        by this function
558
    :param key: Will return a borrowed reference to key
559
    :return: 0 if nothing left, 1 if we are returning a new value
560
    """
561
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
562
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
563
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
564
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
565
    i = pos[0]
566
    if (i < 0):
567
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
568
    mask = true_self._mask
569
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
570
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
571
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
572
    pos[0] = i + 1
573
    if (i > mask):
574
        return 0 # All done
575
    if (key != NULL):
576
        key[0] = table[i]
577
    return 1
578
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
579
580
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg):
581
    """This is an implementation of 'tp_traverse' that hits the whole table.
582
583
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
584
    define one for you if you have an 'object' attribute. Since they don't
585
    support C arrays of objects, we access the PyObject * directly.
586
    """
587
    cdef Py_ssize_t pos
588
    cdef PyObject *next_key
589
    cdef int ret
590
591
    pos = 0
592
    while SimpleSet_Next(self, &pos, &next_key):
593
        ret = visit(next_key, arg)
594
        if ret:
595
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
596
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
597
598
# It is a little bit ugly to do this, but it works, and means that Meliae can
599
# dump the total memory consumed by all child objects.
600
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse