/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
19
cdef extern from "python-compat.h":
20
    pass
21
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
22
cdef extern from "Python.h":
23
    ctypedef unsigned long size_t
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
24
    ctypedef long (*hashfunc)(PyObject*) except -1
25
    ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int)
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
26
    ctypedef int (*visitproc)(PyObject *, void *)
27
    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
28
    int Py_EQ
29
    void Py_INCREF(PyObject *)
30
    void Py_DECREF(PyObject *)
31
    ctypedef struct PyTypeObject:
32
        hashfunc tp_hash
33
        richcmpfunc tp_richcompare
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
34
        traverseproc tp_traverse
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
35
36
    PyTypeObject *Py_TYPE(PyObject *)
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
37
    # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
38
    #       thus silently truncates to 32-bits on 64-bit machines.
39
    long PyObject_Hash(PyObject *) except -1
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
40
        
41
    void *PyMem_Malloc(size_t nbytes)
42
    void PyMem_Free(void *)
43
    void memset(void *, int, size_t)
44
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
45
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
46
# Dummy is an object used to mark nodes that have been deleted. Since
47
# collisions require us to move a node to an alternative location, if we just
48
# set an entry to NULL on delete, we won't find any relocated nodes.
49
# We have to use _dummy_obj because we need to keep a refcount to it, but we
50
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
51
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
52
cdef object _dummy_obj
53
cdef PyObject *_dummy
54
_dummy_obj = object()
55
_dummy = <PyObject *>_dummy_obj
56
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
57
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
58
cdef object _NotImplemented
59
_NotImplemented = NotImplemented
60
61
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
62
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
63
    cdef long other_hash
64
65
    if this == other:
66
        return 1
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
67
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
68
    if other_hash != this_hash:
69
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
70
71
    # This implements a subset of the PyObject_RichCompareBool functionality.
72
    # Namely it:
73
    #   1) Doesn't try to do anything with old-style classes
74
    #   2) Assumes that both objects have a tp_richcompare implementation, and
75
    #      that if that is not enough to compare equal, then they are not
76
    #      equal. (It doesn't try to cast them both to some intermediate form
77
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
78
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
79
    if res is _NotImplemented:
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
80
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
81
        if res is _NotImplemented:
82
            return 0
83
    if res:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
84
        return 1
85
    return 0
86
87
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
88
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
89
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
90
91
    It is similar in function to the interned dictionary that is used by
92
    strings. However:
93
94
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
95
         of it
96
      2) It only stores one reference to the object, rather than 2 (key vs
97
         key:value)
98
99
    As such, it uses 1/3rd the amount of memory to store a pointer to the
100
    interned object.
101
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
102
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
103
    DEF DEFAULT_SIZE=1024
104
105
    def __init__(self):
106
        cdef Py_ssize_t size, n_bytes
107
108
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
109
        self._mask = size - 1
110
        self._used = 0
111
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
112
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
113
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
114
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
115
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
116
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
117
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
118
    def __sizeof__(self):
119
        return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject**))
120
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
121
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
122
        if self._table != NULL:
123
            PyMem_Free(self._table)
124
            self._table = NULL
125
126
    property used:
127
        def __get__(self):
128
            return self._used
129
130
    property fill:
131
        def __get__(self):
132
            return self._fill
133
134
    property mask:
135
        def __get__(self):
136
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
137
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
138
    def _memory_size(self):
139
        """Return the number of bytes of memory consumed by this class."""
140
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
141
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
142
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
143
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
144
145
    def _test_lookup(self, key):
146
        cdef PyObject **slot
147
4679.3.61 by John Arbash Meinel
Invert the logic.
148
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
149
        if slot[0] == NULL:
150
            res = '<null>'
151
        elif slot[0] == _dummy:
152
            res = '<dummy>'
153
        else:
154
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
155
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
156
157
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
158
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
159
        cdef PyObject **slot
160
4679.3.61 by John Arbash Meinel
Invert the logic.
161
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
162
        if slot[0] == NULL or slot[0] == _dummy:
163
            return False
164
        return True
165
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
166
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
167
        """Return the object (or nothing) define at the given location."""
168
        cdef PyObject **slot
169
170
        slot = _lookup(self, key)
171
        if slot[0] == NULL or slot[0] == _dummy:
172
            return NULL
173
        return slot[0]
174
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
175
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
176
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
177
        cdef PyObject *py_val
178
179
        py_val = self._get(key)
180
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
181
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
182
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
183
        return val
184
4679.3.63 by John Arbash Meinel
Implement resizing.
185
    cdef int _insert_clean(self, PyObject *key) except -1:
186
        """Insert a key into self.table.
187
188
        This is only meant to be used during times like '_resize',
189
        as it makes a lot of assuptions about keys not already being present,
190
        and there being no dummy entries.
191
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
192
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
193
        cdef long the_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
194
        cdef PyObject **table, **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
195
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
196
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
197
        mask = self._mask
198
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
199
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
200
        the_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
201
        i = the_hash
202
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
203
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
204
            if slot[0] == NULL:
205
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
206
                self._fill = self._fill + 1
207
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
208
                return 1
209
            i = i + 1 + n_lookup
210
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
211
212
    def _py_resize(self, min_used):
213
        """Do not use this directly, it is only exposed for testing."""
214
        return self._resize(min_used)
215
216
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
217
        """Resize the internal table.
218
219
        The final table will be big enough to hold at least min_used entries.
220
        We will copy the data from the existing table over, leaving out dummy
221
        entries.
222
223
        :return: The new size of the internal table
224
        """
225
        cdef Py_ssize_t new_size, n_bytes, remaining
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
226
        cdef PyObject **new_table, **old_table, **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
227
228
        new_size = DEFAULT_SIZE
229
        while new_size <= min_used and new_size > 0:
230
            new_size = new_size << 1
231
        # We rolled over our signed size field
232
        if new_size <= 0:
233
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
234
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
235
        # size, we will still run the algorithm so that dummy entries are
236
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
237
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
238
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
239
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
240
        #                        ' smaller than the number of used slots.')
241
        n_bytes = sizeof(PyObject*) * new_size;
242
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
243
        if new_table == NULL:
244
            raise MemoryError()
245
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
246
        old_table = self._table
247
        self._table = new_table
248
        memset(self._table, 0, n_bytes)
249
        self._mask = new_size - 1
250
        self._used = 0
251
        remaining = self._fill
252
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
253
254
        # Moving everything to the other table is refcount neutral, so we don't
255
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
256
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
257
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
258
            if slot[0] == NULL: # unused slot
4679.3.63 by John Arbash Meinel
Implement resizing.
259
                pass 
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
260
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
261
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
262
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
263
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
264
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
265
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
266
        PyMem_Free(old_table)
267
        return new_size
268
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
269
    def add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
270
        """Similar to set.add(), start tracking this key.
271
        
272
        There is one small difference, which is that we return the object that
273
        is stored at the given location. (which is closer to the
274
        dict.setdefault() functionality.)
275
        """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
276
        return self._add(key)
277
278
    cdef object _add(self, key):
4679.3.61 by John Arbash Meinel
Invert the logic.
279
        cdef PyObject **slot, *py_key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
280
        cdef int added
4679.3.61 by John Arbash Meinel
Invert the logic.
281
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
282
        py_key = <PyObject *>key
283
        if (Py_TYPE(py_key).tp_richcompare == NULL
284
            or Py_TYPE(py_key).tp_hash == NULL):
285
            raise TypeError('Types added to SimpleSet must implement'
286
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
287
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
288
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
289
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
290
        slot = _lookup(self, key)
291
        if (slot[0] == NULL):
292
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
293
            self._fill = self._fill + 1
294
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
295
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
296
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
297
        elif (slot[0] == _dummy):
298
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
299
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
300
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
301
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
302
        # No else: clause. If _lookup returns a pointer to
303
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
304
        retval = <object>(slot[0])
305
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
306
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
307
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
308
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
309
        # Even if we resized and ended up moving retval into a different slot,
310
        # it is still the value that is held at the slot equivalent to 'key',
311
        # so we can still return it
312
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
313
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
314
    def discard(self, key):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
315
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
316
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
317
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
318
        """
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
319
        if self._discard(key):
320
            return True
321
        return False
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
322
323
    cdef int _discard(self, key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
324
        cdef PyObject **slot, *py_key
325
326
        slot = _lookup(self, key)
327
        if slot[0] == NULL or slot[0] == _dummy:
328
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
329
        self._used = self._used - 1
4679.3.61 by John Arbash Meinel
Invert the logic.
330
        Py_DECREF(slot[0])
331
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
332
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
333
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
334
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
335
        # However, we are planning on using this as an interning structure, in
336
        # which we will be putting a lot of objects. And we expect that large
337
        # groups of them are going to have the same lifetime.
338
        # Dummy entries hurt a little bit because they cause the lookup to keep
339
        # searching, but resizing is also rather expensive
340
        # For now, we'll just use their algorithm, but we may want to revisit
341
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
342
        if ((self._fill - self._used) * 5 > self._mask):
343
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
344
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
345
4679.3.65 by John Arbash Meinel
Add __iter__ support.
346
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
347
        return _SimpleSet_iterator(self)
348
349
350
cdef class _SimpleSet_iterator:
351
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
352
353
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
354
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
355
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
356
    cdef Py_ssize_t len # number of entries left
357
358
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
359
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
360
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
361
        self._used = self.set._used
362
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
363
364
    def __iter__(self):
365
        return self
366
367
    def __next__(self):
368
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
369
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
370
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
371
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
372
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
373
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
374
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
375
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
376
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
377
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
378
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
379
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
380
        # we found something
381
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
382
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
383
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
384
385
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
386
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
387
            return self.len
388
        return 0
389
    
390
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
391
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
392
cdef api SimpleSet SimpleSet_New():
393
    """Create a new SimpleSet object."""
394
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
395
396
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
397
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
398
    """Check that the parameter is not None.
399
400
    Pyrex/Cython will do type checking, but only to ensure that an object is
401
    either the right type or None. You can say "object foo not None" for pure
402
    python functions, but not for C functions.
403
    So this is just a helper for all the apis that need to do the check.
404
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
405
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
406
    if self is None:
407
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
408
    true_self = self
409
    return true_self
410
411
412
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
413
    """Find the slot where 'key' would fit.
414
415
    This is the same as a dicts 'lookup' function.
416
417
    :param key: An object we are looking up
418
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
419
    :return: The location in self.table where key should be put.
420
        location == NULL is an exception, but (*location) == NULL just
421
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
422
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
423
    # This uses Quadratic Probing:
424
    #  http://en.wikipedia.org/wiki/Quadratic_probing
425
    # with c1 = c2 = 1/2
426
    # This leads to probe locations at:
427
    #  h0 = hash(k1)
428
    #  h1 = h0 + 1
429
    #  h2 = h0 + 3 = h1 + 1 + 1
430
    #  h3 = h0 + 6 = h2 + 1 + 2
431
    #  h4 = h0 + 10 = h2 + 1 + 3
432
    # Note that all of these are '& mask', but that is computed *after* the
433
    # offset.
434
    # This differs from the algorithm used by Set and Dict. Which, effectively,
435
    # use double-hashing, and a step size that starts large, but dwindles to
436
    # stepping one-by-one.
437
    # This gives more 'locality' in that if you have a collision at offset X,
438
    # the first fallback is X+1, which is fast to check. However, that means
439
    # that an object w/ hash X+1 will also check there, and then X+2 next.
440
    # However, for objects with differing hashes, their chains are different.
441
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
442
    # So different hashes diverge quickly.
443
    # A bigger problem is that we *only* ever use the lowest bits of the hash
444
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
445
    # use the same collision resolution. We may want to try to find a way to
446
    # incorporate the upper bits of the hash with quadratic probing. (For
447
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
448
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
449
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
450
    cdef long key_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
451
    cdef PyObject **table, **slot, *cur, **free_slot, *py_key
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
452
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
453
    py_key = <PyObject *>key
454
    # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
455
    #       (it treats hash() as returning an 'int' rather than a 'long')
456
    key_hash = PyObject_Hash(py_key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
457
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
458
    mask = self._mask
459
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
460
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
461
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
462
        slot = &table[i & mask]
463
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
464
        if cur == NULL:
465
            # Found a blank spot
466
            if free_slot != NULL:
467
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
468
                return free_slot
469
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
470
                return slot
471
        if cur == py_key:
472
            # Found an exact pointer to the key
473
            return slot
474
        if cur == _dummy:
475
            if free_slot == NULL:
476
                free_slot = slot
477
        elif _is_equal(py_key, key_hash, cur):
478
            # Both py_key and cur belong in this slot, return it
479
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
480
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
481
    raise AssertionError('should never get here')
482
483
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
484
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
485
    """Find the slot where 'key' would fit.
486
487
    This is the same as a dicts 'lookup' function. This is a private
488
    api because mutating what you get without maintaing the other invariants
489
    is a 'bad thing'.
490
491
    :param key: An object we are looking up
492
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
493
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
494
        should never be NULL, but may reference a NULL (PyObject*)
495
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
496
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
497
498
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
499
cdef api object SimpleSet_Add(object self, object key):
500
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
501
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
502
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
503
    :param key: The key to be added. If the key is already present,
504
        self will not be modified
505
    :return: The current key stored at the location defined by 'key'.
506
        This may be the same object, or it may be an equivalent object.
507
        (consider dict.setdefault(key, key))
508
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
509
    return _check_self(self)._add(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
510
    
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
511
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
512
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
513
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
514
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
515
516
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
517
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
518
    """Remove the object referenced at location 'key'.
519
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
520
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
521
    :param key: The key we are checking on
522
    :return: 1 if there was an object present, 0 if there was not, and -1 on
523
        error.
524
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
525
    return _check_self(self)._discard(key)
526
527
528
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
529
    """Get a pointer to the object present at location 'key'.
530
531
    This returns an object which is equal to key which was previously added to
532
    self. This returns a borrowed reference, as it may also return NULL if no
533
    value is present at that location.
534
535
    :param key: The value we are looking for
536
    :return: The object present at that location
537
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
538
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
539
540
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
541
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
542
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
543
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
544
545
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
546
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
547
                            PyObject **key) except -1:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
548
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
549
550
    :param pos: should be initialized to 0 by the caller, and will be updated
551
        by this function
552
    :param key: Will return a borrowed reference to key
553
    :return: 0 if nothing left, 1 if we are returning a new value
554
    """
555
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
556
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
557
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
558
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
559
    i = pos[0]
560
    if (i < 0):
561
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
562
    mask = true_self._mask
563
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
564
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
565
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
566
    pos[0] = i + 1
567
    if (i > mask):
568
        return 0 # All done
569
    if (key != NULL):
570
        key[0] = table[i]
571
    return 1
572
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
573
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
574
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
575
                            void *arg) except -1:
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
576
    """This is an implementation of 'tp_traverse' that hits the whole table.
577
578
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
579
    define one for you if you have an 'object' attribute. Since they don't
580
    support C arrays of objects, we access the PyObject * directly.
581
    """
582
    cdef Py_ssize_t pos
583
    cdef PyObject *next_key
584
    cdef int ret
585
586
    pos = 0
587
    while SimpleSet_Next(self, &pos, &next_key):
588
        ret = visit(next_key, arg)
589
        if ret:
590
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
591
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
592
593
# It is a little bit ugly to do this, but it works, and means that Meliae can
594
# dump the total memory consumed by all child objects.
595
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse