/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2009, 2010 Canonical Ltd
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
6656.2.2 by Jelmer Vernooij
Use absolute_import.
19
from __future__ import absolute_import
20
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
21
cdef extern from "python-compat.h":
22
    pass
23
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
24
cdef extern from "Python.h":
25
    ctypedef unsigned long size_t
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
26
    ctypedef long (*hashfunc)(PyObject*) except -1
27
    ctypedef object (*richcmpfunc)(PyObject *, PyObject *, int)
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
28
    ctypedef int (*visitproc)(PyObject *, void *)
29
    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
30
    int Py_EQ
31
    void Py_INCREF(PyObject *)
32
    void Py_DECREF(PyObject *)
33
    ctypedef struct PyTypeObject:
34
        hashfunc tp_hash
35
        richcmpfunc tp_richcompare
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
36
        traverseproc tp_traverse
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
37
38
    PyTypeObject *Py_TYPE(PyObject *)
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
39
    # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
40
    #       thus silently truncates to 32-bits on 64-bit machines.
41
    long PyObject_Hash(PyObject *) except -1
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
42
        
43
    void *PyMem_Malloc(size_t nbytes)
44
    void PyMem_Free(void *)
45
    void memset(void *, int, size_t)
46
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
47
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
48
# Dummy is an object used to mark nodes that have been deleted. Since
49
# collisions require us to move a node to an alternative location, if we just
50
# set an entry to NULL on delete, we won't find any relocated nodes.
51
# We have to use _dummy_obj because we need to keep a refcount to it, but we
52
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
53
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
54
cdef object _dummy_obj
55
cdef PyObject *_dummy
56
_dummy_obj = object()
57
_dummy = <PyObject *>_dummy_obj
58
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
59
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
60
cdef object _NotImplemented
61
_NotImplemented = NotImplemented
62
63
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
64
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
65
    cdef long other_hash
66
67
    if this == other:
68
        return 1
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
69
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
70
    if other_hash != this_hash:
71
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
72
73
    # This implements a subset of the PyObject_RichCompareBool functionality.
74
    # Namely it:
75
    #   1) Doesn't try to do anything with old-style classes
76
    #   2) Assumes that both objects have a tp_richcompare implementation, and
77
    #      that if that is not enough to compare equal, then they are not
78
    #      equal. (It doesn't try to cast them both to some intermediate form
79
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
80
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
81
    if res is _NotImplemented:
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
82
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4744.1.1 by John Arbash Meinel
Add a test case for the bug w/ NotImplemented.
83
        if res is _NotImplemented:
84
            return 0
85
    if res:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
86
        return 1
87
    return 0
88
89
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
90
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
91
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
92
93
    It is similar in function to the interned dictionary that is used by
94
    strings. However:
95
96
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
97
         of it
98
      2) It only stores one reference to the object, rather than 2 (key vs
99
         key:value)
100
101
    As such, it uses 1/3rd the amount of memory to store a pointer to the
102
    interned object.
103
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
104
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
105
    DEF DEFAULT_SIZE=1024
106
107
    def __init__(self):
108
        cdef Py_ssize_t size, n_bytes
109
110
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
111
        self._mask = size - 1
112
        self._used = 0
113
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
114
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
115
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
116
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
117
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
118
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
119
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
120
    def __sizeof__(self):
5361.2.6 by John Arbash Meinel
We also have to re-implement it for _simple_set_pyx.pyx
121
        # Note: Pyrex doesn't allow sizeof(class) so we re-implement it here.
122
        # Bits are:
123
        #   1: PyObject
124
        #   2: vtable *
125
        #   3: 3 Py_ssize_t
126
        #   4: PyObject**
127
        # Note that we might get alignment, etc, wrong, but at least this is
128
        # better than no estimate at all
129
        # return sizeof(SimpleSet) + (self._mask + 1) * (sizeof(PyObject*))
130
        return (sizeof(PyObject) + sizeof(void*)
131
                + 3*sizeof(Py_ssize_t) + sizeof(PyObject**)
132
                + (self._mask + 1) * sizeof(PyObject*))
5361.2.1 by John Arbash Meinel
SimpleSet now has a __sizeof__ member which knows about its internal table.
133
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
134
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
135
        if self._table != NULL:
136
            PyMem_Free(self._table)
137
            self._table = NULL
138
139
    property used:
140
        def __get__(self):
141
            return self._used
142
143
    property fill:
144
        def __get__(self):
145
            return self._fill
146
147
    property mask:
148
        def __get__(self):
149
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
150
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
151
    def _memory_size(self):
152
        """Return the number of bytes of memory consumed by this class."""
153
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
154
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
155
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
156
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
157
158
    def _test_lookup(self, key):
159
        cdef PyObject **slot
160
4679.3.61 by John Arbash Meinel
Invert the logic.
161
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
162
        if slot[0] == NULL:
163
            res = '<null>'
164
        elif slot[0] == _dummy:
165
            res = '<dummy>'
166
        else:
167
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
168
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
169
170
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
171
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
172
        cdef PyObject **slot
173
4679.3.61 by John Arbash Meinel
Invert the logic.
174
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
175
        if slot[0] == NULL or slot[0] == _dummy:
176
            return False
177
        return True
178
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
179
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
180
        """Return the object (or nothing) define at the given location."""
181
        cdef PyObject **slot
182
183
        slot = _lookup(self, key)
184
        if slot[0] == NULL or slot[0] == _dummy:
185
            return NULL
186
        return slot[0]
187
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
188
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
189
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
190
        cdef PyObject *py_val
191
192
        py_val = self._get(key)
193
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
194
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
195
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
196
        return val
197
4679.3.63 by John Arbash Meinel
Implement resizing.
198
    cdef int _insert_clean(self, PyObject *key) except -1:
199
        """Insert a key into self.table.
200
201
        This is only meant to be used during times like '_resize',
202
        as it makes a lot of assuptions about keys not already being present,
203
        and there being no dummy entries.
204
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
205
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
206
        cdef long the_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
207
        cdef PyObject **table, **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
208
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
209
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
210
        mask = self._mask
211
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
212
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
213
        the_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
214
        i = the_hash
215
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
216
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
217
            if slot[0] == NULL:
218
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
219
                self._fill = self._fill + 1
220
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
221
                return 1
222
            i = i + 1 + n_lookup
223
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
224
225
    def _py_resize(self, min_used):
226
        """Do not use this directly, it is only exposed for testing."""
227
        return self._resize(min_used)
228
229
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
230
        """Resize the internal table.
231
232
        The final table will be big enough to hold at least min_used entries.
233
        We will copy the data from the existing table over, leaving out dummy
234
        entries.
235
236
        :return: The new size of the internal table
237
        """
238
        cdef Py_ssize_t new_size, n_bytes, remaining
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
239
        cdef PyObject **new_table, **old_table, **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
240
241
        new_size = DEFAULT_SIZE
242
        while new_size <= min_used and new_size > 0:
243
            new_size = new_size << 1
244
        # We rolled over our signed size field
245
        if new_size <= 0:
246
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
247
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
248
        # size, we will still run the algorithm so that dummy entries are
249
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
250
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
251
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
252
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
253
        #                        ' smaller than the number of used slots.')
254
        n_bytes = sizeof(PyObject*) * new_size;
255
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
256
        if new_table == NULL:
257
            raise MemoryError()
258
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
259
        old_table = self._table
260
        self._table = new_table
261
        memset(self._table, 0, n_bytes)
262
        self._mask = new_size - 1
263
        self._used = 0
264
        remaining = self._fill
265
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
266
267
        # Moving everything to the other table is refcount neutral, so we don't
268
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
269
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
270
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
271
            if slot[0] == NULL: # unused slot
4679.3.63 by John Arbash Meinel
Implement resizing.
272
                pass 
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
273
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
274
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
275
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
276
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
277
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
278
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
279
        PyMem_Free(old_table)
280
        return new_size
281
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
282
    def add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
283
        """Similar to set.add(), start tracking this key.
284
        
285
        There is one small difference, which is that we return the object that
286
        is stored at the given location. (which is closer to the
287
        dict.setdefault() functionality.)
288
        """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
289
        return self._add(key)
290
291
    cdef object _add(self, key):
4679.3.61 by John Arbash Meinel
Invert the logic.
292
        cdef PyObject **slot, *py_key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
293
        cdef int added
4679.3.61 by John Arbash Meinel
Invert the logic.
294
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
295
        py_key = <PyObject *>key
296
        if (Py_TYPE(py_key).tp_richcompare == NULL
297
            or Py_TYPE(py_key).tp_hash == NULL):
298
            raise TypeError('Types added to SimpleSet must implement'
299
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
300
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
301
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
302
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
303
        slot = _lookup(self, key)
304
        if (slot[0] == NULL):
305
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
306
            self._fill = self._fill + 1
307
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
308
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
309
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
310
        elif (slot[0] == _dummy):
311
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
312
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
313
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
314
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
315
        # No else: clause. If _lookup returns a pointer to
316
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
317
        retval = <object>(slot[0])
318
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
319
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
320
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
321
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
322
        # Even if we resized and ended up moving retval into a different slot,
323
        # it is still the value that is held at the slot equivalent to 'key',
324
        # so we can still return it
325
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
326
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
327
    def discard(self, key):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
328
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
329
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
330
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
331
        """
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
332
        if self._discard(key):
333
            return True
334
        return False
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
335
336
    cdef int _discard(self, key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
337
        cdef PyObject **slot, *py_key
338
339
        slot = _lookup(self, key)
340
        if slot[0] == NULL or slot[0] == _dummy:
341
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
342
        self._used = self._used - 1
4679.3.61 by John Arbash Meinel
Invert the logic.
343
        Py_DECREF(slot[0])
344
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
345
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
346
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
347
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
348
        # However, we are planning on using this as an interning structure, in
349
        # which we will be putting a lot of objects. And we expect that large
350
        # groups of them are going to have the same lifetime.
351
        # Dummy entries hurt a little bit because they cause the lookup to keep
352
        # searching, but resizing is also rather expensive
353
        # For now, we'll just use their algorithm, but we may want to revisit
354
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
355
        if ((self._fill - self._used) * 5 > self._mask):
356
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
357
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
358
4679.3.65 by John Arbash Meinel
Add __iter__ support.
359
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
360
        return _SimpleSet_iterator(self)
361
362
363
cdef class _SimpleSet_iterator:
364
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
365
366
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
367
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
368
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
369
    cdef Py_ssize_t len # number of entries left
370
371
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
372
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
373
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
374
        self._used = self.set._used
375
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
376
377
    def __iter__(self):
378
        return self
379
380
    def __next__(self):
381
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
382
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
383
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
384
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
385
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
386
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
387
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
388
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
389
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
390
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
391
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
392
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
393
        # we found something
394
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
395
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
396
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
397
398
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
399
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
400
            return self.len
401
        return 0
402
    
403
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
404
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
405
cdef api SimpleSet SimpleSet_New():
406
    """Create a new SimpleSet object."""
407
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
408
409
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
410
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
411
    """Check that the parameter is not None.
412
413
    Pyrex/Cython will do type checking, but only to ensure that an object is
414
    either the right type or None. You can say "object foo not None" for pure
415
    python functions, but not for C functions.
416
    So this is just a helper for all the apis that need to do the check.
417
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
418
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
419
    if self is None:
420
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
421
    true_self = self
422
    return true_self
423
424
425
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
426
    """Find the slot where 'key' would fit.
427
428
    This is the same as a dicts 'lookup' function.
429
430
    :param key: An object we are looking up
431
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
432
    :return: The location in self.table where key should be put.
433
        location == NULL is an exception, but (*location) == NULL just
434
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
435
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
436
    # This uses Quadratic Probing:
437
    #  http://en.wikipedia.org/wiki/Quadratic_probing
438
    # with c1 = c2 = 1/2
439
    # This leads to probe locations at:
440
    #  h0 = hash(k1)
441
    #  h1 = h0 + 1
442
    #  h2 = h0 + 3 = h1 + 1 + 1
443
    #  h3 = h0 + 6 = h2 + 1 + 2
444
    #  h4 = h0 + 10 = h2 + 1 + 3
445
    # Note that all of these are '& mask', but that is computed *after* the
446
    # offset.
447
    # This differs from the algorithm used by Set and Dict. Which, effectively,
448
    # use double-hashing, and a step size that starts large, but dwindles to
449
    # stepping one-by-one.
450
    # This gives more 'locality' in that if you have a collision at offset X,
451
    # the first fallback is X+1, which is fast to check. However, that means
452
    # that an object w/ hash X+1 will also check there, and then X+2 next.
453
    # However, for objects with differing hashes, their chains are different.
454
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
455
    # So different hashes diverge quickly.
456
    # A bigger problem is that we *only* ever use the lowest bits of the hash
457
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
458
    # use the same collision resolution. We may want to try to find a way to
459
    # incorporate the upper bits of the hash with quadratic probing. (For
460
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
461
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
462
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
463
    cdef long key_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
464
    cdef PyObject **table, **slot, *cur, **free_slot, *py_key
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
465
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
466
    py_key = <PyObject *>key
467
    # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
468
    #       (it treats hash() as returning an 'int' rather than a 'long')
469
    key_hash = PyObject_Hash(py_key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
470
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
471
    mask = self._mask
472
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
473
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
474
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
475
        slot = &table[i & mask]
476
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
477
        if cur == NULL:
478
            # Found a blank spot
479
            if free_slot != NULL:
480
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
481
                return free_slot
482
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
483
                return slot
484
        if cur == py_key:
485
            # Found an exact pointer to the key
486
            return slot
487
        if cur == _dummy:
488
            if free_slot == NULL:
489
                free_slot = slot
490
        elif _is_equal(py_key, key_hash, cur):
491
            # Both py_key and cur belong in this slot, return it
492
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
493
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
494
    raise AssertionError('should never get here')
495
496
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
497
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
498
    """Find the slot where 'key' would fit.
499
500
    This is the same as a dicts 'lookup' function. This is a private
501
    api because mutating what you get without maintaing the other invariants
502
    is a 'bad thing'.
503
504
    :param key: An object we are looking up
505
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
506
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
507
        should never be NULL, but may reference a NULL (PyObject*)
508
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
509
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
510
511
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
512
cdef api object SimpleSet_Add(object self, object key):
513
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
514
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
515
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
516
    :param key: The key to be added. If the key is already present,
517
        self will not be modified
518
    :return: The current key stored at the location defined by 'key'.
519
        This may be the same object, or it may be an equivalent object.
520
        (consider dict.setdefault(key, key))
521
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
522
    return _check_self(self)._add(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
523
    
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
524
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
525
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
526
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
527
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
528
529
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
530
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
531
    """Remove the object referenced at location 'key'.
532
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
533
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
534
    :param key: The key we are checking on
535
    :return: 1 if there was an object present, 0 if there was not, and -1 on
536
        error.
537
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
538
    return _check_self(self)._discard(key)
539
540
541
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
542
    """Get a pointer to the object present at location 'key'.
543
544
    This returns an object which is equal to key which was previously added to
545
    self. This returns a borrowed reference, as it may also return NULL if no
546
    value is present at that location.
547
548
    :param key: The value we are looking for
549
    :return: The object present at that location
550
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
551
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
552
553
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
554
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
555
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
556
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
557
558
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
559
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos,
560
                            PyObject **key) except -1:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
561
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
562
563
    :param pos: should be initialized to 0 by the caller, and will be updated
564
        by this function
565
    :param key: Will return a borrowed reference to key
566
    :return: 0 if nothing left, 1 if we are returning a new value
567
    """
568
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
569
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
570
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
571
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
572
    i = pos[0]
573
    if (i < 0):
574
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
575
    mask = true_self._mask
576
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
577
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
578
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
579
    pos[0] = i + 1
580
    if (i > mask):
581
        return 0 # All done
582
    if (key != NULL):
583
        key[0] = table[i]
584
    return 1
585
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
586
4932.1.2 by John Arbash Meinel
Clean up _simple_set.pyx, and handle 'nogil'.
587
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit,
588
                            void *arg) except -1:
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
589
    """This is an implementation of 'tp_traverse' that hits the whole table.
590
591
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
592
    define one for you if you have an 'object' attribute. Since they don't
593
    support C arrays of objects, we access the PyObject * directly.
594
    """
595
    cdef Py_ssize_t pos
596
    cdef PyObject *next_key
597
    cdef int ret
598
599
    pos = 0
600
    while SimpleSet_Next(self, &pos, &next_key):
601
        ret = visit(next_key, arg)
602
        if ret:
603
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
604
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
605
606
# It is a little bit ugly to do this, but it works, and means that Meliae can
607
# dump the total memory consumed by all child objects.
608
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse