/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
17
"""Definition of a class that is similar to Set with some small changes."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
18
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
19
cdef extern from "python-compat.h":
20
    pass
21
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
22
cdef extern from "Python.h":
23
    ctypedef unsigned long size_t
24
    ctypedef long (*hashfunc)(PyObject*)
25
    ctypedef PyObject *(*richcmpfunc)(PyObject *, PyObject *, int)
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
26
    ctypedef int (*visitproc)(PyObject *, void *)
27
    ctypedef int (*traverseproc)(PyObject *, visitproc, void *)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
28
    int Py_EQ
29
    PyObject *Py_True
30
    PyObject *Py_NotImplemented
31
    void Py_INCREF(PyObject *)
32
    void Py_DECREF(PyObject *)
33
    ctypedef struct PyTypeObject:
34
        hashfunc tp_hash
35
        richcmpfunc tp_richcompare
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
36
        traverseproc tp_traverse
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
37
38
    PyTypeObject *Py_TYPE(PyObject *)
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
39
    int PyObject_IsTrue(PyObject *)
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
40
    # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
41
    #       thus silently truncates to 32-bits on 64-bit machines.
42
    long PyObject_Hash(PyObject *) except -1
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
43
        
44
    void *PyMem_Malloc(size_t nbytes)
45
    void PyMem_Free(void *)
46
    void memset(void *, int, size_t)
47
4679.3.86 by John Arbash Meinel
some whitespace and expose the rest of the C api in the .pxd file.
48
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
49
# Dummy is an object used to mark nodes that have been deleted. Since
50
# collisions require us to move a node to an alternative location, if we just
51
# set an entry to NULL on delete, we won't find any relocated nodes.
52
# We have to use _dummy_obj because we need to keep a refcount to it, but we
53
# also use _dummy as a pointer, because it avoids having to put <PyObject*> all
54
# over the code base.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
55
cdef object _dummy_obj
56
cdef PyObject *_dummy
57
_dummy_obj = object()
58
_dummy = <PyObject *>_dummy_obj
59
4679.3.74 by John Arbash Meinel
Revert back to before I started trying to move to pyrex/cython code.
60
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
61
cdef int _is_equal(PyObject *this, long this_hash, PyObject *other) except -1:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
62
    cdef long other_hash
63
    cdef PyObject *res
64
65
    if this == other:
66
        return 1
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
67
    other_hash = PyObject_Hash(other)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
68
    if other_hash != this_hash:
69
        return 0
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
70
71
    # This implements a subset of the PyObject_RichCompareBool functionality.
72
    # Namely it:
73
    #   1) Doesn't try to do anything with old-style classes
74
    #   2) Assumes that both objects have a tp_richcompare implementation, and
75
    #      that if that is not enough to compare equal, then they are not
76
    #      equal. (It doesn't try to cast them both to some intermediate form
77
    #      that would compare equal.)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
78
    res = Py_TYPE(this).tp_richcompare(this, other, Py_EQ)
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
79
    if res == NULL: # Exception
80
        return -1
81
    if PyObject_IsTrue(res):
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
82
        Py_DECREF(res)
83
        return 1
84
    if res == Py_NotImplemented:
85
        Py_DECREF(res)
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
86
        res = Py_TYPE(other).tp_richcompare(other, this, Py_EQ)
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
87
    if res == NULL:
88
        return -1
89
    if PyObject_IsTrue(res):
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
90
        Py_DECREF(res)
91
        return 1
92
    Py_DECREF(res)
93
    return 0
94
95
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
96
cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
97
    """This class can be used to track canonical forms for objects.
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
98
99
    It is similar in function to the interned dictionary that is used by
100
    strings. However:
101
102
      1) It assumes that hash(obj) is cheap, so does not need to inline a copy
103
         of it
104
      2) It only stores one reference to the object, rather than 2 (key vs
105
         key:value)
106
107
    As such, it uses 1/3rd the amount of memory to store a pointer to the
108
    interned object.
109
    """
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
110
    # Attributes are defined in the .pxd file
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
111
    DEF DEFAULT_SIZE=1024
112
113
    def __init__(self):
114
        cdef Py_ssize_t size, n_bytes
115
116
        size = DEFAULT_SIZE
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
117
        self._mask = size - 1
118
        self._used = 0
119
        self._fill = 0
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
120
        n_bytes = sizeof(PyObject*) * size;
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
121
        self._table = <PyObject **>PyMem_Malloc(n_bytes)
122
        if self._table == NULL:
4679.3.63 by John Arbash Meinel
Implement resizing.
123
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
124
        memset(self._table, 0, n_bytes)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
125
126
    def __dealloc__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
127
        if self._table != NULL:
128
            PyMem_Free(self._table)
129
            self._table = NULL
130
131
    property used:
132
        def __get__(self):
133
            return self._used
134
135
    property fill:
136
        def __get__(self):
137
            return self._fill
138
139
    property mask:
140
        def __get__(self):
141
            return self._mask
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
142
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
143
    def _memory_size(self):
144
        """Return the number of bytes of memory consumed by this class."""
145
        return sizeof(self) + (sizeof(PyObject*)*(self._mask + 1))
146
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
147
    def __len__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
148
        return self._used
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
149
150
    def _test_lookup(self, key):
151
        cdef PyObject **slot
152
4679.3.61 by John Arbash Meinel
Invert the logic.
153
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
154
        if slot[0] == NULL:
155
            res = '<null>'
156
        elif slot[0] == _dummy:
157
            res = '<dummy>'
158
        else:
159
            res = <object>slot[0]
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
160
        return <int>(slot - self._table), res
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
161
162
    def __contains__(self, key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
163
        """Is key present in this SimpleSet."""
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
164
        cdef PyObject **slot
165
4679.3.61 by John Arbash Meinel
Invert the logic.
166
        slot = _lookup(self, key)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
167
        if slot[0] == NULL or slot[0] == _dummy:
168
            return False
169
        return True
170
4679.3.62 by John Arbash Meinel
Move some of the information into the pxd header file.
171
    cdef PyObject *_get(self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
172
        """Return the object (or nothing) define at the given location."""
173
        cdef PyObject **slot
174
175
        slot = _lookup(self, key)
176
        if slot[0] == NULL or slot[0] == _dummy:
177
            return NULL
178
        return slot[0]
179
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
180
    def __getitem__(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
181
        """Return a stored item that is equivalent to key."""
4679.3.61 by John Arbash Meinel
Invert the logic.
182
        cdef PyObject *py_val
183
184
        py_val = self._get(key)
185
        if py_val == NULL:
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
186
            raise KeyError("Key %s is not present" % key)
4679.3.61 by John Arbash Meinel
Invert the logic.
187
        val = <object>(py_val)
4679.3.58 by John Arbash Meinel
Adding a StaticTupleInterner class.
188
        return val
189
4679.3.63 by John Arbash Meinel
Implement resizing.
190
    cdef int _insert_clean(self, PyObject *key) except -1:
191
        """Insert a key into self.table.
192
193
        This is only meant to be used during times like '_resize',
194
        as it makes a lot of assuptions about keys not already being present,
195
        and there being no dummy entries.
196
        """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
197
        cdef size_t i, n_lookup
4679.3.63 by John Arbash Meinel
Implement resizing.
198
        cdef long the_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
199
        cdef PyObject **table, **slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
200
        cdef Py_ssize_t mask
4679.3.63 by John Arbash Meinel
Implement resizing.
201
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
202
        mask = self._mask
203
        table = self._table
4679.3.63 by John Arbash Meinel
Implement resizing.
204
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
205
        the_hash = PyObject_Hash(key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
206
        i = the_hash
207
        for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
208
            slot = &table[i & mask]
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
209
            if slot[0] == NULL:
210
                slot[0] = key
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
211
                self._fill = self._fill + 1
212
                self._used = self._used + 1
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
213
                return 1
214
            i = i + 1 + n_lookup
215
        raise RuntimeError('ran out of slots.')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
216
217
    def _py_resize(self, min_used):
218
        """Do not use this directly, it is only exposed for testing."""
219
        return self._resize(min_used)
220
221
    cdef Py_ssize_t _resize(self, Py_ssize_t min_used) except -1:
4679.3.63 by John Arbash Meinel
Implement resizing.
222
        """Resize the internal table.
223
224
        The final table will be big enough to hold at least min_used entries.
225
        We will copy the data from the existing table over, leaving out dummy
226
        entries.
227
228
        :return: The new size of the internal table
229
        """
230
        cdef Py_ssize_t new_size, n_bytes, remaining
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
231
        cdef PyObject **new_table, **old_table, **slot
4679.3.63 by John Arbash Meinel
Implement resizing.
232
233
        new_size = DEFAULT_SIZE
234
        while new_size <= min_used and new_size > 0:
235
            new_size = new_size << 1
236
        # We rolled over our signed size field
237
        if new_size <= 0:
238
            raise MemoryError()
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
239
        # Even if min_used == self._mask + 1, and we aren't changing the actual
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
240
        # size, we will still run the algorithm so that dummy entries are
241
        # removed
4679.3.63 by John Arbash Meinel
Implement resizing.
242
        # TODO: Test this
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
243
        # if new_size < self._used:
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
244
        #     raise RuntimeError('cannot shrink SimpleSet to something'
4679.3.63 by John Arbash Meinel
Implement resizing.
245
        #                        ' smaller than the number of used slots.')
246
        n_bytes = sizeof(PyObject*) * new_size;
247
        new_table = <PyObject **>PyMem_Malloc(n_bytes)
248
        if new_table == NULL:
249
            raise MemoryError()
250
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
251
        old_table = self._table
252
        self._table = new_table
253
        memset(self._table, 0, n_bytes)
254
        self._mask = new_size - 1
255
        self._used = 0
256
        remaining = self._fill
257
        self._fill = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
258
259
        # Moving everything to the other table is refcount neutral, so we don't
260
        # worry about it.
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
261
        slot = old_table
4679.3.63 by John Arbash Meinel
Implement resizing.
262
        while remaining > 0:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
263
            if slot[0] == NULL: # unused slot
4679.3.63 by John Arbash Meinel
Implement resizing.
264
                pass 
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
265
            elif slot[0] == _dummy: # dummy slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
266
                remaining = remaining - 1
4679.3.63 by John Arbash Meinel
Implement resizing.
267
            else: # active slot
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
268
                remaining = remaining - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
269
                self._insert_clean(slot[0])
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
270
            slot = slot + 1
4679.3.63 by John Arbash Meinel
Implement resizing.
271
        PyMem_Free(old_table)
272
        return new_size
273
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
274
    def add(self, key):
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
275
        """Similar to set.add(), start tracking this key.
276
        
277
        There is one small difference, which is that we return the object that
278
        is stored at the given location. (which is closer to the
279
        dict.setdefault() functionality.)
280
        """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
281
        return self._add(key)
282
283
    cdef object _add(self, key):
4679.3.61 by John Arbash Meinel
Invert the logic.
284
        cdef PyObject **slot, *py_key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
285
        cdef int added
4679.3.61 by John Arbash Meinel
Invert the logic.
286
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
287
        py_key = <PyObject *>key
288
        if (Py_TYPE(py_key).tp_richcompare == NULL
289
            or Py_TYPE(py_key).tp_hash == NULL):
290
            raise TypeError('Types added to SimpleSet must implement'
291
                            ' both tp_richcompare and tp_hash')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
292
        added = 0
4679.3.63 by John Arbash Meinel
Implement resizing.
293
        # We need at least one empty slot
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
294
        assert self._used < self._mask
4679.3.61 by John Arbash Meinel
Invert the logic.
295
        slot = _lookup(self, key)
296
        if (slot[0] == NULL):
297
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
298
            self._fill = self._fill + 1
299
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
300
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
301
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
302
        elif (slot[0] == _dummy):
303
            Py_INCREF(py_key)
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
304
            self._used = self._used + 1
4679.3.61 by John Arbash Meinel
Invert the logic.
305
            slot[0] = py_key
4679.3.63 by John Arbash Meinel
Implement resizing.
306
            added = 1
4679.3.61 by John Arbash Meinel
Invert the logic.
307
        # No else: clause. If _lookup returns a pointer to
308
        # a live object, then we already have a value at this location.
4679.3.63 by John Arbash Meinel
Implement resizing.
309
        retval = <object>(slot[0])
310
        # PySet and PyDict use a 2-3rds full algorithm, we'll follow suit
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
311
        if added and (self._fill * 3) >= ((self._mask + 1) * 2):
4679.3.63 by John Arbash Meinel
Implement resizing.
312
            # However, we always work for a load factor of 2:1
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
313
            self._resize(self._used * 2)
4679.3.63 by John Arbash Meinel
Implement resizing.
314
        # Even if we resized and ended up moving retval into a different slot,
315
        # it is still the value that is held at the slot equivalent to 'key',
316
        # so we can still return it
317
        return retval
4679.3.61 by John Arbash Meinel
Invert the logic.
318
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
319
    def discard(self, key):
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
320
        """Remove key from the set, whether it exists or not.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
321
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
322
        :return: False if the item did not exist, True if it did
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
323
        """
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
324
        if self._discard(key):
325
            return True
326
        return False
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
327
328
    cdef int _discard(self, key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
329
        cdef PyObject **slot, *py_key
330
331
        slot = _lookup(self, key)
332
        if slot[0] == NULL or slot[0] == _dummy:
333
            return 0
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
334
        self._used = self._used - 1
4679.3.61 by John Arbash Meinel
Invert the logic.
335
        Py_DECREF(slot[0])
336
        slot[0] = _dummy
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
337
        # PySet uses the heuristic: If more than 1/5 are dummies, then resize
338
        #                           them away
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
339
        #   if ((so->_fill - so->_used) * 5 < so->mask)
4679.3.64 by John Arbash Meinel
Add functionality for shrinking the table.
340
        # However, we are planning on using this as an interning structure, in
341
        # which we will be putting a lot of objects. And we expect that large
342
        # groups of them are going to have the same lifetime.
343
        # Dummy entries hurt a little bit because they cause the lookup to keep
344
        # searching, but resizing is also rather expensive
345
        # For now, we'll just use their algorithm, but we may want to revisit
346
        # it
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
347
        if ((self._fill - self._used) * 5 > self._mask):
348
            self._resize(self._used * 2)
4679.3.61 by John Arbash Meinel
Invert the logic.
349
        return 1
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
350
4679.3.65 by John Arbash Meinel
Add __iter__ support.
351
    def __iter__(self):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
352
        return _SimpleSet_iterator(self)
353
354
355
cdef class _SimpleSet_iterator:
356
    """Iterator over the SimpleSet structure."""
4679.3.65 by John Arbash Meinel
Add __iter__ support.
357
358
    cdef Py_ssize_t pos
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
359
    cdef SimpleSet set
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
360
    cdef Py_ssize_t _used # track if things have been mutated while iterating
4679.3.65 by John Arbash Meinel
Add __iter__ support.
361
    cdef Py_ssize_t len # number of entries left
362
363
    def __init__(self, obj):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
364
        self.set = obj
4679.3.65 by John Arbash Meinel
Add __iter__ support.
365
        self.pos = 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
366
        self._used = self.set._used
367
        self.len = self.set._used
4679.3.65 by John Arbash Meinel
Add __iter__ support.
368
369
    def __iter__(self):
370
        return self
371
372
    def __next__(self):
373
        cdef Py_ssize_t mask, i
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
374
        cdef PyObject *key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
375
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
376
        if self.set is None:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
377
            raise StopIteration
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
378
        if self.set._used != self._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
379
            # Force this exception to continue to be raised
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
380
            self._used = -1
4679.3.65 by John Arbash Meinel
Add __iter__ support.
381
            raise RuntimeError("Set size changed during iteration")
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
382
        if not SimpleSet_Next(self.set, &self.pos, &key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
383
            self.set = None
4679.3.65 by John Arbash Meinel
Add __iter__ support.
384
            raise StopIteration
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
385
        # we found something
386
        the_key = <object>key # INCREF
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
387
        self.len = self.len - 1
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
388
        return the_key
4679.3.65 by John Arbash Meinel
Add __iter__ support.
389
390
    def __length_hint__(self):
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
391
        if self.set is not None and self._used == self.set._used:
4679.3.65 by John Arbash Meinel
Add __iter__ support.
392
            return self.len
393
        return 0
394
    
395
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
396
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
397
cdef api SimpleSet SimpleSet_New():
398
    """Create a new SimpleSet object."""
399
    return SimpleSet()
4679.3.61 by John Arbash Meinel
Invert the logic.
400
401
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
402
cdef SimpleSet _check_self(object self):
4679.3.61 by John Arbash Meinel
Invert the logic.
403
    """Check that the parameter is not None.
404
405
    Pyrex/Cython will do type checking, but only to ensure that an object is
406
    either the right type or None. You can say "object foo not None" for pure
407
    python functions, but not for C functions.
408
    So this is just a helper for all the apis that need to do the check.
409
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
410
    cdef SimpleSet true_self
4679.3.61 by John Arbash Meinel
Invert the logic.
411
    if self is None:
412
        raise TypeError('self must not be None')
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
413
    true_self = self
414
    return true_self
415
416
417
cdef PyObject **_lookup(SimpleSet self, object key) except NULL:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
418
    """Find the slot where 'key' would fit.
419
420
    This is the same as a dicts 'lookup' function.
421
422
    :param key: An object we are looking up
423
    :param hash: The hash for key
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
424
    :return: The location in self.table where key should be put.
425
        location == NULL is an exception, but (*location) == NULL just
426
        indicates the slot is empty and can be used.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
427
    """
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
428
    # This uses Quadratic Probing:
429
    #  http://en.wikipedia.org/wiki/Quadratic_probing
430
    # with c1 = c2 = 1/2
431
    # This leads to probe locations at:
432
    #  h0 = hash(k1)
433
    #  h1 = h0 + 1
434
    #  h2 = h0 + 3 = h1 + 1 + 1
435
    #  h3 = h0 + 6 = h2 + 1 + 2
436
    #  h4 = h0 + 10 = h2 + 1 + 3
437
    # Note that all of these are '& mask', but that is computed *after* the
438
    # offset.
439
    # This differs from the algorithm used by Set and Dict. Which, effectively,
440
    # use double-hashing, and a step size that starts large, but dwindles to
441
    # stepping one-by-one.
442
    # This gives more 'locality' in that if you have a collision at offset X,
443
    # the first fallback is X+1, which is fast to check. However, that means
444
    # that an object w/ hash X+1 will also check there, and then X+2 next.
445
    # However, for objects with differing hashes, their chains are different.
446
    # The former checks X, X+1, X+3, ... the latter checks X+1, X+2, X+4, ...
447
    # So different hashes diverge quickly.
448
    # A bigger problem is that we *only* ever use the lowest bits of the hash
449
    # So all integers (x + SIZE*N) will resolve into the same bucket, and all
450
    # use the same collision resolution. We may want to try to find a way to
451
    # incorporate the upper bits of the hash with quadratic probing. (For
452
    # example, X, X+1, X+3+some_upper_bits, X+6+more_upper_bits, etc.)
453
    cdef size_t i, n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
454
    cdef Py_ssize_t mask
4679.3.61 by John Arbash Meinel
Invert the logic.
455
    cdef long key_hash
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
456
    cdef PyObject **table, **slot, *cur, **free_slot, *py_key
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
457
4739.2.1 by John Arbash Meinel
Stop using hash() because of bugs wrt pyrex 0.9.8.5
458
    py_key = <PyObject *>key
459
    # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
460
    #       (it treats hash() as returning an 'int' rather than a 'long')
461
    key_hash = PyObject_Hash(py_key)
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
462
    i = <size_t>key_hash
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
463
    mask = self._mask
464
    table = self._table
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
465
    free_slot = NULL
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
466
    for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
467
        slot = &table[i & mask]
468
        cur = slot[0]
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
469
        if cur == NULL:
470
            # Found a blank spot
471
            if free_slot != NULL:
472
                # Did we find an earlier _dummy entry?
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
473
                return free_slot
474
            else:
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
475
                return slot
476
        if cur == py_key:
477
            # Found an exact pointer to the key
478
            return slot
479
        if cur == _dummy:
480
            if free_slot == NULL:
481
                free_slot = slot
482
        elif _is_equal(py_key, key_hash, cur):
483
            # Both py_key and cur belong in this slot, return it
484
            return slot
4679.3.91 by John Arbash Meinel
Change the _lookup function to use Quadratic Probing.
485
        i = i + 1 + n_lookup
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
486
    raise AssertionError('should never get here')
487
488
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
489
cdef api PyObject **_SimpleSet_Lookup(object self, object key) except NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
490
    """Find the slot where 'key' would fit.
491
492
    This is the same as a dicts 'lookup' function. This is a private
493
    api because mutating what you get without maintaing the other invariants
494
    is a 'bad thing'.
495
496
    :param key: An object we are looking up
497
    :param hash: The hash for key
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
498
    :return: The location in self._table where key should be put
4679.3.61 by John Arbash Meinel
Invert the logic.
499
        should never be NULL, but may reference a NULL (PyObject*)
500
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
501
    return _lookup(_check_self(self), key)
4679.3.61 by John Arbash Meinel
Invert the logic.
502
503
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
504
cdef api object SimpleSet_Add(object self, object key):
505
    """Add a key to the SimpleSet (set).
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
506
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
507
    :param self: The SimpleSet to add the key to.
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
508
    :param key: The key to be added. If the key is already present,
509
        self will not be modified
510
    :return: The current key stored at the location defined by 'key'.
511
        This may be the same object, or it may be an equivalent object.
512
        (consider dict.setdefault(key, key))
513
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
514
    return _check_self(self)._add(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
515
    
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
516
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
517
cdef api int SimpleSet_Contains(object self, object key) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
518
    """Is key present in self?"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
519
    return (key in _check_self(self))
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
520
521
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
522
cdef api int SimpleSet_Discard(object self, object key) except -1:
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
523
    """Remove the object referenced at location 'key'.
524
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
525
    :param self: The SimpleSet being modified
4679.3.60 by John Arbash Meinel
Start working on more of the C api for StaticTupleInterner.
526
    :param key: The key we are checking on
527
    :return: 1 if there was an object present, 0 if there was not, and -1 on
528
        error.
529
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
530
    return _check_self(self)._discard(key)
531
532
533
cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL:
4679.3.61 by John Arbash Meinel
Invert the logic.
534
    """Get a pointer to the object present at location 'key'.
535
536
    This returns an object which is equal to key which was previously added to
537
    self. This returns a borrowed reference, as it may also return NULL if no
538
    value is present at that location.
539
540
    :param key: The value we are looking for
541
    :return: The object present at that location
542
    """
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
543
    return _check_self(self)._get(key)
4679.3.61 by John Arbash Meinel
Invert the logic.
544
545
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
546
cdef api Py_ssize_t SimpleSet_Size(object self) except -1:
4679.3.61 by John Arbash Meinel
Invert the logic.
547
    """Get the number of active entries in 'self'"""
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
548
    return _check_self(self)._used
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
549
550
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
551
cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key):
4679.3.76 by John Arbash Meinel
Rename StaticTupleInterner => SimpleSet.
552
    """Walk over items in a SimpleSet.
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
553
554
    :param pos: should be initialized to 0 by the caller, and will be updated
555
        by this function
556
    :param key: Will return a borrowed reference to key
557
    :return: 0 if nothing left, 1 if we are returning a new value
558
    """
559
    cdef Py_ssize_t i, mask
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
560
    cdef SimpleSet true_self
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
561
    cdef PyObject **table
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
562
    true_self = _check_self(self)
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
563
    i = pos[0]
564
    if (i < 0):
565
        return 0
4679.3.81 by John Arbash Meinel
Fix up _simple_set_pyx.pyx to be compatible with pyrex again.
566
    mask = true_self._mask
567
    table= true_self._table
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
568
    while (i <= mask and (table[i] == NULL or table[i] == _dummy)):
4679.3.94 by John Arbash Meinel
Python 2.5 and Pyrex 0.9.6.4 compatibility.
569
        i = i + 1
4679.3.66 by John Arbash Meinel
Implement the C variant of __iter__
570
    pos[0] = i + 1
571
    if (i > mask):
572
        return 0 # All done
573
    if (key != NULL):
574
        key[0] = table[i]
575
    return 1
576
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
577
578
cdef int SimpleSet_traverse(SimpleSet self, visitproc visit, void *arg):
579
    """This is an implementation of 'tp_traverse' that hits the whole table.
580
581
    Cython/Pyrex don't seem to let you define a tp_traverse, and they only
582
    define one for you if you have an 'object' attribute. Since they don't
583
    support C arrays of objects, we access the PyObject * directly.
584
    """
585
    cdef Py_ssize_t pos
586
    cdef PyObject *next_key
587
    cdef int ret
588
589
    pos = 0
590
    while SimpleSet_Next(self, &pos, &next_key):
591
        ret = visit(next_key, arg)
592
        if ret:
593
            return ret
4679.3.88 by John Arbash Meinel
Some review comments from Andrew.
594
    return 0
4679.3.84 by John Arbash Meinel
Remove 2 more mallocs per line by using a fixed-width StaticTuple for reflists.
595
596
# It is a little bit ugly to do this, but it works, and means that Meliae can
597
# dump the total memory consumed by all child objects.
598
(<PyTypeObject *>SimpleSet).tp_traverse = <traverseproc>SimpleSet_traverse